You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

c_api_samplers_test.cc 26 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  #include "common/common.h"
  #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
  #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
  #include "minddata/dataset/include/dataset/datasets.h"
  #include <cstdint>
  #include <functional>
  #include <memory>
  #include <string>
  #include <unordered_map>
  #include <vector>
  21. using namespace mindspore::dataset;
  22. using mindspore::dataset::Tensor;
  23. class MindDataTestPipeline : public UT::DatasetOpTesting {
  24. protected:
  25. };
  26. TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
  27. std::shared_ptr<Sampler> sampl = std::make_shared<DistributedSampler>(2, 1);
  28. EXPECT_NE(sampl, nullptr);
  29. sampl = std::make_shared<PKSampler>(3);
  30. EXPECT_NE(sampl, nullptr);
  31. sampl = std::make_shared<RandomSampler>(false, 12);
  32. EXPECT_NE(sampl, nullptr);
  33. sampl = std::make_shared<SequentialSampler>(0, 12);
  34. EXPECT_NE(sampl, nullptr);
  35. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  36. sampl = std::make_shared<WeightedRandomSampler>(weights, 12);
  37. EXPECT_NE(sampl, nullptr);
  38. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  39. sampl = std::make_shared<SubsetSampler>(indices);
  40. EXPECT_NE(sampl, nullptr);
  41. sampl = std::make_shared<SubsetRandomSampler>(indices);
  42. EXPECT_NE(sampl, nullptr);
  43. // Create an ImageFolder Dataset
  44. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  45. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  46. EXPECT_NE(ds, nullptr);
  47. // Create a Repeat operation on ds
  48. int32_t repeat_num = 2;
  49. ds = ds->Repeat(repeat_num);
  50. EXPECT_NE(ds, nullptr);
  51. // Create a Batch operation on ds
  52. int32_t batch_size = 2;
  53. ds = ds->Batch(batch_size);
  54. EXPECT_NE(ds, nullptr);
  55. // Create an iterator over the result of the above dataset
  56. // This will trigger the creation of the Execution Tree and launch it.
  57. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  58. EXPECT_NE(iter, nullptr);
  59. // Iterate the dataset and get each row
  60. std::unordered_map<std::string, mindspore::MSTensor> row;
  61. ASSERT_OK(iter->GetNextRow(&row));
  62. uint64_t i = 0;
  63. while (row.size() != 0) {
  64. i++;
  65. auto image = row["image"];
  66. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  67. ASSERT_OK(iter->GetNextRow(&row));
  68. }
  69. EXPECT_EQ(i, 12);
  70. // Manually terminate the pipeline
  71. iter->Stop();
  72. }
  73. // Feature: Test ImageFolder with WeightedRandomSampler
  74. // Description: Create ImageFolder dataset with WeightedRandomRampler given num_samples=12,
  75. // iterate through dataset and count rows
  76. // Expectation: There should be 12 rows
  77. TEST_F(MindDataTestPipeline, TestWeightedRandomSamplerImageFolder) {
  78. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  79. std::shared_ptr<Sampler> sampl = std::make_shared<WeightedRandomSampler>(weights, 12);
  80. EXPECT_NE(sampl, nullptr);
  81. // Create an ImageFolder Dataset
  82. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  83. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  84. EXPECT_NE(ds, nullptr);
  85. // Create an iterator over the result of the above dataset
  86. // This will trigger the creation of the Execution Tree and launch it.
  87. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  88. EXPECT_NE(iter, nullptr);
  89. // Iterate the dataset and get each row
  90. std::unordered_map<std::string, mindspore::MSTensor> row;
  91. ASSERT_OK(iter->GetNextRow(&row));
  92. uint64_t i = 0;
  93. while (row.size() != 0) {
  94. i++;
  95. auto image = row["image"];
  96. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  97. ASSERT_OK(iter->GetNextRow(&row));
  98. }
  99. EXPECT_EQ(i, 12);
  100. // Manually terminate the pipeline
  101. iter->Stop();
  102. }
  103. TEST_F(MindDataTestPipeline, TestNoSamplerSuccess1) {
  104. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNoSamplerSuccess1.";
  105. // Test building a dataset with no sampler provided (defaults to random sampler
  106. // Create an ImageFolder Dataset
  107. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  108. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false);
  109. EXPECT_NE(ds, nullptr);
  110. // Iterate the dataset and get each row
  111. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  112. EXPECT_NE(iter, nullptr);
  113. std::unordered_map<std::string, mindspore::MSTensor> row;
  114. ASSERT_OK(iter->GetNextRow(&row));
  115. uint64_t i = 0;
  116. while (row.size() != 0) {
  117. i++;
  118. auto label = row["label"];
  119. ASSERT_OK(iter->GetNextRow(&row));
  120. }
  121. EXPECT_EQ(i, ds->GetDatasetSize());
  122. iter->Stop();
  123. }
  124. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess1) {
  125. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess1.";
  126. // Test basic setting of distributed_sampler
  127. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  128. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, -1, true);
  129. EXPECT_NE(sampler, nullptr);
  130. // Create an ImageFolder Dataset
  131. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  132. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  133. EXPECT_NE(ds, nullptr);
  134. // Iterate the dataset and get each row
  135. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  136. EXPECT_NE(iter, nullptr);
  137. std::unordered_map<std::string, mindspore::MSTensor> row;
  138. ASSERT_OK(iter->GetNextRow(&row));
  139. uint64_t i = 0;
  140. while (row.size() != 0) {
  141. i++;
  142. auto label = row["label"];
  143. ASSERT_OK(iter->GetNextRow(&row));
  144. }
  145. EXPECT_EQ(i, 11);
  146. iter->Stop();
  147. }
  148. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess2) {
  149. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess2.";
  150. // Test basic setting of distributed_sampler
  151. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  152. auto sampler(new DistributedSampler(4, 0, false, 0, 0, -1, true));
  153. // Note that with new, we have to explicitly delete the allocated object as shown below.
  154. // Note: No need to check for output after calling API class constructor
  155. // Create an ImageFolder Dataset
  156. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  157. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  158. EXPECT_NE(ds, nullptr);
  159. // Iterate the dataset and get each row
  160. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  161. EXPECT_NE(iter, nullptr);
  162. std::unordered_map<std::string, mindspore::MSTensor> row;
  163. ASSERT_OK(iter->GetNextRow(&row));
  164. uint64_t i = 0;
  165. while (row.size() != 0) {
  166. i++;
  167. auto label = row["label"];
  168. ASSERT_OK(iter->GetNextRow(&row));
  169. }
  170. EXPECT_EQ(i, 11);
  171. iter->Stop();
  172. // Delete allocated objects with raw pointers
  173. delete sampler;
  174. }
  175. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess3) {
  176. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess3.";
  177. // Test basic setting of distributed_sampler
  178. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  179. DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, -1, true);
  180. // Create an ImageFolder Dataset
  181. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  182. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  183. EXPECT_NE(ds, nullptr);
  184. // Iterate the dataset and get each row
  185. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  186. EXPECT_NE(iter, nullptr);
  187. std::unordered_map<std::string, mindspore::MSTensor> row;
  188. ASSERT_OK(iter->GetNextRow(&row));
  189. uint64_t i = 0;
  190. while (row.size() != 0) {
  191. i++;
  192. auto label = row["label"];
  193. ASSERT_OK(iter->GetNextRow(&row));
  194. }
  195. EXPECT_EQ(i, 11);
  196. iter->Stop();
  197. }
  198. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess4) {
  199. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess4.";
  200. // Test pointer of distributed_sampler
  201. SequentialSampler sampler = SequentialSampler(0, 4);
  202. // Create an ImageFolder Dataset
  203. std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
  204. std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, &sampler);
  205. EXPECT_NE(ds, nullptr);
  206. // Iterate the dataset and get each row
  207. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  208. EXPECT_NE(iter, nullptr);
  209. std::unordered_map<std::string, mindspore::MSTensor> row;
  210. ASSERT_OK(iter->GetNextRow(&row));
  211. uint64_t i = 0;
  212. while (row.size() != 0) {
  213. i++;
  214. auto label = row["label"];
  215. ASSERT_OK(iter->GetNextRow(&row));
  216. }
  217. EXPECT_EQ(i, 4);
  218. iter->Stop();
  219. }
  220. // Feature: Test ImageFolder with DistributedSampler
  221. // Description: Create ImageFolder dataset with DistributedSampler given num_shards=11 and shard_id=10,
  222. // count rows in dataset
  223. // Expectation: There should be 4 rows (44 rows in original data/11 = 4)
  224. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess5) {
  225. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess5.";
  226. // Test basic setting of distributed_sampler
  227. // num_shards=11, shard_id=10, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  228. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(11, 10, false, 0, 0, -1, true);
  229. EXPECT_NE(sampler, nullptr);
  230. // Create an ImageFolder Dataset
  231. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  232. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  233. EXPECT_NE(ds, nullptr);
  234. // Iterate the dataset and get each row
  235. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  236. EXPECT_NE(iter, nullptr);
  237. std::unordered_map<std::string, mindspore::MSTensor> row;
  238. ASSERT_OK(iter->GetNextRow(&row));
  239. uint64_t i = 0;
  240. while (row.size() != 0) {
  241. i++;
  242. auto label = row["label"];
  243. ASSERT_OK(iter->GetNextRow(&row));
  244. }
  245. EXPECT_EQ(i, 4);
  246. iter->Stop();
  247. }
  248. // Feature: Test ImageFolder with DistributedSampler
  249. // Description: Create ImageFolder dataset with DistributedSampler given num_shards=4 and shard_id=3,
  250. // count rows in dataset
  251. // Expectation: There should be 11 rows (44 rows in original data/4 = 11)
  252. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess6) {
  253. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess6.";
  254. // Test basic setting of distributed_sampler
  255. // num_shards=4, shard_id=3, shuffle=false, num_samplers=12, seed=0, offset=-1, even_dist=true
  256. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 3, false, 12, 0, -1, true);
  257. EXPECT_NE(sampler, nullptr);
  258. // Create an ImageFolder Dataset
  259. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  260. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  261. EXPECT_NE(ds, nullptr);
  262. // Iterate the dataset and get each row
  263. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  264. EXPECT_NE(iter, nullptr);
  265. std::unordered_map<std::string, mindspore::MSTensor> row;
  266. ASSERT_OK(iter->GetNextRow(&row));
  267. uint64_t i = 0;
  268. while (row.size() != 0) {
  269. i++;
  270. auto label = row["label"];
  271. ASSERT_OK(iter->GetNextRow(&row));
  272. }
  273. EXPECT_EQ(i, 11);
  274. iter->Stop();
  275. }
  276. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail1) {
  277. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail1.";
  278. // Test basic setting of distributed_sampler
  279. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  280. // offset=5 which is greater than num_shards=4 --> will fail later
  281. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, 5, false);
  282. EXPECT_NE(sampler, nullptr);
  283. // Create an ImageFolder Dataset
  284. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  285. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  286. EXPECT_NE(ds, nullptr);
  287. // Iterate will fail because sampler is not initiated successfully.
  288. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  289. EXPECT_EQ(iter, nullptr);
  290. }
  291. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail2) {
  292. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail2.";
  293. // Test basic setting of distributed_sampler
  294. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  295. // offset=5 which is greater than num_shards=4 --> will fail later
  296. auto sampler(new DistributedSampler(4, 0, false, 0, 0, 5, false));
  297. // Note that with new, we have to explicitly delete the allocated object as shown below.
  298. // Note: No need to check for output after calling API class constructor
  299. // Create an ImageFolder Dataset
  300. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  301. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  302. EXPECT_NE(ds, nullptr);
  303. // Iterate will fail because sampler is not initiated successfully.
  304. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  305. EXPECT_EQ(iter, nullptr);
  306. // Delete allocated objects with raw pointers
  307. delete sampler;
  308. }
  309. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail3) {
  310. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail3.";
  311. // Test basic setting of distributed_sampler
  312. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  313. // offset=5 which is greater than num_shards=4 --> will fail later
  314. DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, 5, false);
  315. // Create an ImageFolder Dataset
  316. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  317. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  318. EXPECT_NE(ds, nullptr);
  319. // Iterate will fail because sampler is not initiated successfully.
  320. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  321. EXPECT_EQ(iter, nullptr);
  322. }
  323. TEST_F(MindDataTestPipeline, TestSamplerAddChild) {
  324. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild.";
  325. auto sampler = std::make_shared<DistributedSampler>(1, 0, false, 5, 0, -1, true);
  326. EXPECT_NE(sampler, nullptr);
  327. auto child_sampler = std::make_shared<SequentialSampler>();
  328. EXPECT_NE(child_sampler, nullptr);
  329. sampler->AddChild(child_sampler);
  330. // Create an ImageFolder Dataset
  331. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  332. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  333. EXPECT_NE(ds, nullptr);
  334. // Iterate the dataset and get each row
  335. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  336. EXPECT_NE(iter, nullptr);
  337. std::unordered_map<std::string, mindspore::MSTensor> row;
  338. ASSERT_OK(iter->GetNextRow(&row));
  339. uint64_t i = 0;
  340. while (row.size() != 0) {
  341. i++;
  342. ASSERT_OK(iter->GetNextRow(&row));
  343. }
  344. EXPECT_EQ(ds->GetDatasetSize(), 5);
  345. iter->Stop();
  346. }
  347. /// Feature: MindData Sampler Support
  348. /// Description: Test MindData Sampler AddChild with nested children
  349. /// Expectation: Result dataset has expected number of samples.
  350. TEST_F(MindDataTestPipeline, TestSamplerAddChild2) {
  351. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild2.";
  352. // num_samples of parent sampler > num_sampler of child sampler, namely 5 > 2, num_shards is 2 to output dataset with
  353. // 1 sampler
  354. auto sampler = std::make_shared<DistributedSampler>(2, 0, false, 5, 0, -1, true);
  355. EXPECT_NE(sampler, nullptr);
  356. // num_samples of parent sampler > num_samples of child sampler, namely 4 > 2
  357. auto child_sampler = std::make_shared<RandomSampler>(true, 4);
  358. EXPECT_NE(child_sampler, nullptr);
  359. auto child_sampler2 = std::make_shared<SequentialSampler>(0, 2);
  360. EXPECT_NE(child_sampler2, nullptr);
  361. child_sampler->AddChild(child_sampler2);
  362. sampler->AddChild(child_sampler);
  363. // Create an ImageFolder Dataset
  364. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  365. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  366. EXPECT_NE(ds, nullptr);
  367. // Iterate the dataset and get each row
  368. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  369. EXPECT_NE(iter, nullptr);
  370. std::unordered_map<std::string, mindspore::MSTensor> row;
  371. ASSERT_OK(iter->GetNextRow(&row));
  372. uint64_t i = 0;
  373. while (row.size() != 0) {
  374. i++;
  375. ASSERT_OK(iter->GetNextRow(&row));
  376. }
  377. EXPECT_EQ(i, 1);
  378. EXPECT_EQ(ds->GetDatasetSize(), 1);
  379. iter->Stop();
  380. }
  381. /// Feature: MindData Sampler Support
  382. /// Description: Test MindData Sampler AddChild with num_samples of parent sampler > num_samples of child sampler
  383. /// Expectation: Result dataset has expected number of samples.
  384. TEST_F(MindDataTestPipeline, TestSamplerAddChild3) {
  385. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild3.";
  386. // num_samples of parent sampler > num_samples of child sampler, namely 5 > 4
  387. std::vector<double> weights = {1.0, 0.1, 0.02, 0.3};
  388. auto sampler = std::make_shared<WeightedRandomSampler>(weights, 5);
  389. EXPECT_NE(sampler, nullptr);
  390. auto child_sampler = std::make_shared<SequentialSampler>(0, 4);
  391. EXPECT_NE(child_sampler, nullptr);
  392. sampler->AddChild(child_sampler);
  393. // Create an ImageFolder Dataset
  394. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  395. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  396. EXPECT_NE(ds, nullptr);
  397. // Iterate the dataset and get each row
  398. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  399. EXPECT_NE(iter, nullptr);
  400. std::unordered_map<std::string, mindspore::MSTensor> row;
  401. ASSERT_OK(iter->GetNextRow(&row));
  402. uint64_t i = 0;
  403. while (row.size() != 0) {
  404. i++;
  405. ASSERT_OK(iter->GetNextRow(&row));
  406. }
  407. EXPECT_EQ(i, 4);
  408. EXPECT_EQ(ds->GetDatasetSize(), 4);
  409. iter->Stop();
  410. }
  411. /// Feature: MindData Sampler Support
  412. /// Description: Test MindData Sampler AddChild with num_samples of parent sampler < num_samples of child sampler
  413. /// Expectation: Result dataset has expected number of samples.
  414. TEST_F(MindDataTestPipeline, TestSamplerAddChild4) {
  415. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild4.";
  416. // num_samples of parent sampler < num_samples of child sampler, namely 5 < 7
  417. auto sampler = std::make_shared<DistributedSampler>(1, 0, false, 5, 0, -1, true);
  418. EXPECT_NE(sampler, nullptr);
  419. auto child_sampler = std::make_shared<PKSampler>(3, true, 7);
  420. EXPECT_NE(child_sampler, nullptr);
  421. sampler->AddChild(child_sampler);
  422. // Create an ImageFolder Dataset
  423. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  424. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  425. EXPECT_NE(ds, nullptr);
  426. // Iterate the dataset and get each row
  427. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  428. EXPECT_NE(iter, nullptr);
  429. std::unordered_map<std::string, mindspore::MSTensor> row;
  430. ASSERT_OK(iter->GetNextRow(&row));
  431. uint64_t i = 0;
  432. while (row.size() != 0) {
  433. i++;
  434. ASSERT_OK(iter->GetNextRow(&row));
  435. }
  436. EXPECT_EQ(i, 5);
  437. EXPECT_EQ(ds->GetDatasetSize(), 5);
  438. iter->Stop();
  439. }
  440. /// Feature: MindData Sampler Support
  441. /// Description: Test MindData Sampler AddChild with several children
  442. /// Expectation: Result dataset has expected number of samples, and output error messages for more than 1 child.
  443. TEST_F(MindDataTestPipeline, TestSamplerAddChild5) {
  444. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild5.";
  445. // Use all samples (num_sampler=0) for parent DistributedSampler
  446. auto sampler = std::make_shared<DistributedSampler>(1, 0, false, 0, 0, -1, true);
  447. EXPECT_NE(sampler, nullptr);
  448. auto child_sampler1 = std::make_shared<SequentialSampler>(0, 10);
  449. EXPECT_NE(child_sampler1, nullptr);
  450. sampler->AddChild(child_sampler1);
  451. // Attempt to add more than one child_sampler is expected to fail
  452. auto child_sampler2 = std::make_shared<SequentialSampler>(0, 6);
  453. EXPECT_NE(child_sampler2, nullptr);
  454. sampler->AddChild(child_sampler2);
  455. auto child_sampler3 = std::make_shared<SequentialSampler>(0, 7);
  456. EXPECT_NE(child_sampler3, nullptr);
  457. sampler->AddChild(child_sampler3);
  458. // Create an ImageFolder Dataset
  459. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  460. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  461. EXPECT_NE(ds, nullptr);
  462. // Iterate the dataset and get each row
  463. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  464. EXPECT_NE(iter, nullptr);
  465. std::unordered_map<std::string, mindspore::MSTensor> row;
  466. ASSERT_OK(iter->GetNextRow(&row));
  467. uint64_t i = 0;
  468. while (row.size() != 0) {
  469. i++;
  470. ASSERT_OK(iter->GetNextRow(&row));
  471. }
  472. EXPECT_EQ(i, 10);
  473. EXPECT_EQ(ds->GetDatasetSize(), 10);
  474. iter->Stop();
  475. }
  476. TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess1) {
  477. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess1.";
  478. // Test basic setting of subset_sampler with default num_samples
  479. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  480. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  481. EXPECT_NE(sampl, nullptr);
  482. // Create an ImageFolder Dataset
  483. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  484. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  485. EXPECT_NE(ds, nullptr);
  486. // Iterate the dataset and get each row
  487. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  488. EXPECT_NE(iter, nullptr);
  489. std::unordered_map<std::string, mindspore::MSTensor> row;
  490. ASSERT_OK(iter->GetNextRow(&row));
  491. uint64_t i = 0;
  492. while (row.size() != 0) {
  493. i++;
  494. auto image = row["image"];
  495. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  496. ASSERT_OK(iter->GetNextRow(&row));
  497. }
  498. EXPECT_EQ(i, 6);
  499. iter->Stop();
  500. }
  501. TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess2) {
  502. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess2.";
  503. // Test subset_sampler with num_samples
  504. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  505. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 3);
  506. EXPECT_NE(sampl, nullptr);
  507. // Create an ImageFolder Dataset
  508. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  509. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  510. EXPECT_NE(ds, nullptr);
  511. // Iterate the dataset and get each row
  512. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  513. EXPECT_NE(iter, nullptr);
  514. std::unordered_map<std::string, mindspore::MSTensor> row;
  515. ASSERT_OK(iter->GetNextRow(&row));
  516. uint64_t i = 0;
  517. while (row.size() != 0) {
  518. i++;
  519. auto image = row["image"];
  520. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  521. ASSERT_OK(iter->GetNextRow(&row));
  522. }
  523. EXPECT_EQ(i, 3);
  524. iter->Stop();
  525. }
  526. TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess3) {
  527. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess3.";
  528. // Test subset_sampler with num_samples larger than the indices size.
  529. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  530. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 8);
  531. EXPECT_NE(sampl, nullptr);
  532. // Create an ImageFolder Dataset
  533. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  534. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  535. EXPECT_NE(ds, nullptr);
  536. // Iterate the dataset and get each row
  537. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  538. EXPECT_NE(iter, nullptr);
  539. std::unordered_map<std::string, mindspore::MSTensor> row;
  540. ASSERT_OK(iter->GetNextRow(&row));
  541. uint64_t i = 0;
  542. while (row.size() != 0) {
  543. i++;
  544. auto image = row["image"];
  545. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  546. ASSERT_OK(iter->GetNextRow(&row));
  547. }
  548. EXPECT_EQ(i, 6);
  549. iter->Stop();
  550. }
  551. TEST_F(MindDataTestPipeline, TestSubsetSamplerFail) {
  552. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerFail.";
  553. // Test subset_sampler with index out of bounds.
  554. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 100}; // Sample ID (100) is out of bound
  555. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  556. EXPECT_NE(sampl, nullptr);
  557. // Create an ImageFolder Dataset
  558. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  559. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  560. EXPECT_NE(ds, nullptr);
  561. // Iterate the dataset and get each row
  562. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  563. EXPECT_NE(iter, nullptr);
  564. std::unordered_map<std::string, mindspore::MSTensor> row;
  565. // Expect failure: index 100 is out of dataset bounds
  566. EXPECT_ERROR(iter->GetNextRow(&row));
  567. iter->Stop();
  568. }
  569. // Feature: Test ImageFolder with PKSampler
  570. // Description: Create ImageFolder dataset with DistributedSampler given num_val=3 and count rows
  571. // Expectation: There should be 12 rows
  572. TEST_F(MindDataTestPipeline, TestPKSamplerImageFolder) {
  573. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPKSamplerImageFolder.";
  574. std::shared_ptr<Sampler> sampler = std::make_shared<PKSampler>(3, false);
  575. EXPECT_NE(sampler, nullptr);
  576. // Create an ImageFolder Dataset
  577. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  578. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  579. EXPECT_NE(ds, nullptr);
  580. // Iterate the dataset and get each row
  581. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  582. EXPECT_NE(iter, nullptr);
  583. std::unordered_map<std::string, mindspore::MSTensor> row;
  584. ASSERT_OK(iter->GetNextRow(&row));
  585. uint64_t i = 0;
  586. while (row.size() != 0) {
  587. i++;
  588. ASSERT_OK(iter->GetNextRow(&row));
  589. }
  590. EXPECT_EQ(i, 12);
  591. iter->Stop();
  592. }