You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_api_samplers_test.cc 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
  18. #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
  19. #include "minddata/dataset/include/dataset/datasets.h"
  20. #include <functional>
  21. using namespace mindspore::dataset;
  22. using mindspore::dataset::Tensor;
  23. class MindDataTestPipeline : public UT::DatasetOpTesting {
  24. protected:
  25. };
  26. TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
  27. std::shared_ptr<Sampler> sampl = std::make_shared<DistributedSampler>(2, 1);
  28. EXPECT_NE(sampl, nullptr);
  29. sampl = std::make_shared<PKSampler>(3);
  30. EXPECT_NE(sampl, nullptr);
  31. sampl = std::make_shared<RandomSampler>(false, 12);
  32. EXPECT_NE(sampl, nullptr);
  33. sampl = std::make_shared<SequentialSampler>(0, 12);
  34. EXPECT_NE(sampl, nullptr);
  35. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  36. sampl = std::make_shared<WeightedRandomSampler>(weights, 12);
  37. EXPECT_NE(sampl, nullptr);
  38. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  39. sampl = std::make_shared<SubsetSampler>(indices);
  40. EXPECT_NE(sampl, nullptr);
  41. sampl = std::make_shared<SubsetRandomSampler>(indices);
  42. EXPECT_NE(sampl, nullptr);
  43. // Create an ImageFolder Dataset
  44. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  45. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  46. EXPECT_NE(ds, nullptr);
  47. // Create a Repeat operation on ds
  48. int32_t repeat_num = 2;
  49. ds = ds->Repeat(repeat_num);
  50. EXPECT_NE(ds, nullptr);
  51. // Create a Batch operation on ds
  52. int32_t batch_size = 2;
  53. ds = ds->Batch(batch_size);
  54. EXPECT_NE(ds, nullptr);
  55. // Create an iterator over the result of the above dataset
  56. // This will trigger the creation of the Execution Tree and launch it.
  57. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  58. EXPECT_NE(iter, nullptr);
  59. // Iterate the dataset and get each row
  60. std::unordered_map<std::string, mindspore::MSTensor> row;
  61. ASSERT_OK(iter->GetNextRow(&row));
  62. uint64_t i = 0;
  63. while (row.size() != 0) {
  64. i++;
  65. auto image = row["image"];
  66. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  67. ASSERT_OK(iter->GetNextRow(&row));
  68. }
  69. EXPECT_EQ(i, 12);
  70. // Manually terminate the pipeline
  71. iter->Stop();
  72. }
  73. TEST_F(MindDataTestPipeline, TestNoSamplerSuccess1) {
  74. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNoSamplerSuccess1.";
  75. // Test building a dataset with no sampler provided (defaults to random sampler
  76. // Create an ImageFolder Dataset
  77. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  78. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false);
  79. EXPECT_NE(ds, nullptr);
  80. // Iterate the dataset and get each row
  81. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  82. EXPECT_NE(iter, nullptr);
  83. std::unordered_map<std::string, mindspore::MSTensor> row;
  84. ASSERT_OK(iter->GetNextRow(&row));
  85. uint64_t i = 0;
  86. while (row.size() != 0) {
  87. i++;
  88. auto label = row["label"];
  89. ASSERT_OK(iter->GetNextRow(&row));
  90. }
  91. EXPECT_EQ(i, ds->GetDatasetSize());
  92. iter->Stop();
  93. }
  94. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess1) {
  95. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess1.";
  96. // Test basic setting of distributed_sampler
  97. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  98. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, -1, true);
  99. EXPECT_NE(sampler, nullptr);
  100. // Create an ImageFolder Dataset
  101. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  102. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  103. EXPECT_NE(ds, nullptr);
  104. // Iterate the dataset and get each row
  105. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  106. EXPECT_NE(iter, nullptr);
  107. std::unordered_map<std::string, mindspore::MSTensor> row;
  108. ASSERT_OK(iter->GetNextRow(&row));
  109. uint64_t i = 0;
  110. while (row.size() != 0) {
  111. i++;
  112. auto label = row["label"];
  113. ASSERT_OK(iter->GetNextRow(&row));
  114. }
  115. EXPECT_EQ(i, 11);
  116. iter->Stop();
  117. }
  118. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess2) {
  119. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess2.";
  120. // Test basic setting of distributed_sampler
  121. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  122. auto sampler(new DistributedSampler(4, 0, false, 0, 0, -1, true));
  123. // Note that with new, we have to explicitly delete the allocated object as shown below.
  124. // Note: No need to check for output after calling API class constructor
  125. // Create an ImageFolder Dataset
  126. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  127. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  128. EXPECT_NE(ds, nullptr);
  129. // Iterate the dataset and get each row
  130. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  131. EXPECT_NE(iter, nullptr);
  132. std::unordered_map<std::string, mindspore::MSTensor> row;
  133. ASSERT_OK(iter->GetNextRow(&row));
  134. uint64_t i = 0;
  135. while (row.size() != 0) {
  136. i++;
  137. auto label = row["label"];
  138. ASSERT_OK(iter->GetNextRow(&row));
  139. }
  140. EXPECT_EQ(i, 11);
  141. iter->Stop();
  142. // Delete allocated objects with raw pointers
  143. delete sampler;
  144. }
  145. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess3) {
  146. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess3.";
  147. // Test basic setting of distributed_sampler
  148. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  149. DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, -1, true);
  150. // Create an ImageFolder Dataset
  151. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  152. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  153. EXPECT_NE(ds, nullptr);
  154. // Iterate the dataset and get each row
  155. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  156. EXPECT_NE(iter, nullptr);
  157. std::unordered_map<std::string, mindspore::MSTensor> row;
  158. ASSERT_OK(iter->GetNextRow(&row));
  159. uint64_t i = 0;
  160. while (row.size() != 0) {
  161. i++;
  162. auto label = row["label"];
  163. ASSERT_OK(iter->GetNextRow(&row));
  164. }
  165. EXPECT_EQ(i, 11);
  166. iter->Stop();
  167. }
  168. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess4) {
  169. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess4.";
  170. // Test pointer of distributed_sampler
  171. SequentialSampler sampler = SequentialSampler(0, 4);
  172. // Create an ImageFolder Dataset
  173. std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
  174. std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, &sampler);
  175. EXPECT_NE(ds, nullptr);
  176. // Iterate the dataset and get each row
  177. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  178. EXPECT_NE(iter, nullptr);
  179. std::unordered_map<std::string, mindspore::MSTensor> row;
  180. ASSERT_OK(iter->GetNextRow(&row));
  181. uint64_t i = 0;
  182. while (row.size() != 0) {
  183. i++;
  184. auto label = row["label"];
  185. ASSERT_OK(iter->GetNextRow(&row));
  186. }
  187. EXPECT_EQ(i, 4);
  188. iter->Stop();
  189. }
  190. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail1) {
  191. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail1.";
  192. // Test basic setting of distributed_sampler
  193. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  194. // offset=5 which is greater than num_shards=4 --> will fail later
  195. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, 5, false);
  196. EXPECT_NE(sampler, nullptr);
  197. // Create an ImageFolder Dataset
  198. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  199. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  200. EXPECT_NE(ds, nullptr);
  201. // Iterate will fail because sampler is not initiated successfully.
  202. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  203. EXPECT_EQ(iter, nullptr);
  204. }
  205. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail2) {
  206. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail2.";
  207. // Test basic setting of distributed_sampler
  208. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  209. // offset=5 which is greater than num_shards=4 --> will fail later
  210. auto sampler(new DistributedSampler(4, 0, false, 0, 0, 5, false));
  211. // Note that with new, we have to explicitly delete the allocated object as shown below.
  212. // Note: No need to check for output after calling API class constructor
  213. // Create an ImageFolder Dataset
  214. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  215. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  216. EXPECT_NE(ds, nullptr);
  217. // Iterate will fail because sampler is not initiated successfully.
  218. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  219. EXPECT_EQ(iter, nullptr);
  220. // Delete allocated objects with raw pointers
  221. delete sampler;
  222. }
  223. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail3) {
  224. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail3.";
  225. // Test basic setting of distributed_sampler
  226. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  227. // offset=5 which is greater than num_shards=4 --> will fail later
  228. DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, 5, false);
  229. // Create an ImageFolder Dataset
  230. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  231. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  232. EXPECT_NE(ds, nullptr);
  233. // Iterate will fail because sampler is not initiated successfully.
  234. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  235. EXPECT_EQ(iter, nullptr);
  236. }
  237. TEST_F(MindDataTestPipeline, TestSamplerAddChild) {
  238. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild.";
  239. auto sampler = std::make_shared<DistributedSampler>(1, 0, false, 5, 0, -1, true);
  240. EXPECT_NE(sampler, nullptr);
  241. auto child_sampler = std::make_shared<SequentialSampler>();
  242. EXPECT_NE(child_sampler, nullptr);
  243. sampler->AddChild(child_sampler);
  244. // Create an ImageFolder Dataset
  245. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  246. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  247. EXPECT_NE(ds, nullptr);
  248. // Iterate the dataset and get each row
  249. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  250. EXPECT_NE(iter, nullptr);
  251. std::unordered_map<std::string, mindspore::MSTensor> row;
  252. ASSERT_OK(iter->GetNextRow(&row));
  253. uint64_t i = 0;
  254. while (row.size() != 0) {
  255. i++;
  256. ASSERT_OK(iter->GetNextRow(&row));
  257. }
  258. EXPECT_EQ(ds->GetDatasetSize(), 5);
  259. iter->Stop();
  260. }
  261. TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess1) {
  262. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess1.";
  263. // Test basic setting of subset_sampler with default num_samples
  264. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  265. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  266. EXPECT_NE(sampl, nullptr);
  267. // Create an ImageFolder Dataset
  268. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  269. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  270. EXPECT_NE(ds, nullptr);
  271. // Iterate the dataset and get each row
  272. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  273. EXPECT_NE(iter, nullptr);
  274. std::unordered_map<std::string, mindspore::MSTensor> row;
  275. ASSERT_OK(iter->GetNextRow(&row));
  276. uint64_t i = 0;
  277. while (row.size() != 0) {
  278. i++;
  279. auto image = row["image"];
  280. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  281. ASSERT_OK(iter->GetNextRow(&row));
  282. }
  283. EXPECT_EQ(i, 6);
  284. iter->Stop();
  285. }
  286. TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess2) {
  287. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess2.";
  288. // Test subset_sampler with num_samples
  289. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  290. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 3);
  291. EXPECT_NE(sampl, nullptr);
  292. // Create an ImageFolder Dataset
  293. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  294. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  295. EXPECT_NE(ds, nullptr);
  296. // Iterate the dataset and get each row
  297. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  298. EXPECT_NE(iter, nullptr);
  299. std::unordered_map<std::string, mindspore::MSTensor> row;
  300. ASSERT_OK(iter->GetNextRow(&row));
  301. uint64_t i = 0;
  302. while (row.size() != 0) {
  303. i++;
  304. auto image = row["image"];
  305. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  306. ASSERT_OK(iter->GetNextRow(&row));
  307. }
  308. EXPECT_EQ(i, 3);
  309. iter->Stop();
  310. }
  311. TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess3) {
  312. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess3.";
  313. // Test subset_sampler with num_samples larger than the indices size.
  314. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  315. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 8);
  316. EXPECT_NE(sampl, nullptr);
  317. // Create an ImageFolder Dataset
  318. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  319. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  320. EXPECT_NE(ds, nullptr);
  321. // Iterate the dataset and get each row
  322. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  323. EXPECT_NE(iter, nullptr);
  324. std::unordered_map<std::string, mindspore::MSTensor> row;
  325. ASSERT_OK(iter->GetNextRow(&row));
  326. uint64_t i = 0;
  327. while (row.size() != 0) {
  328. i++;
  329. auto image = row["image"];
  330. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  331. ASSERT_OK(iter->GetNextRow(&row));
  332. }
  333. EXPECT_EQ(i, 6);
  334. iter->Stop();
  335. }
  336. TEST_F(MindDataTestPipeline, TestSubsetSamplerFail) {
  337. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerFail.";
  338. // Test subset_sampler with index out of bounds.
  339. std::vector<int64_t> indices = {2, 4, 6, 8, 10, 100}; // Sample ID (100) is out of bound
  340. std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  341. EXPECT_NE(sampl, nullptr);
  342. // Create an ImageFolder Dataset
  343. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  344. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  345. EXPECT_NE(ds, nullptr);
  346. // Iterate the dataset and get each row
  347. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  348. EXPECT_NE(iter, nullptr);
  349. std::unordered_map<std::string, mindspore::MSTensor> row;
  350. // Expect failure: index 100 is out of dataset bounds
  351. EXPECT_ERROR(iter->GetNextRow(&row));
  352. iter->Stop();
  353. }