You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

c_api_samplers_test.cc 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  #include "common/common.h"
  #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
  #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
  #include "minddata/dataset/include/datasets.h"
  #include <cstdint>
  #include <functional>
  #include <memory>
  #include <string>
  #include <unordered_map>
  #include <vector>
  21. using namespace mindspore::dataset;
  22. using mindspore::dataset::Tensor;
  23. class MindDataTestPipeline : public UT::DatasetOpTesting {
  24. protected:
  25. };
  26. TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
  27. std::shared_ptr<Sampler> sampl = std::make_shared<DistributedSampler>(2, 1);
  28. EXPECT_NE(sampl, nullptr);
  29. sampl = std::make_shared<PKSampler>(3);
  30. EXPECT_NE(sampl, nullptr);
  31. sampl = std::make_shared<RandomSampler>(false, 12);
  32. EXPECT_NE(sampl, nullptr);
  33. sampl = std::make_shared<SequentialSampler>(0, 12);
  34. EXPECT_NE(sampl, nullptr);
  35. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  36. sampl = std::make_shared<WeightedRandomSampler>(weights, 12);
  37. EXPECT_NE(sampl, nullptr);
  38. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  39. sampl = std::make_shared<SubsetSampler>(indices);
  40. EXPECT_NE(sampl, nullptr);
  41. sampl = std::make_shared<SubsetRandomSampler>(indices);
  42. EXPECT_NE(sampl, nullptr);
  43. // Create an ImageFolder Dataset
  44. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  45. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  46. EXPECT_NE(ds, nullptr);
  47. // Create a Repeat operation on ds
  48. int32_t repeat_num = 2;
  49. ds = ds->Repeat(repeat_num);
  50. EXPECT_NE(ds, nullptr);
  51. // Create a Batch operation on ds
  52. int32_t batch_size = 2;
  53. ds = ds->Batch(batch_size);
  54. EXPECT_NE(ds, nullptr);
  55. // Create an iterator over the result of the above dataset
  56. // This will trigger the creation of the Execution Tree and launch it.
  57. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  58. EXPECT_NE(iter, nullptr);
  59. // Iterate the dataset and get each row
  60. std::unordered_map<std::string, mindspore::MSTensor> row;
  61. iter->GetNextRow(&row);
  62. uint64_t i = 0;
  63. while (row.size() != 0) {
  64. i++;
  65. auto image = row["image"];
  66. MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
  67. iter->GetNextRow(&row);
  68. }
  69. EXPECT_EQ(i, 12);
  70. // Manually terminate the pipeline
  71. iter->Stop();
  72. }
  73. TEST_F(MindDataTestPipeline, TestNoSamplerSuccess1) {
  74. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNoSamplerSuccess1.";
  75. // Test building a dataset with no sampler provided (defaults to random sampler
  76. // Create an ImageFolder Dataset
  77. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  78. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false);
  79. EXPECT_NE(ds, nullptr);
  80. // Iterate the dataset and get each row
  81. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  82. EXPECT_NE(iter, nullptr);
  83. std::unordered_map<std::string, mindspore::MSTensor> row;
  84. iter->GetNextRow(&row);
  85. uint64_t i = 0;
  86. while (row.size() != 0) {
  87. i++;
  88. auto label = row["label"];
  89. iter->GetNextRow(&row);
  90. }
  91. EXPECT_EQ(i, ds->GetDatasetSize());
  92. iter->Stop();
  93. }
  94. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess1) {
  95. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess1.";
  96. // Test basic setting of distributed_sampler
  97. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  98. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, -1, true);
  99. EXPECT_NE(sampler, nullptr);
  100. // Create an ImageFolder Dataset
  101. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  102. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  103. EXPECT_NE(ds, nullptr);
  104. // Iterate the dataset and get each row
  105. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  106. EXPECT_NE(iter, nullptr);
  107. std::unordered_map<std::string, mindspore::MSTensor> row;
  108. iter->GetNextRow(&row);
  109. uint64_t i = 0;
  110. while (row.size() != 0) {
  111. i++;
  112. auto label = row["label"];
  113. iter->GetNextRow(&row);
  114. }
  115. EXPECT_EQ(i, 11);
  116. iter->Stop();
  117. }
  118. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess2) {
  119. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess2.";
  120. // Test basic setting of distributed_sampler
  121. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  122. auto sampler(new DistributedSampler(4, 0, false, 0, 0, -1, true));
  123. // Note that with new, we have to explicitly delete the allocated object as shown below.
  124. // Note: No need to check for output after calling API class constructor
  125. // Create an ImageFolder Dataset
  126. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  127. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  128. EXPECT_NE(ds, nullptr);
  129. // Iterate the dataset and get each row
  130. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  131. EXPECT_NE(iter, nullptr);
  132. std::unordered_map<std::string, mindspore::MSTensor> row;
  133. iter->GetNextRow(&row);
  134. uint64_t i = 0;
  135. while (row.size() != 0) {
  136. i++;
  137. auto label = row["label"];
  138. iter->GetNextRow(&row);
  139. }
  140. EXPECT_EQ(i, 11);
  141. iter->Stop();
  142. // Delete allocated objects with raw pointers
  143. delete sampler;
  144. }
  145. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess3) {
  146. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess3.";
  147. // Test basic setting of distributed_sampler
  148. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  149. DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, -1, true);
  150. // Create an ImageFolder Dataset
  151. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  152. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  153. EXPECT_NE(ds, nullptr);
  154. // Iterate the dataset and get each row
  155. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  156. EXPECT_NE(iter, nullptr);
  157. std::unordered_map<std::string, mindspore::MSTensor> row;
  158. iter->GetNextRow(&row);
  159. uint64_t i = 0;
  160. while (row.size() != 0) {
  161. i++;
  162. auto label = row["label"];
  163. iter->GetNextRow(&row);
  164. }
  165. EXPECT_EQ(i, 11);
  166. iter->Stop();
  167. }
  168. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail1) {
  169. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail1.";
  170. // Test basic setting of distributed_sampler
  171. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  172. // offset=5 which is greater than num_shards=4 --> will fail later
  173. std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, 5, false);
  174. EXPECT_NE(sampler, nullptr);
  175. // Create an ImageFolder Dataset
  176. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  177. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  178. EXPECT_NE(ds, nullptr);
  179. // Iterate will fail because sampler is not initiated successfully.
  180. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  181. EXPECT_EQ(iter, nullptr);
  182. }
  183. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail2) {
  184. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail2.";
  185. // Test basic setting of distributed_sampler
  186. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  187. // offset=5 which is greater than num_shards=4 --> will fail later
  188. auto sampler(new DistributedSampler(4, 0, false, 0, 0, 5, false));
  189. // Note that with new, we have to explicitly delete the allocated object as shown below.
  190. // Note: No need to check for output after calling API class constructor
  191. // Create an ImageFolder Dataset
  192. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  193. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  194. EXPECT_NE(ds, nullptr);
  195. // Iterate will fail because sampler is not initiated successfully.
  196. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  197. EXPECT_EQ(iter, nullptr);
  198. // Delete allocated objects with raw pointers
  199. delete sampler;
  200. }
  201. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail3) {
  202. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail3.";
  203. // Test basic setting of distributed_sampler
  204. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true
  205. // offset=5 which is greater than num_shards=4 --> will fail later
  206. DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, 5, false);
  207. // Create an ImageFolder Dataset
  208. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  209. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  210. EXPECT_NE(ds, nullptr);
  211. // Iterate will fail because sampler is not initiated successfully.
  212. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  213. EXPECT_EQ(iter, nullptr);
  214. }
  215. TEST_F(MindDataTestPipeline, TestSamplerAddChild) {
  216. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild.";
  217. auto sampler = std::make_shared<DistributedSampler>(1, 0, false, 5, 0, -1, true);
  218. EXPECT_NE(sampler, nullptr);
  219. auto child_sampler = std::make_shared<SequentialSampler>();
  220. EXPECT_NE(child_sampler, nullptr);
  221. sampler->AddChild(child_sampler);
  222. // Create an ImageFolder Dataset
  223. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  224. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  225. EXPECT_NE(ds, nullptr);
  226. // Iterate the dataset and get each row
  227. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  228. EXPECT_NE(iter, nullptr);
  229. std::unordered_map<std::string, mindspore::MSTensor> row;
  230. iter->GetNextRow(&row);
  231. uint64_t i = 0;
  232. while (row.size() != 0) {
  233. i++;
  234. iter->GetNextRow(&row);
  235. }
  236. EXPECT_EQ(ds->GetDatasetSize(), 5);
  237. iter->Stop();
  238. }