You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_api_samplers_test.cc 8.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
  18. #include "minddata/dataset/include/datasets.h"
  19. using namespace mindspore::dataset;
  20. using mindspore::dataset::Tensor;
  21. class MindDataTestPipeline : public UT::DatasetOpTesting {
  22. protected:
  23. };
  24. TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
  25. std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1);
  26. EXPECT_NE(sampl, nullptr);
  27. sampl = PKSampler(3);
  28. EXPECT_NE(sampl, nullptr);
  29. sampl = RandomSampler(false, 12);
  30. EXPECT_NE(sampl, nullptr);
  31. sampl = SequentialSampler(0, 12);
  32. EXPECT_NE(sampl, nullptr);
  33. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  34. sampl = WeightedRandomSampler(weights, 12);
  35. EXPECT_NE(sampl, nullptr);
  36. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  37. sampl = SubsetSampler(indices);
  38. EXPECT_NE(sampl, nullptr);
  39. sampl = SubsetRandomSampler(indices);
  40. EXPECT_NE(sampl, nullptr);
  41. // Create an ImageFolder Dataset
  42. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  43. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  44. EXPECT_NE(ds, nullptr);
  45. // Create a Repeat operation on ds
  46. int32_t repeat_num = 2;
  47. ds = ds->Repeat(repeat_num);
  48. EXPECT_NE(ds, nullptr);
  49. // Create a Batch operation on ds
  50. int32_t batch_size = 2;
  51. ds = ds->Batch(batch_size);
  52. EXPECT_NE(ds, nullptr);
  53. // Create an iterator over the result of the above dataset
  54. // This will trigger the creation of the Execution Tree and launch it.
  55. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  56. EXPECT_NE(iter, nullptr);
  57. // Iterate the dataset and get each row
  58. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  59. iter->GetNextRow(&row);
  60. uint64_t i = 0;
  61. while (row.size() != 0) {
  62. i++;
  63. auto image = row["image"];
  64. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  65. iter->GetNextRow(&row);
  66. }
  67. EXPECT_EQ(i, 12);
  68. // Manually terminate the pipeline
  69. iter->Stop();
  70. }
  71. TEST_F(MindDataTestPipeline, TestCalculateNumSamples) {
  72. int64_t num_rows = 30; // dummy variable for number of rows in the dataset
  73. std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1, false, 6);
  74. EXPECT_NE(sampl, nullptr);
  75. std::shared_ptr<SamplerRT> sampler_rt = sampl->SamplerBuild();
  76. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 6);
  77. sampl = PKSampler(3, false);
  78. EXPECT_NE(sampl, nullptr);
  79. sampler_rt = sampl->SamplerBuild();
  80. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 30);
  81. sampl = RandomSampler(false, 12);
  82. EXPECT_NE(sampl, nullptr);
  83. sampler_rt = sampl->SamplerBuild();
  84. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 12);
  85. sampl = SequentialSampler(0, 10);
  86. EXPECT_NE(sampl, nullptr);
  87. sampler_rt = sampl->SamplerBuild();
  88. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 10);
  89. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  90. sampl = WeightedRandomSampler(weights, 12);
  91. EXPECT_NE(sampl, nullptr);
  92. sampler_rt = sampl->SamplerBuild();
  93. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 12);
  94. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21};
  95. sampl = SubsetRandomSampler(indices, 11);
  96. EXPECT_NE(sampl, nullptr);
  97. sampler_rt = sampl->SamplerBuild();
  98. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 11);
  99. // Testing chains
  100. // Parent and child have num_samples
  101. std::shared_ptr<SamplerObj> sampl1 = WeightedRandomSampler(weights, 12);
  102. EXPECT_NE(sampl1, nullptr);
  103. std::shared_ptr<SamplerRT> sampler_rt1 = sampl1->SamplerBuild();
  104. std::shared_ptr<SamplerObj> sampl2 = SequentialSampler(0, 10);
  105. EXPECT_NE(sampl2, nullptr);
  106. std::shared_ptr<SamplerRT> sampler_rt2 = sampl2->SamplerBuild();
  107. sampler_rt2->AddChild(sampler_rt1);
  108. EXPECT_EQ(sampler_rt2->CalculateNumSamples(num_rows), 10);
  109. // Parent doesn't have num_samples
  110. std::shared_ptr<SamplerObj> sampl3 = WeightedRandomSampler(weights, 12);
  111. EXPECT_NE(sampl3, nullptr);
  112. std::shared_ptr<SamplerRT> sampler_rt3 = sampl3->SamplerBuild();
  113. std::shared_ptr<SamplerObj> sampl4 = SubsetRandomSampler(indices);
  114. EXPECT_NE(sampl4, nullptr);
  115. std::shared_ptr<SamplerRT> sampler_rt4 = sampl4->SamplerBuild();
  116. sampler_rt4->AddChild(sampler_rt3);
  117. EXPECT_EQ(sampler_rt4->CalculateNumSamples(num_rows), 11);
  118. // Child doesn't have num_samples
  119. std::shared_ptr<SamplerObj> sampl5 = RandomSampler(false);
  120. EXPECT_NE(sampl5, nullptr);
  121. std::shared_ptr<SamplerRT> sampler_rt5 = sampl5->SamplerBuild();
  122. std::shared_ptr<SamplerObj> sampl6 = PKSampler(3, false, 7);
  123. EXPECT_NE(sampl6, nullptr);
  124. std::shared_ptr<SamplerRT> sampler_rt6 = sampl6->SamplerBuild();
  125. sampler_rt6->AddChild(sampler_rt5);
  126. EXPECT_EQ(sampler_rt6->CalculateNumSamples(num_rows), 7);
  127. }
  128. TEST_F(MindDataTestPipeline, TestSamplersMoveParameters) {
  129. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  130. std::shared_ptr<SamplerObj> sampl1 = SubsetRandomSampler(indices);
  131. EXPECT_FALSE(indices.empty());
  132. EXPECT_NE(sampl1->SamplerBuild(), nullptr);
  133. std::shared_ptr<SamplerObj> sampl2 = SubsetRandomSampler(std::move(indices));
  134. EXPECT_TRUE(indices.empty());
  135. EXPECT_NE(sampl2->SamplerBuild(), nullptr);
  136. }
  137. TEST_F(MindDataTestPipeline, TestWeightedRandomSamplerFail) {
  138. // weights is empty
  139. std::vector<double> weights1 = {};
  140. std::shared_ptr<SamplerObj> sampl1 = WeightedRandomSampler(weights1);
  141. EXPECT_EQ(sampl1, nullptr);
  142. // weights has negative number
  143. std::vector<double> weights2 = {0.5, 0.2, -0.4};
  144. std::shared_ptr<SamplerObj> sampl2 = WeightedRandomSampler(weights2);
  145. EXPECT_EQ(sampl2, nullptr);
  146. // weights elements are all zero
  147. std::vector<double> weights3 = {0, 0, 0};
  148. std::shared_ptr<SamplerObj> sampl3 = WeightedRandomSampler(weights3);
  149. EXPECT_EQ(sampl3, nullptr);
  150. }
  151. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess) {
  152. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess.";
  153. // Test basic setting of distributed_sampler
  154. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  155. std::shared_ptr<SamplerObj> sampler = DistributedSampler(4, 0, false, 0, 0, -1, true);
  156. EXPECT_NE(sampler, nullptr);
  157. // Create an ImageFolder Dataset
  158. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  159. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  160. EXPECT_NE(ds, nullptr);
  161. // Iterate the dataset and get each row
  162. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  163. EXPECT_NE(iter, nullptr);
  164. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  165. iter->GetNextRow(&row);
  166. uint64_t i = 0;
  167. while (row.size() != 0) {
  168. i++;
  169. auto label = row["label"];
  170. iter->GetNextRow(&row);
  171. }
  172. EXPECT_EQ(i, 11);
  173. iter->Stop();
  174. }
  175. TEST_F(MindDataTestPipeline, TestSamplerAddChild) {
  176. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild.";
  177. auto sampler = DistributedSampler(1, 0, false, 5, 0, -1, true);
  178. EXPECT_NE(sampler, nullptr);
  179. auto child_sampler = SequentialSampler();
  180. sampler->AddChildSampler(child_sampler);
  181. EXPECT_NE(child_sampler, nullptr);
  182. // Create an ImageFolder Dataset
  183. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  184. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  185. EXPECT_NE(ds, nullptr);
  186. // Iterate the dataset and get each row
  187. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  188. EXPECT_NE(iter, nullptr);
  189. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  190. iter->GetNextRow(&row);
  191. uint64_t i = 0;
  192. while (row.size() != 0) {
  193. i++;
  194. iter->GetNextRow(&row);
  195. }
  196. EXPECT_EQ(ds->GetDatasetSize(), 5);
  197. iter->Stop();
  198. }
  199. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail) {
  200. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail.";
  201. // Test invalid offset setting of distributed_sampler
  202. // offset=5 which is greater than num_shards=4
  203. std::shared_ptr<SamplerObj> sampler = DistributedSampler(4, 0, false, 0, 0, 5, false);
  204. EXPECT_EQ(sampler, nullptr);
  205. }