You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_api_samplers_test.cc 7.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/include/datasets.h"
  18. using namespace mindspore::dataset;
  19. using mindspore::dataset::Tensor;
  20. class MindDataTestPipeline : public UT::DatasetOpTesting {
  21. protected:
  22. };
  23. TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
  24. std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1);
  25. EXPECT_NE(sampl, nullptr);
  26. sampl = PKSampler(3);
  27. EXPECT_NE(sampl, nullptr);
  28. sampl = RandomSampler(false, 12);
  29. EXPECT_NE(sampl, nullptr);
  30. sampl = SequentialSampler(0, 12);
  31. EXPECT_NE(sampl, nullptr);
  32. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  33. sampl = WeightedRandomSampler(weights, 12);
  34. EXPECT_NE(sampl, nullptr);
  35. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  36. sampl = SubsetRandomSampler(indices);
  37. EXPECT_NE(sampl, nullptr);
  38. // Create an ImageFolder Dataset
  39. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  40. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  41. EXPECT_NE(ds, nullptr);
  42. // Create a Repeat operation on ds
  43. int32_t repeat_num = 2;
  44. ds = ds->Repeat(repeat_num);
  45. EXPECT_NE(ds, nullptr);
  46. // Create a Batch operation on ds
  47. int32_t batch_size = 2;
  48. ds = ds->Batch(batch_size);
  49. EXPECT_NE(ds, nullptr);
  50. // Create an iterator over the result of the above dataset
  51. // This will trigger the creation of the Execution Tree and launch it.
  52. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  53. EXPECT_NE(iter, nullptr);
  54. // Iterate the dataset and get each row
  55. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  56. iter->GetNextRow(&row);
  57. uint64_t i = 0;
  58. while (row.size() != 0) {
  59. i++;
  60. auto image = row["image"];
  61. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  62. iter->GetNextRow(&row);
  63. }
  64. EXPECT_EQ(i, 12);
  65. // Manually terminate the pipeline
  66. iter->Stop();
  67. }
  68. TEST_F(MindDataTestPipeline, TestCalculateNumSamples) {
  69. int64_t num_rows = 30; // dummy variable for number of rows in the dataset
  70. std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1, false, 6);
  71. EXPECT_NE(sampl, nullptr);
  72. std::shared_ptr<SamplerRT> sampler_rt = sampl->Build();
  73. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 3);
  74. sampl = PKSampler(3, false);
  75. EXPECT_NE(sampl, nullptr);
  76. sampler_rt = sampl->Build();
  77. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 30);
  78. sampl = RandomSampler(false, 12);
  79. EXPECT_NE(sampl, nullptr);
  80. sampler_rt = sampl->Build();
  81. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 12);
  82. sampl = SequentialSampler(0, 10);
  83. EXPECT_NE(sampl, nullptr);
  84. sampler_rt = sampl->Build();
  85. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 10);
  86. std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  87. sampl = WeightedRandomSampler(weights, 12);
  88. EXPECT_NE(sampl, nullptr);
  89. sampler_rt = sampl->Build();
  90. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 12);
  91. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21};
  92. sampl = SubsetRandomSampler(indices, 11);
  93. EXPECT_NE(sampl, nullptr);
  94. sampler_rt = sampl->Build();
  95. EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 11);
  96. // Testing chains
  97. // Parent and child have num_samples
  98. std::shared_ptr<SamplerObj> sampl1 = WeightedRandomSampler(weights, 12);
  99. EXPECT_NE(sampl1, nullptr);
  100. std::shared_ptr<SamplerRT> sampler_rt1 = sampl1->Build();
  101. std::shared_ptr<SamplerObj> sampl2 = SequentialSampler(0, 10);
  102. EXPECT_NE(sampl2, nullptr);
  103. std::shared_ptr<SamplerRT> sampler_rt2 = sampl2->Build();
  104. sampler_rt2->AddChild(sampler_rt1);
  105. EXPECT_EQ(sampler_rt2->CalculateNumSamples(num_rows), 10);
  106. // Parent doesn't have num_samples
  107. std::shared_ptr<SamplerObj> sampl3 = WeightedRandomSampler(weights, 12);
  108. EXPECT_NE(sampl3, nullptr);
  109. std::shared_ptr<SamplerRT> sampler_rt3 = sampl3->Build();
  110. std::shared_ptr<SamplerObj> sampl4 = SubsetRandomSampler(indices);
  111. EXPECT_NE(sampl4, nullptr);
  112. std::shared_ptr<SamplerRT> sampler_rt4 = sampl4->Build();
  113. sampler_rt4->AddChild(sampler_rt3);
  114. EXPECT_EQ(sampler_rt4->CalculateNumSamples(num_rows), 12);
  115. // Child doesn't have num_samples
  116. std::shared_ptr<SamplerObj> sampl5 = RandomSampler(false);
  117. EXPECT_NE(sampl5, nullptr);
  118. std::shared_ptr<SamplerRT> sampler_rt5 = sampl5->Build();
  119. std::shared_ptr<SamplerObj> sampl6 = PKSampler(3, false, 7);
  120. EXPECT_NE(sampl6, nullptr);
  121. std::shared_ptr<SamplerRT> sampler_rt6 = sampl6->Build();
  122. sampler_rt6->AddChild(sampler_rt5);
  123. EXPECT_EQ(sampler_rt6->CalculateNumSamples(num_rows), 7);
  124. }
  125. TEST_F(MindDataTestPipeline, TestSamplersMoveParameters) {
  126. std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  127. std::shared_ptr<SamplerObj> sampl1 = SubsetRandomSampler(indices);
  128. EXPECT_FALSE(indices.empty());
  129. EXPECT_NE(sampl1->Build(), nullptr);
  130. std::shared_ptr<SamplerObj> sampl2 = SubsetRandomSampler(std::move(indices));
  131. EXPECT_TRUE(indices.empty());
  132. EXPECT_NE(sampl2->Build(), nullptr);
  133. }
  134. TEST_F(MindDataTestPipeline, TestWeightedRandomSamplerFail) {
  135. // weights is empty
  136. std::vector<double> weights1 = {};
  137. std::shared_ptr<SamplerObj> sampl1 = WeightedRandomSampler(weights1);
  138. EXPECT_EQ(sampl1, nullptr);
  139. // weights has negative number
  140. std::vector<double> weights2 = {0.5, 0.2, -0.4};
  141. std::shared_ptr<SamplerObj> sampl2 = WeightedRandomSampler(weights2);
  142. EXPECT_EQ(sampl2, nullptr);
  143. // weights elements are all zero
  144. std::vector<double> weights3 = {0, 0, 0};
  145. std::shared_ptr<SamplerObj> sampl3 = WeightedRandomSampler(weights3);
  146. EXPECT_EQ(sampl3, nullptr);
  147. }
  148. TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess) {
  149. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess.";
  150. // Test basic setting of distributed_sampler
  151. // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true
  152. std::shared_ptr<SamplerObj> sampler = DistributedSampler(4, 0, false, 0, 0, -1, true);
  153. EXPECT_NE(sampler, nullptr);
  154. // Create an ImageFolder Dataset
  155. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  156. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  157. EXPECT_NE(ds, nullptr);
  158. // Iterate the dataset and get each row
  159. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  160. EXPECT_NE(iter, nullptr);
  161. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  162. iter->GetNextRow(&row);
  163. uint64_t i = 0;
  164. while (row.size() != 0) {
  165. i++;
  166. auto label = row["label"];
  167. iter->GetNextRow(&row);
  168. }
  169. EXPECT_EQ(i, 11);
  170. iter->Stop();
  171. }
  172. TEST_F(MindDataTestPipeline, TestDistributedSamplerFail) {
  173. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail.";
  174. // Test invalid offset setting of distributed_sampler
  175. // offset=5 which is greater than num_shards=4
  176. std::shared_ptr<SamplerObj> sampler = DistributedSampler(4, 0, false, 0, 0, 5, false);
  177. EXPECT_EQ(sampler, nullptr);
  178. }