You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_api_datasets_test.cc 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/common.h"
  17. #include "minddata/dataset/include/datasets.h"
  18. // IR non-leaf nodes
  19. #include "minddata/dataset/engine/ir/datasetops/batch_node.h"
  20. #include "minddata/dataset/engine/ir/datasetops/bucket_batch_by_length_node.h"
  21. #include "minddata/dataset/engine/ir/datasetops/concat_node.h"
  22. #include "minddata/dataset/engine/ir/datasetops/map_node.h"
  23. #include "minddata/dataset/engine/ir/datasetops/project_node.h"
  24. #include "minddata/dataset/engine/ir/datasetops/rename_node.h"
  25. #include "minddata/dataset/engine/ir/datasetops/shuffle_node.h"
  26. #include "minddata/dataset/engine/ir/datasetops/skip_node.h"
  27. #include "minddata/dataset/engine/ir/datasetops/zip_node.h"
  28. // IR leaf nodes
  29. #include "minddata/dataset/engine/ir/datasetops/source/celeba_node.h"
  30. #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
  31. #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h"
  32. using namespace mindspore::dataset::api;
  33. using mindspore::dataset::Tensor;
  34. using mindspore::dataset::TensorShape;
  35. class MindDataTestPipeline : public UT::DatasetOpTesting {
  36. protected:
  37. };
  38. // Tests for datasets (in alphabetical order)
  39. TEST_F(MindDataTestPipeline, TestCelebADataset) {
  40. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADataset.";
  41. // Create a CelebA Dataset
  42. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  43. std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", SequentialSampler(0, 2), false, {});
  44. EXPECT_NE(ds, nullptr);
  45. // Create an iterator over the result of the above dataset
  46. // This will trigger the creation of the Execution Tree and launch it.
  47. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  48. EXPECT_NE(iter, nullptr);
  49. // Iterate the dataset and get each row
  50. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  51. iter->GetNextRow(&row);
  52. // Check if CelebAOp read correct images/attr
  53. std::string expect_file[] = {"1.JPEG", "2.jpg"};
  54. std::vector<std::vector<uint32_t>> expect_attr_vector = {
  55. {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
  56. 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
  57. {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
  58. 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}};
  59. uint64_t i = 0;
  60. while (row.size() != 0) {
  61. auto image = row["image"];
  62. auto attr = row["attr"];
  63. std::shared_ptr<Tensor> expect_image;
  64. Tensor::CreateFromFile(folder_path + expect_file[i], &expect_image);
  65. EXPECT_EQ(*image, *expect_image);
  66. std::shared_ptr<Tensor> expect_attr;
  67. Tensor::CreateFromVector(expect_attr_vector[i], TensorShape({40}), &expect_attr);
  68. EXPECT_EQ(*attr, *expect_attr);
  69. iter->GetNextRow(&row);
  70. i++;
  71. }
  72. EXPECT_EQ(i, 2);
  73. // Manually terminate the pipeline
  74. iter->Stop();
  75. }
  76. TEST_F(MindDataTestPipeline, TestCelebADefault) {
  77. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADefault.";
  78. // Create a CelebA Dataset
  79. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  80. std::shared_ptr<Dataset> ds = CelebA(folder_path);
  81. EXPECT_NE(ds, nullptr);
  82. // Create an iterator over the result of the above dataset
  83. // This will trigger the creation of the Execution Tree and launch it.
  84. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  85. EXPECT_NE(iter, nullptr);
  86. // Iterate the dataset and get each row
  87. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  88. iter->GetNextRow(&row);
  89. // Check if CelebAOp read correct images/attr
  90. uint64_t i = 0;
  91. while (row.size() != 0) {
  92. auto image = row["image"];
  93. auto attr = row["attr"];
  94. MS_LOG(INFO) << "Tensor image shape: " << image->shape();
  95. MS_LOG(INFO) << "Tensor attr shape: " << attr->shape();
  96. iter->GetNextRow(&row);
  97. i++;
  98. }
  99. EXPECT_EQ(i, 4);
  100. // Manually terminate the pipeline
  101. iter->Stop();
  102. }
  103. TEST_F(MindDataTestPipeline, TestGetRepeatCount) {
  104. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestGetRepeatCount.";
  105. // Create an ImageFolder Dataset
  106. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  107. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true);
  108. EXPECT_NE(ds, nullptr);
  109. EXPECT_EQ(ds->GetRepeatCount(), 1);
  110. ds = ds->Repeat(4);
  111. EXPECT_NE(ds, nullptr);
  112. EXPECT_EQ(ds->GetRepeatCount(), 4);
  113. ds = ds->Repeat(3);
  114. EXPECT_NE(ds, nullptr);
  115. EXPECT_EQ(ds->GetRepeatCount(), 3);
  116. }
  117. TEST_F(MindDataTestPipeline, TestGetBatchSize) {
  118. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestGetRepeatCount.";
  119. // Create an ImageFolder Dataset
  120. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  121. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true)->Project({"label"});
  122. EXPECT_NE(ds, nullptr);
  123. EXPECT_EQ(ds->GetBatchSize(), 1);
  124. ds = ds->Batch(2);
  125. EXPECT_NE(ds, nullptr);
  126. EXPECT_EQ(ds->GetBatchSize(), 2);
  127. ds = ds->Batch(3);
  128. EXPECT_NE(ds, nullptr);
  129. EXPECT_EQ(ds->GetBatchSize(), 3);
  130. }
  131. TEST_F(MindDataTestPipeline, TestCelebAGetDatasetSize) {
  132. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebAGetDatasetSize.";
  133. // Create a CelebA Dataset
  134. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  135. std::shared_ptr<Dataset> ds = CelebA(folder_path, "valid");
  136. EXPECT_NE(ds, nullptr);
  137. EXPECT_EQ(ds->GetDatasetSize(), 1);
  138. }
  139. TEST_F(MindDataTestPipeline, TestCelebAException) {
  140. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebAException.";
  141. // Create a CelebA Dataset
  142. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  143. std::string invalid_folder_path = "./testNotExist";
  144. std::string invalid_dataset_type = "invalid_type";
  145. std::shared_ptr<Dataset> ds = CelebA(invalid_folder_path);
  146. EXPECT_EQ(ds, nullptr);
  147. std::shared_ptr<Dataset> ds1 = CelebA(folder_path, invalid_dataset_type);
  148. EXPECT_EQ(ds1, nullptr);
  149. }
  150. TEST_F(MindDataTestPipeline, TestCelebADatasetWithNullSampler) {
  151. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADataset.";
  152. // Create a CelebA Dataset
  153. std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  154. std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", nullptr, false, {});
  155. // Expect failure: sampler can not be nullptr
  156. EXPECT_EQ(ds, nullptr);
  157. }
  158. TEST_F(MindDataTestPipeline, TestImageFolderWithWrongDatasetDir) {
  159. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderWithWrongDatasetDir.";
  160. // Create an ImageFolder Dataset
  161. std::shared_ptr<Dataset> ds = ImageFolder("", true, nullptr);
  162. EXPECT_EQ(ds, nullptr);
  163. }
  164. TEST_F(MindDataTestPipeline, TestImageFolderFailWithWrongExtension) {
  165. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFailWithWrongExtension.";
  166. // Create an ImageFolder Dataset
  167. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  168. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 2), {".JGP"});
  169. EXPECT_NE(ds, nullptr);
  170. // Create an iterator over the result of the above dataset
  171. // This will trigger the creation of the Execution Tree and launch it.
  172. std::shared_ptr<Iterator> iter = ds->CreateIterator();
  173. EXPECT_NE(iter, nullptr);
  174. // Iterate the dataset and get each row
  175. std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  176. iter->GetNextRow(&row);
  177. // Expect no data: can not find files with specified extension
  178. EXPECT_EQ(row.size(), 0);
  179. // Manually terminate the pipeline
  180. iter->Stop();
  181. }
  182. TEST_F(MindDataTestPipeline, TestImageFolderGetDatasetSize) {
  183. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderGetDatasetSize.";
  184. // Create an ImageFolder Dataset
  185. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  186. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true);
  187. EXPECT_NE(ds, nullptr);
  188. EXPECT_EQ(ds->GetDatasetSize(), 44);
  189. }
  190. TEST_F(MindDataTestPipeline, TestImageFolderFailWithNullSampler) {
  191. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFailWithNullSampler.";
  192. // Create an ImageFolder Dataset
  193. std::string folder_path = datasets_root_path_ + "/testPK/data/";
  194. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, nullptr);
  195. // Expect failure: sampler can not be nullptr
  196. EXPECT_EQ(ds, nullptr);
  197. }
  198. TEST_F(MindDataTestPipeline, TestImageFolderFailWithWrongSampler) {
  199. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFailWithWrongSampler.";
  200. // Create a Cifar10 Dataset
  201. std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
  202. std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, SequentialSampler(-2, 5));
  203. // Expect failure: sampler is not construnced correctly
  204. EXPECT_EQ(ds, nullptr);
  205. }
  206. TEST_F(MindDataTestPipeline, TestMnistGetDatasetSize) {
  207. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistGetDatasetSize.";
  208. // Create a Mnist Dataset
  209. std::string folder_path = datasets_root_path_ + "/testMnistData/";
  210. std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", RandomSampler(false, 20));
  211. EXPECT_NE(ds, nullptr);
  212. EXPECT_EQ(ds->GetDatasetSize(), 20);
  213. }
  214. TEST_F(MindDataTestPipeline, TestMnistFailWithWrongDatasetDir) {
  215. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithWrongDatasetDir.";
  216. // Create a Mnist Dataset
  217. std::shared_ptr<Dataset> ds = Mnist("", "all", RandomSampler(false, 10));
  218. EXPECT_EQ(ds, nullptr);
  219. }
  220. TEST_F(MindDataTestPipeline, TestMnistFailWithNullSampler) {
  221. MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithNullSampler.";
  222. // Create a Mnist Dataset
  223. std::string folder_path = datasets_root_path_ + "/testMnistData/";
  224. std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", nullptr);
  225. // Expect failure: sampler can not be nullptr
  226. EXPECT_EQ(ds, nullptr);
  227. }