You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

album_op_test.cc 7.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <memory>
  17. #include <string>
  18. #include "common/common.h"
  19. #include "minddata/dataset/core/client.h"
  20. #include "minddata/dataset/engine/datasetops/source/album_op.h"
  21. #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
  22. #include "minddata/dataset/util/status.h"
  23. #include "gtest/gtest.h"
  24. #include "utils/log_adapter.h"
  25. #include "securec.h"
  26. #include "minddata/dataset/include/datasets.h"
  27. #include "minddata/dataset/include/transforms.h"
  28. using namespace mindspore::dataset;
  29. using mindspore::MsLogLevel::ERROR;
  30. using mindspore::ExceptionType::NoExceptionType;
  31. using mindspore::LogStream;
  // Helper factories that are only declared here; their definitions are not part of this
  // section of the file. The tests below call Repeat() and Build() to assemble an execution
  // tree; Batch() is declared but not called in the code visible here.
  32. std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
  33. std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
  34. std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
  35. std::shared_ptr<AlbumOp> Album(int64_t num_works, int64_t rows, int64_t conns, std::string path, bool shuf = false,
  36. std::unique_ptr<SamplerRT> sampler = nullptr, bool decode = false) {
  37. std::shared_ptr<AlbumOp> so;
  38. AlbumOp::Builder builder;
  39. Status rc = builder.SetNumWorkers(num_works)
  40. .SetAlbumDir(path)
  41. .SetOpConnectorSize(conns)
  42. .SetExtensions({".json"})
  43. .SetSampler(std::move(sampler))
  44. .SetDecode(decode)
  45. .Build(&so);
  46. return so;
  47. }
  48. std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_works, int64_t rows, int64_t conns, std::string path,
  49. std::string schema_file, std::vector<std::string> column_names = {},
  50. bool shuf = false, std::unique_ptr<SamplerRT> sampler = nullptr,
  51. bool decode = false) {
  52. std::shared_ptr<AlbumOp> so;
  53. AlbumOp::Builder builder;
  54. Status rc = builder.SetNumWorkers(num_works)
  55. .SetSchemaFile(schema_file)
  56. .SetColumnsToLoad(column_names)
  57. .SetAlbumDir(path)
  58. .SetOpConnectorSize(conns)
  59. .SetExtensions({".json"})
  60. .SetSampler(std::move(sampler))
  61. .SetDecode(decode)
  62. .Build(&so);
  63. return so;
  64. }
  65. class MindDataTestAlbum : public UT::DatasetOpTesting {
  66. protected:
  67. };
  68. TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchema) {
  69. std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  70. std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  71. std::vector<std::string> column_names = {"image", "label", "id"};
  72. auto op1 = AlbumSchema(16, 2, 32, folder_path, schema_file, column_names, false);
  73. auto op2 = Repeat(2);
  74. op1->set_total_repeats(2);
  75. op1->set_num_repeats_per_epoch(2);
  76. auto tree = Build({op1, op2});
  77. ASSERT_OK(tree->Prepare());
  78. ASSERT_OK(tree->Launch());
  79. DatasetIterator di(tree);
  80. TensorMap tensor_map;
  81. ASSERT_OK(di.GetNextAsMap(&tensor_map));
  82. uint64_t i = 0;
  83. std::string_view label = 0;
  84. while (tensor_map.size() != 0) {
  85. EXPECT_TRUE(tensor_map["label"]->GetItemAt(&label, {0}));
  86. MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
  87. << tensor_map["label"] << "\n";
  88. i++;
  89. ASSERT_OK(di.GetNextAsMap(&tensor_map));
  90. }
  91. MS_LOG(INFO) << "got rows: " << i << "\n";
  92. EXPECT_TRUE(i == 14);
  93. }
  94. TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaNoOrder) {
  95. std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  96. std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  97. auto op1 = AlbumSchema(16, 2, 32, folder_path, schema_file);
  98. auto op2 = Repeat(2);
  99. op1->set_total_repeats(2);
  100. op1->set_num_repeats_per_epoch(2);
  101. auto tree = Build({op1, op2});
  102. ASSERT_OK(tree->Prepare());
  103. ASSERT_OK(tree->Launch());
  104. DatasetIterator di(tree);
  105. TensorMap tensor_map;
  106. ASSERT_OK(di.GetNextAsMap(&tensor_map));
  107. uint64_t i = 0;
  108. std::string_view label;
  109. while (tensor_map.size() != 0) {
  110. EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
  111. MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
  112. << tensor_map["label"] << "\n";
  113. i++;
  114. ASSERT_OK(di.GetNextAsMap(&tensor_map));
  115. }
  116. MS_LOG(INFO) << "got rows: " << i << "\n";
  117. EXPECT_TRUE(i == 14);
  118. }
  119. TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaFloat) {
  120. std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  121. // add the priority column
  122. std::string schema_file = datasets_root_path_ + "/testAlbum/floatSchema.json";
  123. auto op1 = AlbumSchema(16, 2, 32, folder_path, schema_file);
  124. auto op2 = Repeat(2);
  125. op1->set_total_repeats(2);
  126. op1->set_num_repeats_per_epoch(2);
  127. auto tree = Build({op1, op2});
  128. tree->Prepare();
  129. ASSERT_OK(tree->Launch());
  130. DatasetIterator di(tree);
  131. TensorMap tensor_map;
  132. ASSERT_OK(di.GetNextAsMap(&tensor_map));
  133. uint64_t i = 0;
  134. std::string_view label;
  135. double priority = 0;
  136. while (tensor_map.size() != 0) {
  137. EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
  138. EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {0}));
  139. MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
  140. << tensor_map["label"] << "priority: " << priority << "\n";
  141. i++;
  142. ASSERT_OK(di.GetNextAsMap(&tensor_map));
  143. }
  144. MS_LOG(INFO) << "got rows: " << i << "\n";
  145. EXPECT_TRUE(i == 14);
  146. }
  147. TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) {
  148. std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  149. // add the priority column
  150. std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json";
  151. auto op1 = AlbumSchema(16, 2, 32, folder_path, schema_file);
  152. auto op2 = Repeat(2);
  153. op1->set_total_repeats(2);
  154. op1->set_num_repeats_per_epoch(2);
  155. auto tree = Build({op1, op2});
  156. ASSERT_OK(tree->Prepare());
  157. ASSERT_OK(tree->Launch());
  158. DatasetIterator di(tree);
  159. TensorMap tensor_map;
  160. ASSERT_OK(di.GetNextAsMap(&tensor_map));
  161. uint64_t i = 0;
  162. std::string_view label = 0;
  163. double priority = 0;
  164. int64_t id = 0;
  165. while (tensor_map.size() != 0) {
  166. EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
  167. EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {0}));
  168. EXPECT_OK(tensor_map["id"]->GetItemAt<int64_t>(&id, {}));
  169. MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
  170. << tensor_map["label"] << "priority: " << priority
  171. << " embedding : " << tensor_map["_embedding"]->shape() << " id: " << id << "\n";
  172. i++;
  173. ASSERT_OK(di.GetNextAsMap(&tensor_map));
  174. }
  175. MS_LOG(INFO) << "got rows: " << i << "\n";
  176. EXPECT_TRUE(i == 14);
  177. }