You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

shuffle_op_test.cc 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. /**
  2. * Copyright 2019-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  #include <cstddef>
  #include <cstdint>
  #include <iostream>
  #include <memory>
  #include <sstream>
  #include <string>
  #include <vector>
  #include "gtest/gtest.h"
  #include "minddata/dataset/core/client.h"
  #include "common/common.h"
  #include "utils/ms_utils.h"
  #include "utils/log_adapter.h"
  24. namespace common = mindspore::common;
  25. using namespace mindspore::dataset;
  26. using mindspore::MsLogLevel::INFO;
  27. using mindspore::ExceptionType::NoExceptionType;
  28. using mindspore::LogStream;
  29. class MindDataTestShuffleOp : public UT::DatasetOpTesting {
  30. };
  31. // Test info:
  32. // - Dataset from testDataset1 has 10 rows, 2 columns.
  33. // - RowsPerBuffer buffer setting of 2 divides evenly into total rows.
  34. // - Shuffle size is multiple of rows per buffer.
  35. //
  36. // Tree: shuffle over TFReader
  37. //
  38. // ShuffleOp
  39. // |
  40. // TFReaderOp
  41. //
  42. TEST_F(MindDataTestShuffleOp, TestShuffleBasic1) {
  43. Status rc;
  44. MS_LOG(INFO) << "UT test TestShuffleBasic1.";
  45. // Start with an empty execution tree
  46. auto my_tree = std::make_shared<ExecutionTree>();
  47. std::string dataset_path;
  48. dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data";
  49. std::shared_ptr<TFReaderOp> my_tfreader_op;
  50. rc = TFReaderOp::Builder()
  51. .SetDatasetFilesList({dataset_path})
  52. .SetWorkerConnectorSize(16)
  53. .SetNumWorkers(1)
  54. .Build(&my_tfreader_op);
  55. EXPECT_TRUE(rc.IsOk());
  56. rc = my_tree->AssociateNode(my_tfreader_op);
  57. EXPECT_TRUE(rc.IsOk());
  58. std::shared_ptr<ShuffleOp> my_shuffle_op;
  59. rc = ShuffleOp::Builder().SetShuffleSize(4).Build(&my_shuffle_op);
  60. EXPECT_TRUE(rc.IsOk());
  61. rc = my_tree->AssociateNode(my_shuffle_op);
  62. EXPECT_TRUE(rc.IsOk());
  63. // Set children/root layout.
  64. rc = my_shuffle_op->AddChild(my_tfreader_op);
  65. EXPECT_TRUE(rc.IsOk());
  66. rc = my_tree->AssignRoot(my_shuffle_op);
  67. EXPECT_TRUE(rc.IsOk());
  68. MS_LOG(INFO) << "Launching tree and begin iteration.";
  69. rc = my_tree->Prepare();
  70. EXPECT_TRUE(rc.IsOk());
  71. rc = my_tree->Launch();
  72. EXPECT_TRUE(rc.IsOk());
  73. // Start the loop of reading tensors from our pipeline
  74. DatasetIterator di(my_tree);
  75. TensorRow tensor_list;
  76. rc = di.FetchNextTensorRow(&tensor_list);
  77. EXPECT_TRUE(rc.IsOk());
  78. int row_count = 0;
  79. while (!tensor_list.empty()) {
  80. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  81. // Display the tensor by calling the printer on it
  82. for (int i = 0; i < tensor_list.size(); i++) {
  83. std::ostringstream ss;
  84. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  85. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  86. }
  87. rc = di.FetchNextTensorRow(&tensor_list);
  88. EXPECT_TRUE(rc.IsOk());
  89. row_count++;
  90. }
  91. ASSERT_EQ(row_count, 10);
  92. }
  93. // Test info:
  94. // - Dataset from testDataset1 has 10 rows, 2 columns.
  95. // - RowsPerBuffer buffer setting of 3 does not divide evenly into total rows, thereby causing
  96. // partially filled buffers.
  97. // - Shuffle size is not a multiple of rows per buffer.
  98. // - User has provided a non-default seed value.
  99. //
  100. // Tree: shuffle over TFReader
  101. //
  102. // ShuffleOp
  103. // |
  104. // TFReaderOp
  105. //
  106. TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) {
  107. Status rc;
  108. MS_LOG(INFO) << "UT test TestShuffleBasic2.";
  109. // Start with an empty execution tree
  110. auto my_tree = std::make_shared<ExecutionTree>();
  111. std::string dataset_path;
  112. dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data";
  113. std::shared_ptr<TFReaderOp> my_tfreader_op;
  114. rc = TFReaderOp::Builder()
  115. .SetDatasetFilesList({dataset_path})
  116. .SetWorkerConnectorSize(16)
  117. .SetNumWorkers(2)
  118. .Build(&my_tfreader_op);
  119. ASSERT_TRUE(rc.IsOk());
  120. rc = my_tree->AssociateNode(my_tfreader_op);
  121. EXPECT_TRUE(rc.IsOk());
  122. std::shared_ptr<ShuffleOp> my_shuffle_op;
  123. rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).Build(&my_shuffle_op);
  124. EXPECT_TRUE(rc.IsOk());
  125. rc = my_tree->AssociateNode(my_shuffle_op);
  126. EXPECT_TRUE(rc.IsOk());
  127. // Set children/root layout.
  128. rc = my_shuffle_op->AddChild(my_tfreader_op);
  129. EXPECT_TRUE(rc.IsOk());
  130. rc = my_tree->AssignRoot(my_shuffle_op);
  131. EXPECT_TRUE(rc.IsOk());
  132. MS_LOG(INFO) << "Launching tree and begin iteration.";
  133. rc = my_tree->Prepare();
  134. EXPECT_TRUE(rc.IsOk());
  135. rc = my_tree->Launch();
  136. EXPECT_TRUE(rc.IsOk());
  137. // Start the loop of reading tensors from our pipeline
  138. DatasetIterator di(my_tree);
  139. TensorRow tensor_list;
  140. rc = di.FetchNextTensorRow(&tensor_list);
  141. EXPECT_TRUE(rc.IsOk());
  142. int row_count = 0;
  143. while (!tensor_list.empty()) {
  144. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  145. // Display the tensor by calling the printer on it
  146. for (int i = 0; i < tensor_list.size(); i++) {
  147. std::ostringstream ss;
  148. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  149. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  150. }
  151. rc = di.FetchNextTensorRow(&tensor_list);
  152. EXPECT_TRUE(rc.IsOk());
  153. row_count++;
  154. }
  155. ASSERT_EQ(row_count, 10);
  156. }
  157. // Test info:
  158. // - Dataset from testDataset1 has 10 rows, 2 columns.
  159. // - RowsPerBuffer buffer setting of 3 does not divide evenly into total rows, thereby causing
  160. // partially filled buffers
  161. // - Shuffle size captures the entire dataset size (actually sets a value that is larger than the
  162. // amount of rows in the dataset.
  163. //
  164. // Tree: shuffle over TFReader
  165. //
  166. // ShuffleOp
  167. // |
  168. // TFReaderOp
  169. //
  170. TEST_F(MindDataTestShuffleOp, TestShuffleBasic3) {
  171. Status rc;
  172. MS_LOG(INFO) << "UT test TestShuffleBasic3.";
  173. // Start with an empty execution tree
  174. auto my_tree = std::make_shared<ExecutionTree>();
  175. std::string dataset_path;
  176. dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data";
  177. std::shared_ptr<TFReaderOp> my_tfreader_op;
  178. rc = TFReaderOp::Builder()
  179. .SetDatasetFilesList({dataset_path})
  180. .SetWorkerConnectorSize(16)
  181. .SetNumWorkers(2)
  182. .Build(&my_tfreader_op);
  183. EXPECT_TRUE(rc.IsOk());
  184. my_tree->AssociateNode(my_tfreader_op);
  185. std::shared_ptr<ShuffleOp> my_shuffle_op;
  186. rc = ShuffleOp::Builder().SetShuffleSize(100).Build(&my_shuffle_op);
  187. EXPECT_TRUE(rc.IsOk());
  188. rc = my_tree->AssociateNode(my_shuffle_op);
  189. EXPECT_TRUE(rc.IsOk());
  190. // Set children/root layout.
  191. rc = my_shuffle_op->AddChild(my_tfreader_op);
  192. EXPECT_TRUE(rc.IsOk());
  193. rc = my_tree->AssignRoot(my_shuffle_op);
  194. EXPECT_TRUE(rc.IsOk());
  195. MS_LOG(INFO) << "Launching tree and begin iteration.";
  196. rc = my_tree->Prepare();
  197. EXPECT_TRUE(rc.IsOk());
  198. rc = my_tree->Launch();
  199. EXPECT_TRUE(rc.IsOk());
  200. // Start the loop of reading tensors from our pipeline
  201. DatasetIterator di(my_tree);
  202. TensorRow tensor_list;
  203. rc = di.FetchNextTensorRow(&tensor_list);
  204. EXPECT_TRUE(rc.IsOk());
  205. int row_count = 0;
  206. while (!tensor_list.empty()) {
  207. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  208. // Display the tensor by calling the printer on it
  209. for (int i = 0; i < tensor_list.size(); i++) {
  210. std::ostringstream ss;
  211. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  212. MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()) << ".";
  213. }
  214. rc = di.FetchNextTensorRow(&tensor_list);
  215. EXPECT_TRUE(rc.IsOk());
  216. row_count++;
  217. }
  218. ASSERT_EQ(row_count, 10);
  219. }
  220. // Test info:
  221. // - Dataset from testDataset1 has 10 rows, 2 columns.
  222. // - RowsPerBuffer buffer setting of 3 does not divide evenly into total rows thereby causing
  223. // partially filled buffers
  224. // - Shuffle size is not a multiple of rows per buffer.
  225. // - shuffle seed is given, and subsequent epochs will change the seed each time.
  226. // - Repeat count of 2
  227. //
  228. // Tree: Repeat over shuffle over TFReader
  229. //
  230. // Repeat
  231. // |
  232. // shuffle
  233. // |
  234. // TFReaderOp
  235. //
  236. TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) {
  237. Status rc;
  238. MS_LOG(INFO) << "UT test TestRepeatShuffle.";
  239. // Start with an empty execution tree
  240. auto my_tree = std::make_shared<ExecutionTree>();
  241. std::string dataset_path;
  242. dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data";
  243. std::shared_ptr<TFReaderOp> my_tfreader_op;
  244. rc = TFReaderOp::Builder()
  245. .SetDatasetFilesList({dataset_path})
  246. .SetWorkerConnectorSize(16)
  247. .SetNumWorkers(2)
  248. .Build(&my_tfreader_op);
  249. EXPECT_TRUE(rc.IsOk());
  250. rc = my_tree->AssociateNode(my_tfreader_op);
  251. EXPECT_TRUE(rc.IsOk());
  252. std::shared_ptr<ShuffleOp> my_shuffle_op;
  253. rc = ShuffleOp::Builder()
  254. .SetShuffleSize(4)
  255. .SetShuffleSeed(100)
  256. .SetReshuffleEachEpoch(true)
  257. .Build(&my_shuffle_op);
  258. EXPECT_TRUE(rc.IsOk());
  259. rc = my_tree->AssociateNode(my_shuffle_op);
  260. EXPECT_TRUE(rc.IsOk());
  261. uint32_t numRepeats = 2;
  262. std::shared_ptr<RepeatOp> my_repeat_op;
  263. rc = RepeatOp::Builder(numRepeats).Build(&my_repeat_op);
  264. EXPECT_TRUE(rc.IsOk());
  265. rc = my_tree->AssociateNode(my_repeat_op);
  266. EXPECT_TRUE(rc.IsOk());
  267. // Set children/root layout.
  268. my_shuffle_op->set_total_repeats(numRepeats);
  269. my_shuffle_op->set_num_repeats_per_epoch(numRepeats);
  270. rc = my_repeat_op->AddChild(my_shuffle_op);
  271. EXPECT_TRUE(rc.IsOk());
  272. my_tfreader_op->set_total_repeats(numRepeats);
  273. my_tfreader_op->set_num_repeats_per_epoch(numRepeats);
  274. rc = my_shuffle_op->AddChild(my_tfreader_op);
  275. EXPECT_TRUE(rc.IsOk());
  276. rc = my_tree->AssignRoot(my_repeat_op);
  277. EXPECT_TRUE(rc.IsOk());
  278. MS_LOG(INFO) << "Launching tree and begin iteration.";
  279. rc = my_tree->Prepare();
  280. EXPECT_TRUE(rc.IsOk());
  281. rc = my_tree->Launch();
  282. EXPECT_TRUE(rc.IsOk());
  283. // Start the loop of reading tensors from our pipeline
  284. DatasetIterator di(my_tree);
  285. TensorRow tensor_list;
  286. rc = di.FetchNextTensorRow(&tensor_list);
  287. EXPECT_TRUE(rc.IsOk());
  288. int row_count = 0;
  289. while (!tensor_list.empty()) {
  290. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  291. // Display the tensor by calling the printer on it
  292. for (int i = 0; i < tensor_list.size(); i++) {
  293. std::ostringstream ss;
  294. ss << *tensor_list[i] << std::endl;
  295. MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()) << ".";
  296. }
  297. rc = di.FetchNextTensorRow(&tensor_list);
  298. EXPECT_TRUE(rc.IsOk());
  299. row_count++;
  300. }
  301. ASSERT_EQ(row_count, 20);
  302. }