You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

shuffle_op_test.cc 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "dataset/core/client.h"
  17. #include "common/common.h"
  18. #include "common/utils.h"
  19. #include "gtest/gtest.h"
  20. #include "utils/log_adapter.h"
  21. #include <memory>
  22. #include <vector>
  23. #include <iostream>
  24. namespace common = mindspore::common;
  25. using namespace mindspore::dataset;
  26. using mindspore::MsLogLevel::INFO;
  27. using mindspore::ExceptionType::NoExceptionType;
  28. using mindspore::LogStream;
  29. class MindDataTestShuffleOp : public UT::DatasetOpTesting {
  30. };
  31. // Test info:
  32. // - Dataset from testDataset1 has 10 rows, 2 columns.
  33. // - RowsPerBuffer buffer setting of 2 divides evenly into total rows.
  34. // - Shuffle size is multiple of rows per buffer.
  35. //
  36. // Tree: shuffle over storage
  37. //
  38. // ShuffleOp
  39. // |
  40. // StorageOp
  41. //
  42. TEST_F(MindDataTestShuffleOp, TestShuffleBasic1) {
  43. Status rc;
  44. MS_LOG(INFO) << "UT test TestShuffleBasic1.";
  45. // Start with an empty execution tree
  46. auto my_tree = std::make_shared<ExecutionTree>();
  47. std::string dataset_path;
  48. dataset_path = datasets_root_path_ + "/testDataset1";
  49. std::shared_ptr<StorageOp> my_storage_op;
  50. rc = StorageOp::Builder()
  51. .SetDatasetFilesDir(dataset_path)
  52. .SetRowsPerBuffer(2)
  53. .SetWorkerConnectorSize(16)
  54. .SetNumWorkers(1)
  55. .Build(&my_storage_op);
  56. EXPECT_TRUE(rc.IsOk());
  57. rc = my_tree->AssociateNode(my_storage_op);
  58. EXPECT_TRUE(rc.IsOk());
  59. std::shared_ptr<ShuffleOp> my_shuffle_op;
  60. rc = ShuffleOp::Builder().SetRowsPerBuffer(2).SetShuffleSize(4).Build(&my_shuffle_op);
  61. EXPECT_TRUE(rc.IsOk());
  62. rc = my_tree->AssociateNode(my_shuffle_op);
  63. EXPECT_TRUE(rc.IsOk());
  64. // Set children/root layout.
  65. rc = my_shuffle_op->AddChild(my_storage_op);
  66. EXPECT_TRUE(rc.IsOk());
  67. rc = my_tree->AssignRoot(my_shuffle_op);
  68. EXPECT_TRUE(rc.IsOk());
  69. MS_LOG(INFO) << "Launching tree and begin iteration.";
  70. rc = my_tree->Prepare();
  71. EXPECT_TRUE(rc.IsOk());
  72. rc = my_tree->Launch();
  73. EXPECT_TRUE(rc.IsOk());
  74. // Start the loop of reading tensors from our pipeline
  75. DatasetIterator di(my_tree);
  76. TensorRow tensor_list;
  77. rc = di.FetchNextTensorRow(&tensor_list);
  78. EXPECT_TRUE(rc.IsOk());
  79. int row_count = 0;
  80. while (!tensor_list.empty()) {
  81. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  82. // Display the tensor by calling the printer on it
  83. for (int i = 0; i < tensor_list.size(); i++) {
  84. std::ostringstream ss;
  85. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  86. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  87. }
  88. rc = di.FetchNextTensorRow(&tensor_list);
  89. EXPECT_TRUE(rc.IsOk());
  90. row_count++;
  91. }
  92. ASSERT_EQ(row_count, 10);
  93. }
  94. // Test info:
  95. // - Dataset from testDataset1 has 10 rows, 2 columns.
  96. // - RowsPerBuffer buffer setting of 3 does not divide evenly into total rows, thereby causing
  97. // partially filled buffers.
  98. // - Shuffle size is not a multiple of rows per buffer.
  99. // - User has provided a non-default seed value.
  100. //
  101. // Tree: shuffle over storage
  102. //
  103. // ShuffleOp
  104. // |
  105. // StorageOp
  106. //
  107. TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) {
  108. Status rc;
  109. MS_LOG(INFO) << "UT test TestShuffleBasic2.";
  110. // Start with an empty execution tree
  111. auto my_tree = std::make_shared<ExecutionTree>();
  112. std::string dataset_path;
  113. dataset_path = datasets_root_path_ + "/testDataset1";
  114. std::shared_ptr<StorageOp> my_storage_op;
  115. rc = StorageOp::Builder()
  116. .SetDatasetFilesDir(dataset_path)
  117. .SetRowsPerBuffer(3)
  118. .SetWorkerConnectorSize(16)
  119. .SetNumWorkers(2)
  120. .Build(&my_storage_op);
  121. ASSERT_TRUE(rc.IsOk());
  122. rc = my_tree->AssociateNode(my_storage_op);
  123. EXPECT_TRUE(rc.IsOk());
  124. std::shared_ptr<ShuffleOp> my_shuffle_op;
  125. rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).SetRowsPerBuffer(3).Build(&my_shuffle_op);
  126. EXPECT_TRUE(rc.IsOk());
  127. rc = my_tree->AssociateNode(my_shuffle_op);
  128. EXPECT_TRUE(rc.IsOk());
  129. // Set children/root layout.
  130. rc = my_shuffle_op->AddChild(my_storage_op);
  131. EXPECT_TRUE(rc.IsOk());
  132. rc = my_tree->AssignRoot(my_shuffle_op);
  133. EXPECT_TRUE(rc.IsOk());
  134. MS_LOG(INFO) << "Launching tree and begin iteration.";
  135. rc = my_tree->Prepare();
  136. EXPECT_TRUE(rc.IsOk());
  137. rc = my_tree->Launch();
  138. EXPECT_TRUE(rc.IsOk());
  139. // Start the loop of reading tensors from our pipeline
  140. DatasetIterator di(my_tree);
  141. TensorRow tensor_list;
  142. rc = di.FetchNextTensorRow(&tensor_list);
  143. EXPECT_TRUE(rc.IsOk());
  144. int row_count = 0;
  145. while (!tensor_list.empty()) {
  146. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  147. // Display the tensor by calling the printer on it
  148. for (int i = 0; i < tensor_list.size(); i++) {
  149. std::ostringstream ss;
  150. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  151. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  152. }
  153. rc = di.FetchNextTensorRow(&tensor_list);
  154. EXPECT_TRUE(rc.IsOk());
  155. row_count++;
  156. }
  157. ASSERT_EQ(row_count, 10);
  158. }
  159. // Test info:
  160. // - Dataset from testDataset1 has 10 rows, 2 columns.
  161. // - RowsPerBuffer buffer setting of 3 does not divide evenly into total rows, thereby causing
  162. // partially filled buffers
  163. // - Shuffle size captures the entire dataset size (actually sets a value that is larger than the
  164. // amount of rows in the dataset.
  165. //
  166. // Tree: shuffle over storage
  167. //
  168. // ShuffleOp
  169. // |
  170. // StorageOp
  171. //
  172. TEST_F(MindDataTestShuffleOp, TestShuffleBasic3) {
  173. Status rc;
  174. MS_LOG(INFO) << "UT test TestShuffleBasic3.";
  175. // Start with an empty execution tree
  176. auto my_tree = std::make_shared<ExecutionTree>();
  177. std::string dataset_path;
  178. dataset_path = datasets_root_path_ + "/testDataset1";
  179. std::shared_ptr<StorageOp> my_storage_op;
  180. rc = StorageOp::Builder()
  181. .SetDatasetFilesDir(dataset_path)
  182. .SetRowsPerBuffer(3)
  183. .SetWorkerConnectorSize(16)
  184. .SetNumWorkers(2)
  185. .Build(&my_storage_op);
  186. EXPECT_TRUE(rc.IsOk());
  187. my_tree->AssociateNode(my_storage_op);
  188. std::shared_ptr<ShuffleOp> my_shuffle_op;
  189. rc = ShuffleOp::Builder().SetShuffleSize(100).SetRowsPerBuffer(3).Build(&my_shuffle_op);
  190. EXPECT_TRUE(rc.IsOk());
  191. rc = my_tree->AssociateNode(my_shuffle_op);
  192. EXPECT_TRUE(rc.IsOk());
  193. // Set children/root layout.
  194. rc = my_shuffle_op->AddChild(my_storage_op);
  195. EXPECT_TRUE(rc.IsOk());
  196. rc = my_tree->AssignRoot(my_shuffle_op);
  197. EXPECT_TRUE(rc.IsOk());
  198. MS_LOG(INFO) << "Launching tree and begin iteration.";
  199. rc = my_tree->Prepare();
  200. EXPECT_TRUE(rc.IsOk());
  201. rc = my_tree->Launch();
  202. EXPECT_TRUE(rc.IsOk());
  203. // Start the loop of reading tensors from our pipeline
  204. DatasetIterator di(my_tree);
  205. TensorRow tensor_list;
  206. rc = di.FetchNextTensorRow(&tensor_list);
  207. EXPECT_TRUE(rc.IsOk());
  208. int row_count = 0;
  209. while (!tensor_list.empty()) {
  210. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  211. // Display the tensor by calling the printer on it
  212. for (int i = 0; i < tensor_list.size(); i++) {
  213. std::ostringstream ss;
  214. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  215. MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()) << ".";
  216. }
  217. rc = di.FetchNextTensorRow(&tensor_list);
  218. EXPECT_TRUE(rc.IsOk());
  219. row_count++;
  220. }
  221. ASSERT_EQ(row_count, 10);
  222. }
  223. // Test info:
  224. // - Dataset from testDataset1 has 10 rows, 2 columns.
  225. // - RowsPerBuffer buffer setting of 3 does not divide evenly into total rows thereby causing
  226. // partially filled buffers
  227. // - Shuffle size is not a multiple of rows per buffer.
  228. // - shuffle seed is given, and subsequent epochs will change the seed each time.
  229. // - Repeat count of 2
  230. //
  231. // Tree: Repeat over shuffle over storage
  232. //
  233. // Repeat
  234. // |
  235. // shuffle
  236. // |
  237. // StorageOp
  238. //
  239. TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) {
  240. Status rc;
  241. MS_LOG(INFO) << "UT test TestRepeatShuffle.";
  242. // Start with an empty execution tree
  243. auto my_tree = std::make_shared<ExecutionTree>();
  244. std::string dataset_path;
  245. dataset_path = datasets_root_path_ + "/testDataset1";
  246. std::shared_ptr<StorageOp> my_storage_op;
  247. rc = StorageOp::Builder()
  248. .SetDatasetFilesDir(dataset_path)
  249. .SetRowsPerBuffer(3)
  250. .SetWorkerConnectorSize(16)
  251. .SetNumWorkers(2)
  252. .Build(&my_storage_op);
  253. EXPECT_TRUE(rc.IsOk());
  254. rc = my_tree->AssociateNode(my_storage_op);
  255. EXPECT_TRUE(rc.IsOk());
  256. std::shared_ptr<ShuffleOp> my_shuffle_op;
  257. rc = ShuffleOp::Builder()
  258. .SetShuffleSize(4)
  259. .SetShuffleSeed(100)
  260. .SetRowsPerBuffer(3)
  261. .SetReshuffleEachEpoch(true)
  262. .Build(&my_shuffle_op);
  263. EXPECT_TRUE(rc.IsOk());
  264. rc = my_tree->AssociateNode(my_shuffle_op);
  265. EXPECT_TRUE(rc.IsOk());
  266. uint32_t numRepeats = 2;
  267. std::shared_ptr<RepeatOp> my_repeat_op;
  268. rc = RepeatOp::Builder(numRepeats).Build(&my_repeat_op);
  269. EXPECT_TRUE(rc.IsOk());
  270. rc = my_tree->AssociateNode(my_repeat_op);
  271. EXPECT_TRUE(rc.IsOk());
  272. // Set children/root layout.
  273. rc = my_repeat_op->AddChild(my_shuffle_op);
  274. EXPECT_TRUE(rc.IsOk());
  275. rc = my_shuffle_op->AddChild(my_storage_op);
  276. EXPECT_TRUE(rc.IsOk());
  277. rc = my_tree->AssignRoot(my_repeat_op);
  278. EXPECT_TRUE(rc.IsOk());
  279. MS_LOG(INFO) << "Launching tree and begin iteration.";
  280. rc = my_tree->Prepare();
  281. EXPECT_TRUE(rc.IsOk());
  282. rc = my_tree->Launch();
  283. EXPECT_TRUE(rc.IsOk());
  284. // Start the loop of reading tensors from our pipeline
  285. DatasetIterator di(my_tree);
  286. TensorRow tensor_list;
  287. rc = di.FetchNextTensorRow(&tensor_list);
  288. EXPECT_TRUE(rc.IsOk());
  289. int row_count = 0;
  290. while (!tensor_list.empty()) {
  291. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  292. // Display the tensor by calling the printer on it
  293. for (int i = 0; i < tensor_list.size(); i++) {
  294. std::ostringstream ss;
  295. ss << *tensor_list[i] << std::endl;
  296. MS_LOG(INFO) << "Tensor print: " << common::SafeCStr(ss.str()) << ".";
  297. }
  298. rc = di.FetchNextTensorRow(&tensor_list);
  299. EXPECT_TRUE(rc.IsOk());
  300. row_count++;
  301. }
  302. ASSERT_EQ(row_count, 20);
  303. }