You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

random_data_op_test.cc 13 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. /**
  2. * Copyright 2019-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "minddata/dataset/core/client.h"
  17. #include "common/common.h"
  18. #include "gtest/gtest.h"
  19. #include <memory>
  20. #include <vector>
  21. #include <iostream>
  22. #include "minddata/dataset/core/tensor_shape.h"
  23. #include "minddata/dataset/engine/datasetops/source/random_data_op.h"
  24. #include "minddata/dataset/engine/data_schema.h"
  25. using namespace mindspore::dataset;
  26. using mindspore::MsLogLevel::INFO;
  27. using mindspore::ExceptionType::NoExceptionType;
  28. using mindspore::LogStream;
  29. class MindDataTestRandomDataOp : public UT::DatasetOpTesting {
  30. };
  31. // Test info:
  32. // - Simple test with a user-provided schema generated purely from DataSchema C API
  33. // - has an interaction loop
  34. //
  35. // Tree: single node tree with RandomDataOp
  36. //
  37. // RandomDataOp
  38. //
  39. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
  40. Status rc;
  41. int32_t rank = 0; // not used
  42. MS_LOG(INFO) << "UT test RandomDataOpBasic1";
  43. // Start with an empty execution tree
  44. auto myTree = std::make_shared<ExecutionTree>();
  45. // Create a schema using the C api's
  46. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  47. // RandomDataOp can randomly fill in unknown dimension lengths of a shape.
  48. // Most other ops cannot do that as they are limited by the physical data itself. We're
  49. // more flexible with random data since it is just making stuff up on the fly.
  50. TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3});
  51. ColDescriptor c1("image",
  52. DataType(DataType::DE_INT8),
  53. TensorImpl::kFlexible,
  54. rank, // not used
  55. &c1Shape);
  56. // Column 2 will just be a scalar label number
  57. TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor
  58. ColDescriptor c2("label",
  59. DataType(DataType::DE_UINT32),
  60. TensorImpl::kFlexible,
  61. rank,
  62. &c2Shape);
  63. testSchema->AddColumn(c1);
  64. testSchema->AddColumn(c2);
  65. std::shared_ptr<RandomDataOp> myRandomDataOp;
  66. RandomDataOp::Builder builder;
  67. rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(25).Build(&myRandomDataOp);
  68. EXPECT_TRUE(rc.IsOk());
  69. rc = myTree->AssociateNode(myRandomDataOp);
  70. EXPECT_TRUE(rc.IsOk());
  71. rc = myTree->AssignRoot(myRandomDataOp);
  72. EXPECT_TRUE(rc.IsOk());
  73. std::ostringstream ss;
  74. ss << *myRandomDataOp;
  75. MS_LOG(INFO) << "RandomDataOp print: %s" << ss.str();
  76. MS_LOG(INFO) << "Launching tree and begin iteration";
  77. rc = myTree->Prepare();
  78. EXPECT_TRUE(rc.IsOk());
  79. rc = myTree->Launch();
  80. EXPECT_TRUE(rc.IsOk());
  81. // Start the loop of reading tensors from our pipeline
  82. DatasetIterator dI(myTree);
  83. TensorRow tensorList;
  84. rc = dI.FetchNextTensorRow(&tensorList);
  85. EXPECT_TRUE(rc.IsOk());
  86. int rowCount = 0;
  87. while (!tensorList.empty()) {
  88. // Don't display these rows...too big to show
  89. MS_LOG(INFO) << "Row fetched #: " << rowCount;
  90. rc = dI.FetchNextTensorRow(&tensorList);
  91. EXPECT_TRUE(rc.IsOk());
  92. rowCount++;
  93. }
  94. ASSERT_EQ(rowCount, 25);
  95. }
  96. // Test info:
  97. // - Simple test with a randomly generated schema
  98. // - no iteration loop on this one, just create the op
  99. //
  100. // Tree: single node tree with RandomDataOp
  101. //
  102. // RandomDataOp
  103. //
  104. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
  105. Status rc;
  106. MS_LOG(INFO) << "UT test RandomDataOpBasic2";
  107. // Start with an empty execution tree
  108. auto myTree = std::make_shared<ExecutionTree>();
  109. std::shared_ptr<RandomDataOp> myRandomDataOp;
  110. RandomDataOp::Builder builder;
  111. rc = builder.SetNumWorkers(1).Build(&myRandomDataOp);
  112. EXPECT_TRUE(rc.IsOk());
  113. rc = myTree->AssociateNode(myRandomDataOp);
  114. EXPECT_TRUE(rc.IsOk());
  115. rc = myTree->AssignRoot(myRandomDataOp);
  116. EXPECT_TRUE(rc.IsOk());
  117. std::ostringstream ss;
  118. ss << *myRandomDataOp;
  119. MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
  120. }
  121. // Test info:
  122. // - json file test with iteration
  123. //
  124. // Tree: single node tree with RandomDataOp
  125. //
  126. // RandomDataOp
  127. //
  128. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
  129. Status rc;
  130. MS_LOG(INFO) << "UT test RandomDataOpBasic3";
  131. // Start with an empty execution tree
  132. auto myTree = std::make_shared<ExecutionTree>();
  133. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  134. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {});
  135. EXPECT_TRUE(rc.IsOk());
  136. std::shared_ptr<RandomDataOp> myRandomDataOp;
  137. RandomDataOp::Builder builder;
  138. rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
  139. EXPECT_TRUE(rc.IsOk());
  140. rc = myTree->AssociateNode(myRandomDataOp);
  141. EXPECT_TRUE(rc.IsOk());
  142. rc = myTree->AssignRoot(myRandomDataOp);
  143. EXPECT_TRUE(rc.IsOk());
  144. std::ostringstream ss;
  145. ss << *myRandomDataOp;
  146. MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
  147. MS_LOG(INFO) << "Launching tree and begin iteration";
  148. rc = myTree->Prepare();
  149. EXPECT_TRUE(rc.IsOk());
  150. rc = myTree->Launch();
  151. EXPECT_TRUE(rc.IsOk());
  152. // Start the loop of reading tensors from our pipeline
  153. DatasetIterator dI(myTree);
  154. TensorRow tensorList;
  155. rc = dI.FetchNextTensorRow(&tensorList);
  156. EXPECT_TRUE(rc.IsOk());
  157. int rowCount = 0;
  158. while (!tensorList.empty()) {
  159. // Don't display these rows...too big to show
  160. MS_LOG(INFO) << "Row fetched #: " << rowCount;
  161. rc = dI.FetchNextTensorRow(&tensorList);
  162. EXPECT_TRUE(rc.IsOk());
  163. rowCount++;
  164. }
  165. ASSERT_EQ(rowCount, 10);
  166. }
  167. // Test info:
  168. // - json schema input it's a fairly simple one
  169. // - has an interaction loop
  170. //
  171. // Tree: RepeatOp over RandomDataOp
  172. //
  173. // RepeatOp
  174. // |
  175. // RandomDataOp
  176. //
  177. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
  178. Status rc;
  179. MS_LOG(INFO) << "UT test RandomDataOpBasic4";
  180. // Start with an empty execution tree
  181. auto myTree = std::make_shared<ExecutionTree>();
  182. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  183. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  184. EXPECT_TRUE(rc.IsOk());
  185. std::shared_ptr<RandomDataOp> myRandomDataOp;
  186. RandomDataOp::Builder builder;
  187. rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
  188. EXPECT_TRUE(rc.IsOk());
  189. rc = myTree->AssociateNode(myRandomDataOp);
  190. EXPECT_TRUE(rc.IsOk());
  191. uint32_t numRepeats = 2;
  192. std::shared_ptr<RepeatOp> myRepeatOp;
  193. rc = RepeatOp::Builder(numRepeats)
  194. .Build(&myRepeatOp);
  195. EXPECT_TRUE(rc.IsOk());
  196. rc = myTree->AssociateNode(myRepeatOp);
  197. EXPECT_TRUE(rc.IsOk());
  198. myRandomDataOp->set_total_repeats(numRepeats);
  199. myRandomDataOp->set_num_repeats_per_epoch(numRepeats);
  200. rc = myRepeatOp->AddChild(myRandomDataOp);
  201. EXPECT_TRUE(rc.IsOk());
  202. rc = myTree->AssignRoot(myRepeatOp);
  203. EXPECT_TRUE(rc.IsOk());
  204. MS_LOG(INFO) << "Launching tree and begin iteration";
  205. rc = myTree->Prepare();
  206. EXPECT_TRUE(rc.IsOk());
  207. rc = myTree->Launch();
  208. EXPECT_TRUE(rc.IsOk());
  209. // Start the loop of reading tensors from our pipeline
  210. DatasetIterator dI(myTree);
  211. TensorRow tensorList;
  212. rc = dI.FetchNextTensorRow(&tensorList);
  213. EXPECT_TRUE(rc.IsOk());
  214. int rowCount = 0;
  215. while (!tensorList.empty()) {
  216. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  217. // Display the tensor by calling the printer on it
  218. for (int i = 0; i < tensorList.size(); i++) {
  219. std::ostringstream ss;
  220. ss << *tensorList[i] << std::endl;
  221. MS_LOG(INFO) << "Tensor print: %s" << ss.str();
  222. }
  223. rc = dI.FetchNextTensorRow(&tensorList);
  224. EXPECT_TRUE(rc.IsOk());
  225. rowCount++;
  226. }
  227. ASSERT_EQ(rowCount, 20);
  228. }
  229. // Test info:
  230. // - json schema input it's a fairly simple one
  231. // - has an interaction loop
  232. // - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers
  233. //
  234. // Tree: RepeatOp over RandomDataOp
  235. //
  236. // RepeatOp
  237. // |
  238. // RandomDataOp
  239. //
  240. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
  241. Status rc;
  242. MS_LOG(INFO) << "UT test RandomDataOpBasic5";
  243. // Start with an empty execution tree
  244. auto myTree = std::make_shared<ExecutionTree>();
  245. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  246. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  247. EXPECT_TRUE(rc.IsOk());
  248. std::shared_ptr<RandomDataOp> myRandomDataOp;
  249. RandomDataOp::Builder builder;
  250. rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
  251. EXPECT_TRUE(rc.IsOk());
  252. rc = myTree->AssociateNode(myRandomDataOp);
  253. EXPECT_TRUE(rc.IsOk());
  254. uint32_t numRepeats = 3;
  255. std::shared_ptr<RepeatOp> myRepeatOp;
  256. rc = RepeatOp::Builder(numRepeats)
  257. .Build(&myRepeatOp);
  258. EXPECT_TRUE(rc.IsOk());
  259. rc = myTree->AssociateNode(myRepeatOp);
  260. EXPECT_TRUE(rc.IsOk());
  261. myRandomDataOp->set_total_repeats(numRepeats);
  262. myRandomDataOp->set_num_repeats_per_epoch(numRepeats);
  263. rc = myRepeatOp->AddChild(myRandomDataOp);
  264. EXPECT_TRUE(rc.IsOk());
  265. rc = myTree->AssignRoot(myRepeatOp);
  266. EXPECT_TRUE(rc.IsOk());
  267. MS_LOG(INFO) << "Launching tree and begin iteration";
  268. rc = myTree->Prepare();
  269. EXPECT_TRUE(rc.IsOk());
  270. rc = myTree->Launch();
  271. EXPECT_TRUE(rc.IsOk());
  272. // Start the loop of reading tensors from our pipeline
  273. DatasetIterator dI(myTree);
  274. TensorRow tensorList;
  275. rc = dI.FetchNextTensorRow(&tensorList);
  276. EXPECT_TRUE(rc.IsOk());
  277. int rowCount = 0;
  278. while (!tensorList.empty()) {
  279. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  280. // Display the tensor by calling the printer on it
  281. for (int i = 0; i < tensorList.size(); i++) {
  282. std::ostringstream ss;
  283. ss << *tensorList[i] << std::endl;
  284. MS_LOG(INFO) << "Tensor print: ", ss.str();
  285. }
  286. rc = dI.FetchNextTensorRow(&tensorList);
  287. EXPECT_TRUE(rc.IsOk());
  288. rowCount++;
  289. }
  290. ASSERT_EQ(rowCount, 30);
  291. }
  292. // Test info:
  293. // - repeat shuffle random
  294. //
  295. // Tree: RepeatOp over RandomDataOp
  296. //
  297. // RepeatOp
  298. // |
  299. // ShuffleOp
  300. // |
  301. // RandomDataOp
  302. //
  303. TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
  304. Status rc;
  305. MS_LOG(INFO) << "UT test RandomDataOpTree1";
  306. // Start with an empty execution tree
  307. auto myTree = std::make_shared<ExecutionTree>();
  308. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  309. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  310. EXPECT_TRUE(rc.IsOk());
  311. std::shared_ptr<RandomDataOp> myRandomDataOp;
  312. RandomDataOp::Builder builder;
  313. rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
  314. EXPECT_TRUE(rc.IsOk());
  315. rc = myTree->AssociateNode(myRandomDataOp);
  316. EXPECT_TRUE(rc.IsOk());
  317. std::shared_ptr<ShuffleOp> myShuffleOp;
  318. rc = ShuffleOp::Builder()
  319. .SetShuffleSize(4)
  320. .Build(&myShuffleOp);
  321. EXPECT_TRUE(rc.IsOk());
  322. rc = myTree->AssociateNode(myShuffleOp);
  323. EXPECT_TRUE(rc.IsOk());
  324. uint32_t numRepeats = 3;
  325. std::shared_ptr<RepeatOp> myRepeatOp;
  326. rc = RepeatOp::Builder(numRepeats)
  327. .Build(&myRepeatOp);
  328. EXPECT_TRUE(rc.IsOk());
  329. rc = myTree->AssociateNode(myRepeatOp);
  330. EXPECT_TRUE(rc.IsOk());
  331. myShuffleOp->set_total_repeats(numRepeats);
  332. myShuffleOp->set_num_repeats_per_epoch(numRepeats);
  333. rc = myRepeatOp->AddChild(myShuffleOp);
  334. EXPECT_TRUE(rc.IsOk());
  335. myRandomDataOp->set_total_repeats(numRepeats);
  336. myRandomDataOp->set_num_repeats_per_epoch(numRepeats);
  337. rc = myShuffleOp->AddChild(myRandomDataOp);
  338. EXPECT_TRUE(rc.IsOk());
  339. rc = myTree->AssignRoot(myRepeatOp);
  340. EXPECT_TRUE(rc.IsOk());
  341. MS_LOG(INFO) << "Launching tree and begin iteration";
  342. rc = myTree->Prepare();
  343. EXPECT_TRUE(rc.IsOk());
  344. rc = myTree->Launch();
  345. EXPECT_TRUE(rc.IsOk());
  346. // Start the loop of reading tensors from our pipeline
  347. DatasetIterator dI(myTree);
  348. TensorRow tensorList;
  349. rc = dI.FetchNextTensorRow(&tensorList);
  350. EXPECT_TRUE(rc.IsOk());
  351. int rowCount = 0;
  352. while (!tensorList.empty()) {
  353. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  354. // Display the tensor by calling the printer on it
  355. for (int i = 0; i < tensorList.size(); i++) {
  356. std::ostringstream ss;
  357. ss << *tensorList[i] << std::endl;
  358. MS_LOG(INFO) << "Tensor print: " << ss.str();
  359. }
  360. rc = dI.FetchNextTensorRow(&tensorList);
  361. EXPECT_TRUE(rc.IsOk());
  362. rowCount++;
  363. }
  364. ASSERT_EQ(rowCount, 30);
  365. }