You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

random_data_op_test.cc 13 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. /**
  2. * Copyright 2019-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "minddata/dataset/core/client.h"
  17. #include "common/common.h"
  18. #include "gtest/gtest.h"
  19. #include <memory>
  20. #include <vector>
  21. #include <iostream>
  22. #include "minddata/dataset/core/tensor_shape.h"
  23. #include "minddata/dataset/engine/datasetops/source/random_data_op.h"
  24. #include "minddata/dataset/engine/data_schema.h"
  25. using namespace mindspore::dataset;
  26. using mindspore::MsLogLevel::INFO;
  27. using mindspore::ExceptionType::NoExceptionType;
  28. using mindspore::LogStream;
  29. class MindDataTestRandomDataOp : public UT::DatasetOpTesting {
  30. };
  31. // Test info:
  32. // - Simple test with a user-provided schema generated purely from DataSchema C API
  33. // - has an iteration loop
  34. //
  35. // Tree: single node tree with RandomDataOp
  36. //
  37. // RandomDataOp
  38. //
  39. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
  40. Status rc;
  41. int32_t rank = 0; // not used
  42. MS_LOG(INFO) << "UT test RandomDataOpBasic1";
  43. // Start with an empty execution tree
  44. auto myTree = std::make_shared<ExecutionTree>();
  45. // Create a schema using the C api's
  46. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  47. // RandomDataOp can randomly fill in unknown dimension lengths of a shape.
  48. // Most other ops cannot do that as they are limited by the physical data itself. We're
  49. // more flexible with random data since it is just making stuff up on the fly.
  50. TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3});
  51. ColDescriptor c1("image",
  52. DataType(DataType::DE_INT8),
  53. TensorImpl::kFlexible,
  54. rank, // not used
  55. &c1Shape);
  56. // Column 2 will just be a scalar label number
  57. TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor
  58. ColDescriptor c2("label",
  59. DataType(DataType::DE_UINT32),
  60. TensorImpl::kFlexible,
  61. rank,
  62. &c2Shape);
  63. testSchema->AddColumn(c1);
  64. testSchema->AddColumn(c2);
  65. std::shared_ptr<RandomDataOp> myRandomDataOp;
  66. RandomDataOp::Builder builder;
  67. rc = builder.SetRowsPerBuffer(2)
  68. .SetNumWorkers(1)
  69. .SetDataSchema(std::move(testSchema))
  70. .SetTotalRows(25)
  71. .Build(&myRandomDataOp);
  72. EXPECT_TRUE(rc.IsOk());
  73. rc = myTree->AssociateNode(myRandomDataOp);
  74. EXPECT_TRUE(rc.IsOk());
  75. rc = myTree->AssignRoot(myRandomDataOp);
  76. EXPECT_TRUE(rc.IsOk());
  77. std::ostringstream ss;
  78. ss << *myRandomDataOp;
  79. MS_LOG(INFO) << "RandomDataOp print: %s" << ss.str();
  80. MS_LOG(INFO) << "Launching tree and begin iteration";
  81. rc = myTree->Prepare();
  82. EXPECT_TRUE(rc.IsOk());
  83. rc = myTree->Launch();
  84. EXPECT_TRUE(rc.IsOk());
  85. // Start the loop of reading tensors from our pipeline
  86. DatasetIterator dI(myTree);
  87. TensorRow tensorList;
  88. rc = dI.FetchNextTensorRow(&tensorList);
  89. EXPECT_TRUE(rc.IsOk());
  90. int rowCount = 0;
  91. while (!tensorList.empty()) {
  92. // Don't display these rows...too big to show
  93. MS_LOG(INFO) << "Row fetched #: " << rowCount;
  94. rc = dI.FetchNextTensorRow(&tensorList);
  95. EXPECT_TRUE(rc.IsOk());
  96. rowCount++;
  97. }
  98. ASSERT_EQ(rowCount, 25);
  99. }
  100. // Test info:
  101. // - Simple test with a randomly generated schema
  102. // - no iteration loop on this one, just create the op
  103. //
  104. // Tree: single node tree with RandomDataOp
  105. //
  106. // RandomDataOp
  107. //
  108. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
  109. Status rc;
  110. MS_LOG(INFO) << "UT test RandomDataOpBasic2";
  111. // Start with an empty execution tree
  112. auto myTree = std::make_shared<ExecutionTree>();
  113. std::shared_ptr<RandomDataOp> myRandomDataOp;
  114. RandomDataOp::Builder builder;
  115. rc = builder.SetRowsPerBuffer(2)
  116. .SetNumWorkers(1)
  117. .Build(&myRandomDataOp);
  118. EXPECT_TRUE(rc.IsOk());
  119. rc = myTree->AssociateNode(myRandomDataOp);
  120. EXPECT_TRUE(rc.IsOk());
  121. rc = myTree->AssignRoot(myRandomDataOp);
  122. EXPECT_TRUE(rc.IsOk());
  123. std::ostringstream ss;
  124. ss << *myRandomDataOp;
  125. MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
  126. }
  127. // Test info:
  128. // - json file test with iteration
  129. //
  130. // Tree: single node tree with RandomDataOp
  131. //
  132. // RandomDataOp
  133. //
  134. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
  135. Status rc;
  136. MS_LOG(INFO) << "UT test RandomDataOpBasic3";
  137. // Start with an empty execution tree
  138. auto myTree = std::make_shared<ExecutionTree>();
  139. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  140. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {});
  141. EXPECT_TRUE(rc.IsOk());
  142. std::shared_ptr<RandomDataOp> myRandomDataOp;
  143. RandomDataOp::Builder builder;
  144. rc = builder.SetRowsPerBuffer(2)
  145. .SetNumWorkers(1)
  146. .SetDataSchema(std::move(testSchema))
  147. .SetTotalRows(10)
  148. .Build(&myRandomDataOp);
  149. EXPECT_TRUE(rc.IsOk());
  150. rc = myTree->AssociateNode(myRandomDataOp);
  151. EXPECT_TRUE(rc.IsOk());
  152. rc = myTree->AssignRoot(myRandomDataOp);
  153. EXPECT_TRUE(rc.IsOk());
  154. std::ostringstream ss;
  155. ss << *myRandomDataOp;
  156. MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
  157. MS_LOG(INFO) << "Launching tree and begin iteration";
  158. rc = myTree->Prepare();
  159. EXPECT_TRUE(rc.IsOk());
  160. rc = myTree->Launch();
  161. EXPECT_TRUE(rc.IsOk());
  162. // Start the loop of reading tensors from our pipeline
  163. DatasetIterator dI(myTree);
  164. TensorRow tensorList;
  165. rc = dI.FetchNextTensorRow(&tensorList);
  166. EXPECT_TRUE(rc.IsOk());
  167. int rowCount = 0;
  168. while (!tensorList.empty()) {
  169. // Don't display these rows...too big to show
  170. MS_LOG(INFO) << "Row fetched #: " << rowCount;
  171. rc = dI.FetchNextTensorRow(&tensorList);
  172. EXPECT_TRUE(rc.IsOk());
  173. rowCount++;
  174. }
  175. ASSERT_EQ(rowCount, 10);
  176. }
  177. // Test info:
  178. // - json schema input; it's a fairly simple one
  179. // - has an iteration loop
  180. //
  181. // Tree: RepeatOp over RandomDataOp
  182. //
  183. // RepeatOp
  184. // |
  185. // RandomDataOp
  186. //
  187. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
  188. Status rc;
  189. MS_LOG(INFO) << "UT test RandomDataOpBasic4";
  190. // Start with an empty execution tree
  191. auto myTree = std::make_shared<ExecutionTree>();
  192. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  193. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  194. EXPECT_TRUE(rc.IsOk());
  195. std::shared_ptr<RandomDataOp> myRandomDataOp;
  196. RandomDataOp::Builder builder;
  197. rc = builder.SetRowsPerBuffer(2)
  198. .SetNumWorkers(1)
  199. .SetDataSchema(std::move(testSchema))
  200. .SetTotalRows(10)
  201. .Build(&myRandomDataOp);
  202. EXPECT_TRUE(rc.IsOk());
  203. rc = myTree->AssociateNode(myRandomDataOp);
  204. EXPECT_TRUE(rc.IsOk());
  205. uint32_t numRepeats = 2;
  206. std::shared_ptr<RepeatOp> myRepeatOp;
  207. rc = RepeatOp::Builder(numRepeats)
  208. .Build(&myRepeatOp);
  209. EXPECT_TRUE(rc.IsOk());
  210. rc = myTree->AssociateNode(myRepeatOp);
  211. EXPECT_TRUE(rc.IsOk());
  212. myRandomDataOp->set_total_repeats(numRepeats);
  213. myRandomDataOp->set_num_repeats_per_epoch(numRepeats);
  214. rc = myRepeatOp->AddChild(myRandomDataOp);
  215. EXPECT_TRUE(rc.IsOk());
  216. rc = myTree->AssignRoot(myRepeatOp);
  217. EXPECT_TRUE(rc.IsOk());
  218. MS_LOG(INFO) << "Launching tree and begin iteration";
  219. rc = myTree->Prepare();
  220. EXPECT_TRUE(rc.IsOk());
  221. rc = myTree->Launch();
  222. EXPECT_TRUE(rc.IsOk());
  223. // Start the loop of reading tensors from our pipeline
  224. DatasetIterator dI(myTree);
  225. TensorRow tensorList;
  226. rc = dI.FetchNextTensorRow(&tensorList);
  227. EXPECT_TRUE(rc.IsOk());
  228. int rowCount = 0;
  229. while (!tensorList.empty()) {
  230. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  231. // Display the tensor by calling the printer on it
  232. for (int i = 0; i < tensorList.size(); i++) {
  233. std::ostringstream ss;
  234. ss << *tensorList[i] << std::endl;
  235. MS_LOG(INFO) << "Tensor print: %s" << ss.str();
  236. }
  237. rc = dI.FetchNextTensorRow(&tensorList);
  238. EXPECT_TRUE(rc.IsOk());
  239. rowCount++;
  240. }
  241. ASSERT_EQ(rowCount, 20);
  242. }
  243. // Test info:
  244. // - json schema input; it's a fairly simple one
  245. // - has an iteration loop
  246. // - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers
  247. //
  248. // Tree: RepeatOp over RandomDataOp
  249. //
  250. // RepeatOp
  251. // |
  252. // RandomDataOp
  253. //
  254. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
  255. Status rc;
  256. MS_LOG(INFO) << "UT test RandomDataOpBasic5";
  257. // Start with an empty execution tree
  258. auto myTree = std::make_shared<ExecutionTree>();
  259. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  260. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  261. EXPECT_TRUE(rc.IsOk());
  262. std::shared_ptr<RandomDataOp> myRandomDataOp;
  263. RandomDataOp::Builder builder;
  264. rc = builder.SetRowsPerBuffer(2)
  265. .SetNumWorkers(4)
  266. .SetDataSchema(std::move(testSchema))
  267. .SetTotalRows(10)
  268. .Build(&myRandomDataOp);
  269. EXPECT_TRUE(rc.IsOk());
  270. rc = myTree->AssociateNode(myRandomDataOp);
  271. EXPECT_TRUE(rc.IsOk());
  272. uint32_t numRepeats = 3;
  273. std::shared_ptr<RepeatOp> myRepeatOp;
  274. rc = RepeatOp::Builder(numRepeats)
  275. .Build(&myRepeatOp);
  276. EXPECT_TRUE(rc.IsOk());
  277. rc = myTree->AssociateNode(myRepeatOp);
  278. EXPECT_TRUE(rc.IsOk());
  279. myRandomDataOp->set_total_repeats(numRepeats);
  280. myRandomDataOp->set_num_repeats_per_epoch(numRepeats);
  281. rc = myRepeatOp->AddChild(myRandomDataOp);
  282. EXPECT_TRUE(rc.IsOk());
  283. rc = myTree->AssignRoot(myRepeatOp);
  284. EXPECT_TRUE(rc.IsOk());
  285. MS_LOG(INFO) << "Launching tree and begin iteration";
  286. rc = myTree->Prepare();
  287. EXPECT_TRUE(rc.IsOk());
  288. rc = myTree->Launch();
  289. EXPECT_TRUE(rc.IsOk());
  290. // Start the loop of reading tensors from our pipeline
  291. DatasetIterator dI(myTree);
  292. TensorRow tensorList;
  293. rc = dI.FetchNextTensorRow(&tensorList);
  294. EXPECT_TRUE(rc.IsOk());
  295. int rowCount = 0;
  296. while (!tensorList.empty()) {
  297. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  298. // Display the tensor by calling the printer on it
  299. for (int i = 0; i < tensorList.size(); i++) {
  300. std::ostringstream ss;
  301. ss << *tensorList[i] << std::endl;
  302. MS_LOG(INFO) << "Tensor print: ", ss.str();
  303. }
  304. rc = dI.FetchNextTensorRow(&tensorList);
  305. EXPECT_TRUE(rc.IsOk());
  306. rowCount++;
  307. }
  308. ASSERT_EQ(rowCount, 30);
  309. }
  310. // Test info:
  311. // - repeat shuffle random
  312. //
  313. // Tree: RepeatOp over ShuffleOp over RandomDataOp
  314. //
  315. // RepeatOp
  316. // |
  317. // ShuffleOp
  318. // |
  319. // RandomDataOp
  320. //
  321. TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
  322. Status rc;
  323. MS_LOG(INFO) << "UT test RandomDataOpTree1";
  324. // Start with an empty execution tree
  325. auto myTree = std::make_shared<ExecutionTree>();
  326. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  327. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  328. EXPECT_TRUE(rc.IsOk());
  329. std::shared_ptr<RandomDataOp> myRandomDataOp;
  330. RandomDataOp::Builder builder;
  331. rc = builder.SetRowsPerBuffer(2)
  332. .SetNumWorkers(4)
  333. .SetDataSchema(std::move(testSchema))
  334. .SetTotalRows(10)
  335. .Build(&myRandomDataOp);
  336. EXPECT_TRUE(rc.IsOk());
  337. rc = myTree->AssociateNode(myRandomDataOp);
  338. EXPECT_TRUE(rc.IsOk());
  339. std::shared_ptr<ShuffleOp> myShuffleOp;
  340. rc = ShuffleOp::Builder()
  341. .SetRowsPerBuffer(2)
  342. .SetShuffleSize(4)
  343. .Build(&myShuffleOp);
  344. EXPECT_TRUE(rc.IsOk());
  345. rc = myTree->AssociateNode(myShuffleOp);
  346. EXPECT_TRUE(rc.IsOk());
  347. uint32_t numRepeats = 3;
  348. std::shared_ptr<RepeatOp> myRepeatOp;
  349. rc = RepeatOp::Builder(numRepeats)
  350. .Build(&myRepeatOp);
  351. EXPECT_TRUE(rc.IsOk());
  352. rc = myTree->AssociateNode(myRepeatOp);
  353. EXPECT_TRUE(rc.IsOk());
  354. myShuffleOp->set_total_repeats(numRepeats);
  355. myShuffleOp->set_num_repeats_per_epoch(numRepeats);
  356. rc = myRepeatOp->AddChild(myShuffleOp);
  357. EXPECT_TRUE(rc.IsOk());
  358. myRandomDataOp->set_total_repeats(numRepeats);
  359. myRandomDataOp->set_num_repeats_per_epoch(numRepeats);
  360. rc = myShuffleOp->AddChild(myRandomDataOp);
  361. EXPECT_TRUE(rc.IsOk());
  362. rc = myTree->AssignRoot(myRepeatOp);
  363. EXPECT_TRUE(rc.IsOk());
  364. MS_LOG(INFO) << "Launching tree and begin iteration";
  365. rc = myTree->Prepare();
  366. EXPECT_TRUE(rc.IsOk());
  367. rc = myTree->Launch();
  368. EXPECT_TRUE(rc.IsOk());
  369. // Start the loop of reading tensors from our pipeline
  370. DatasetIterator dI(myTree);
  371. TensorRow tensorList;
  372. rc = dI.FetchNextTensorRow(&tensorList);
  373. EXPECT_TRUE(rc.IsOk());
  374. int rowCount = 0;
  375. while (!tensorList.empty()) {
  376. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  377. // Display the tensor by calling the printer on it
  378. for (int i = 0; i < tensorList.size(); i++) {
  379. std::ostringstream ss;
  380. ss << *tensorList[i] << std::endl;
  381. MS_LOG(INFO) << "Tensor print: " << ss.str();
  382. }
  383. rc = dI.FetchNextTensorRow(&tensorList);
  384. EXPECT_TRUE(rc.IsOk());
  385. rowCount++;
  386. }
  387. ASSERT_EQ(rowCount, 30);
  388. }