You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

random_data_op_test.cc 13 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "dataset/core/client.h"
  17. #include "common/common.h"
  18. #include "gtest/gtest.h"
  19. #include <memory>
  20. #include <vector>
  21. #include <iostream>
  22. #include "dataset/core/tensor_shape.h"
  23. #include "dataset/engine/datasetops/source/random_data_op.h"
  24. #include "dataset/engine/data_schema.h"
  25. using namespace mindspore::dataset;
  26. using mindspore::MsLogLevel::INFO;
  27. using mindspore::ExceptionType::NoExceptionType;
  28. using mindspore::LogStream;
  29. class MindDataTestRandomDataOp : public UT::DatasetOpTesting {
  30. };
  31. // Test info:
  32. // - Simple test with a user-provided schema generated purely from DataSchema C API
  33. // - has an interation loop
  34. //
  35. // Tree: single node tree with RandomDataOp
  36. //
  37. // RandomDataOp
  38. //
  39. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
  40. Status rc;
  41. int32_t rank = 0; // not used
  42. MS_LOG(INFO) << "UT test RandomDataOpBasic1";
  43. // Start with an empty execution tree
  44. auto myTree = std::make_shared<ExecutionTree>();
  45. // Create a schema using the C api's
  46. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  47. // RandomDataOp can randomly fill in unknown dimension lengths of a shape.
  48. // Most other ops cannot do that as they are limited by the physical data itself. We're
  49. // more flexible with random data since it is just making stuff up on the fly.
  50. TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3});
  51. ColDescriptor c1("image",
  52. DataType(DataType::DE_INT8),
  53. TensorImpl::kFlexible,
  54. rank, // not used
  55. &c1Shape);
  56. // Column 2 will just be a scalar label number
  57. TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor
  58. ColDescriptor c2("label",
  59. DataType(DataType::DE_UINT32),
  60. TensorImpl::kFlexible,
  61. rank,
  62. &c2Shape);
  63. testSchema->AddColumn(c1);
  64. testSchema->AddColumn(c2);
  65. std::shared_ptr<RandomDataOp> myRandomDataOp;
  66. RandomDataOp::Builder builder;
  67. rc = builder.SetRowsPerBuffer(2)
  68. .SetNumWorkers(1)
  69. .SetDataSchema(std::move(testSchema))
  70. .SetTotalRows(25)
  71. .Build(&myRandomDataOp);
  72. EXPECT_TRUE(rc.IsOk());
  73. rc = myTree->AssociateNode(myRandomDataOp);
  74. EXPECT_TRUE(rc.IsOk());
  75. rc = myTree->AssignRoot(myRandomDataOp);
  76. EXPECT_TRUE(rc.IsOk());
  77. std::ostringstream ss;
  78. ss << *myRandomDataOp;
  79. MS_LOG(INFO) << "RandomDataOp print: %s" << ss.str();
  80. MS_LOG(INFO) << "Launching tree and begin iteration";
  81. rc = myTree->Prepare();
  82. EXPECT_TRUE(rc.IsOk());
  83. rc = myTree->Launch();
  84. EXPECT_TRUE(rc.IsOk());
  85. // Start the loop of reading tensors from our pipeline
  86. DatasetIterator dI(myTree);
  87. TensorRow tensorList;
  88. rc = dI.FetchNextTensorRow(&tensorList);
  89. EXPECT_TRUE(rc.IsOk());
  90. int rowCount = 0;
  91. while (!tensorList.empty()) {
  92. // Don't display these rows...too big to show
  93. MS_LOG(INFO) << "Row fetched #: " << rowCount;
  94. rc = dI.FetchNextTensorRow(&tensorList);
  95. EXPECT_TRUE(rc.IsOk());
  96. rowCount++;
  97. }
  98. ASSERT_EQ(rowCount, 25);
  99. }
  100. // Test info:
  101. // - Simple test with a randomly generated schema
  102. // - no iteration loop on this one, just create the op
  103. //
  104. // Tree: single node tree with RandomDataOp
  105. //
  106. // RandomDataOp
  107. //
  108. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
  109. Status rc;
  110. MS_LOG(INFO) << "UT test RandomDataOpBasic2";
  111. // Start with an empty execution tree
  112. auto myTree = std::make_shared<ExecutionTree>();
  113. std::shared_ptr<RandomDataOp> myRandomDataOp;
  114. RandomDataOp::Builder builder;
  115. rc = builder.SetRowsPerBuffer(2)
  116. .SetNumWorkers(1)
  117. .Build(&myRandomDataOp);
  118. EXPECT_TRUE(rc.IsOk());
  119. rc = myTree->AssociateNode(myRandomDataOp);
  120. EXPECT_TRUE(rc.IsOk());
  121. rc = myTree->AssignRoot(myRandomDataOp);
  122. EXPECT_TRUE(rc.IsOk());
  123. std::ostringstream ss;
  124. ss << *myRandomDataOp;
  125. MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
  126. }
  127. // Test info:
  128. // - json file test with iteration
  129. //
  130. // Tree: single node tree with RandomDataOp
  131. //
  132. // RandomDataOp
  133. //
  134. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
  135. Status rc;
  136. MS_LOG(INFO) << "UT test RandomDataOpBasic3";
  137. // Start with an empty execution tree
  138. auto myTree = std::make_shared<ExecutionTree>();
  139. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  140. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {});
  141. EXPECT_TRUE(rc.IsOk());
  142. std::shared_ptr<RandomDataOp> myRandomDataOp;
  143. RandomDataOp::Builder builder;
  144. rc = builder.SetRowsPerBuffer(2)
  145. .SetNumWorkers(1)
  146. .SetDataSchema(std::move(testSchema))
  147. .SetTotalRows(10)
  148. .Build(&myRandomDataOp);
  149. EXPECT_TRUE(rc.IsOk());
  150. rc = myTree->AssociateNode(myRandomDataOp);
  151. EXPECT_TRUE(rc.IsOk());
  152. rc = myTree->AssignRoot(myRandomDataOp);
  153. EXPECT_TRUE(rc.IsOk());
  154. std::ostringstream ss;
  155. ss << *myRandomDataOp;
  156. MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
  157. MS_LOG(INFO) << "Launching tree and begin iteration";
  158. rc = myTree->Prepare();
  159. EXPECT_TRUE(rc.IsOk());
  160. rc = myTree->Launch();
  161. EXPECT_TRUE(rc.IsOk());
  162. // Start the loop of reading tensors from our pipeline
  163. DatasetIterator dI(myTree);
  164. TensorRow tensorList;
  165. rc = dI.FetchNextTensorRow(&tensorList);
  166. EXPECT_TRUE(rc.IsOk());
  167. int rowCount = 0;
  168. while (!tensorList.empty()) {
  169. // Don't display these rows...too big to show
  170. MS_LOG(INFO) << "Row fetched #: " << rowCount;
  171. rc = dI.FetchNextTensorRow(&tensorList);
  172. EXPECT_TRUE(rc.IsOk());
  173. rowCount++;
  174. }
  175. ASSERT_EQ(rowCount, 10);
  176. }
  177. // Test info:
  178. // - json schema input it's a fairly simple one
  179. // - has an interation loop
  180. //
  181. // Tree: RepeatOp over RandomDataOp
  182. //
  183. // RepeatOp
  184. // |
  185. // RandomDataOp
  186. //
  187. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
  188. Status rc;
  189. MS_LOG(INFO) << "UT test RandomDataOpBasic4";
  190. // Start with an empty execution tree
  191. auto myTree = std::make_shared<ExecutionTree>();
  192. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  193. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  194. EXPECT_TRUE(rc.IsOk());
  195. std::shared_ptr<RandomDataOp> myRandomDataOp;
  196. RandomDataOp::Builder builder;
  197. rc = builder.SetRowsPerBuffer(2)
  198. .SetNumWorkers(1)
  199. .SetDataSchema(std::move(testSchema))
  200. .SetTotalRows(10)
  201. .Build(&myRandomDataOp);
  202. EXPECT_TRUE(rc.IsOk());
  203. rc = myTree->AssociateNode(myRandomDataOp);
  204. EXPECT_TRUE(rc.IsOk());
  205. uint32_t numRepeats = 2;
  206. std::shared_ptr<RepeatOp> myRepeatOp;
  207. rc = RepeatOp::Builder(numRepeats)
  208. .Build(&myRepeatOp);
  209. EXPECT_TRUE(rc.IsOk());
  210. rc = myTree->AssociateNode(myRepeatOp);
  211. EXPECT_TRUE(rc.IsOk());
  212. rc = myRepeatOp->AddChild(myRandomDataOp);
  213. EXPECT_TRUE(rc.IsOk());
  214. rc = myTree->AssignRoot(myRepeatOp);
  215. EXPECT_TRUE(rc.IsOk());
  216. MS_LOG(INFO) << "Launching tree and begin iteration";
  217. rc = myTree->Prepare();
  218. EXPECT_TRUE(rc.IsOk());
  219. rc = myTree->Launch();
  220. EXPECT_TRUE(rc.IsOk());
  221. // Start the loop of reading tensors from our pipeline
  222. DatasetIterator dI(myTree);
  223. TensorRow tensorList;
  224. rc = dI.FetchNextTensorRow(&tensorList);
  225. EXPECT_TRUE(rc.IsOk());
  226. int rowCount = 0;
  227. while (!tensorList.empty()) {
  228. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  229. // Display the tensor by calling the printer on it
  230. for (int i = 0; i < tensorList.size(); i++) {
  231. std::ostringstream ss;
  232. ss << *tensorList[i] << std::endl;
  233. MS_LOG(INFO) << "Tensor print: %s" << ss.str();
  234. }
  235. rc = dI.FetchNextTensorRow(&tensorList);
  236. EXPECT_TRUE(rc.IsOk());
  237. rowCount++;
  238. }
  239. ASSERT_EQ(rowCount, 20);
  240. }
  241. // Test info:
  242. // - json schema input it's a fairly simple one
  243. // - has an interation loop
  244. // - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers
  245. //
  246. // Tree: RepeatOp over RandomDataOp
  247. //
  248. // RepeatOp
  249. // |
  250. // RandomDataOp
  251. //
  252. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
  253. Status rc;
  254. MS_LOG(INFO) << "UT test RandomDataOpBasic5";
  255. // Start with an empty execution tree
  256. auto myTree = std::make_shared<ExecutionTree>();
  257. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  258. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  259. EXPECT_TRUE(rc.IsOk());
  260. std::shared_ptr<RandomDataOp> myRandomDataOp;
  261. RandomDataOp::Builder builder;
  262. rc = builder.SetRowsPerBuffer(2)
  263. .SetNumWorkers(4)
  264. .SetDataSchema(std::move(testSchema))
  265. .SetTotalRows(10)
  266. .Build(&myRandomDataOp);
  267. EXPECT_TRUE(rc.IsOk());
  268. rc = myTree->AssociateNode(myRandomDataOp);
  269. EXPECT_TRUE(rc.IsOk());
  270. uint32_t numRepeats = 3;
  271. std::shared_ptr<RepeatOp> myRepeatOp;
  272. rc = RepeatOp::Builder(numRepeats)
  273. .Build(&myRepeatOp);
  274. EXPECT_TRUE(rc.IsOk());
  275. rc = myTree->AssociateNode(myRepeatOp);
  276. EXPECT_TRUE(rc.IsOk());
  277. rc = myRepeatOp->AddChild(myRandomDataOp);
  278. EXPECT_TRUE(rc.IsOk());
  279. rc = myTree->AssignRoot(myRepeatOp);
  280. EXPECT_TRUE(rc.IsOk());
  281. MS_LOG(INFO) << "Launching tree and begin iteration";
  282. rc = myTree->Prepare();
  283. EXPECT_TRUE(rc.IsOk());
  284. rc = myTree->Launch();
  285. EXPECT_TRUE(rc.IsOk());
  286. // Start the loop of reading tensors from our pipeline
  287. DatasetIterator dI(myTree);
  288. TensorRow tensorList;
  289. rc = dI.FetchNextTensorRow(&tensorList);
  290. EXPECT_TRUE(rc.IsOk());
  291. int rowCount = 0;
  292. while (!tensorList.empty()) {
  293. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  294. // Display the tensor by calling the printer on it
  295. for (int i = 0; i < tensorList.size(); i++) {
  296. std::ostringstream ss;
  297. ss << *tensorList[i] << std::endl;
  298. MS_LOG(INFO) << "Tensor print: ", ss.str();
  299. }
  300. rc = dI.FetchNextTensorRow(&tensorList);
  301. EXPECT_TRUE(rc.IsOk());
  302. rowCount++;
  303. }
  304. ASSERT_EQ(rowCount, 30);
  305. }
  306. // Test info:
  307. // - repeat shuffle random
  308. //
  309. // Tree: RepeatOp over RandomDataOp
  310. //
  311. // RepeatOp
  312. // |
  313. // ShuffleOp
  314. // |
  315. // RandomDataOp
  316. //
  317. TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
  318. Status rc;
  319. MS_LOG(INFO) << "UT test RandomDataOpTree1";
  320. // Start with an empty execution tree
  321. auto myTree = std::make_shared<ExecutionTree>();
  322. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  323. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  324. EXPECT_TRUE(rc.IsOk());
  325. std::shared_ptr<RandomDataOp> myRandomDataOp;
  326. RandomDataOp::Builder builder;
  327. rc = builder.SetRowsPerBuffer(2)
  328. .SetNumWorkers(4)
  329. .SetDataSchema(std::move(testSchema))
  330. .SetTotalRows(10)
  331. .Build(&myRandomDataOp);
  332. EXPECT_TRUE(rc.IsOk());
  333. rc = myTree->AssociateNode(myRandomDataOp);
  334. EXPECT_TRUE(rc.IsOk());
  335. std::shared_ptr<ShuffleOp> myShuffleOp;
  336. rc = ShuffleOp::Builder()
  337. .SetRowsPerBuffer(2)
  338. .SetShuffleSize(4)
  339. .Build(&myShuffleOp);
  340. EXPECT_TRUE(rc.IsOk());
  341. rc = myTree->AssociateNode(myShuffleOp);
  342. EXPECT_TRUE(rc.IsOk());
  343. uint32_t numRepeats = 3;
  344. std::shared_ptr<RepeatOp> myRepeatOp;
  345. rc = RepeatOp::Builder(numRepeats)
  346. .Build(&myRepeatOp);
  347. EXPECT_TRUE(rc.IsOk());
  348. rc = myTree->AssociateNode(myRepeatOp);
  349. EXPECT_TRUE(rc.IsOk());
  350. rc = myRepeatOp->AddChild(myShuffleOp);
  351. EXPECT_TRUE(rc.IsOk());
  352. rc = myShuffleOp->AddChild(myRandomDataOp);
  353. EXPECT_TRUE(rc.IsOk());
  354. rc = myTree->AssignRoot(myRepeatOp);
  355. EXPECT_TRUE(rc.IsOk());
  356. MS_LOG(INFO) << "Launching tree and begin iteration";
  357. rc = myTree->Prepare();
  358. EXPECT_TRUE(rc.IsOk());
  359. rc = myTree->Launch();
  360. EXPECT_TRUE(rc.IsOk());
  361. // Start the loop of reading tensors from our pipeline
  362. DatasetIterator dI(myTree);
  363. TensorRow tensorList;
  364. rc = dI.FetchNextTensorRow(&tensorList);
  365. EXPECT_TRUE(rc.IsOk());
  366. int rowCount = 0;
  367. while (!tensorList.empty()) {
  368. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  369. // Display the tensor by calling the printer on it
  370. for (int i = 0; i < tensorList.size(); i++) {
  371. std::ostringstream ss;
  372. ss << *tensorList[i] << std::endl;
  373. MS_LOG(INFO) << "Tensor print: " << ss.str();
  374. }
  375. rc = dI.FetchNextTensorRow(&tensorList);
  376. EXPECT_TRUE(rc.IsOk());
  377. rowCount++;
  378. }
  379. ASSERT_EQ(rowCount, 30);
  380. }