You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

random_data_op_test.cc 13 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. /**
  2. * Copyright 2019-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "minddata/dataset/core/client.h"
  17. #include "common/common.h"
  18. #include "gtest/gtest.h"
  19. #include <memory>
  20. #include <vector>
  21. #include <iostream>
  22. #include "minddata/dataset/core/tensor_shape.h"
  23. #include "minddata/dataset/engine/datasetops/source/random_data_op.h"
  24. #include "minddata/dataset/engine/data_schema.h"
  25. #include "minddata/dataset/util/random.h"
  26. using namespace mindspore::dataset;
  27. using mindspore::LogStream;
  28. using mindspore::ExceptionType::NoExceptionType;
  29. using mindspore::MsLogLevel::INFO;
  30. class MindDataTestRandomDataOp : public UT::DatasetOpTesting {};
  31. // Test info:
  32. // - Simple test with a user-provided schema generated purely from DataSchema C API
  33. // - has an interaction loop
  34. //
  35. // Tree: single node tree with RandomDataOp
  36. //
  37. // RandomDataOp
  38. //
  39. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
  40. Status rc;
  41. int32_t rank = 0; // not used
  42. MS_LOG(INFO) << "UT test RandomDataOpBasic1";
  43. // Start with an empty execution tree
  44. auto myTree = std::make_shared<ExecutionTree>();
  45. // Create a schema using the C api's
  46. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  47. // RandomDataOp can randomly fill in unknown dimension lengths of a shape.
  48. // Most other ops cannot do that as they are limited by the physical data itself. We're
  49. // more flexible with random data since it is just making stuff up on the fly.
  50. TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3});
  51. ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible,
  52. rank, // not used
  53. &c1Shape);
  54. // Column 2 will just be a scalar label number
  55. TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor
  56. ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape);
  57. testSchema->AddColumn(c1);
  58. testSchema->AddColumn(c2);
  59. std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  60. auto op_connector_size = cfg->op_connector_size();
  61. std::shared_ptr<RandomDataOp> myRandomDataOp =
  62. std::make_shared<RandomDataOp>(1, op_connector_size, 25, std::move(testSchema));
  63. rc = myTree->AssociateNode(myRandomDataOp);
  64. EXPECT_TRUE(rc.IsOk());
  65. rc = myTree->AssignRoot(myRandomDataOp);
  66. EXPECT_TRUE(rc.IsOk());
  67. std::ostringstream ss;
  68. ss << *myRandomDataOp;
  69. MS_LOG(INFO) << "RandomDataOp print: %s" << ss.str();
  70. MS_LOG(INFO) << "Launching tree and begin iteration";
  71. rc = myTree->Prepare();
  72. EXPECT_TRUE(rc.IsOk());
  73. rc = myTree->Launch();
  74. EXPECT_TRUE(rc.IsOk());
  75. // Start the loop of reading tensors from our pipeline
  76. DatasetIterator dI(myTree);
  77. TensorRow tensorList;
  78. rc = dI.FetchNextTensorRow(&tensorList);
  79. EXPECT_TRUE(rc.IsOk());
  80. int rowCount = 0;
  81. while (!tensorList.empty()) {
  82. // Don't display these rows...too big to show
  83. MS_LOG(INFO) << "Row fetched #: " << rowCount;
  84. rc = dI.FetchNextTensorRow(&tensorList);
  85. EXPECT_TRUE(rc.IsOk());
  86. rowCount++;
  87. }
  88. ASSERT_EQ(rowCount, 25);
  89. }
  90. // Test info:
  91. // - Simple test with a randomly generated schema
  92. // - no iteration loop on this one, just create the op
  93. //
  94. // Tree: single node tree with RandomDataOp
  95. //
  96. // RandomDataOp
  97. //
  98. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
  99. Status rc;
  100. MS_LOG(INFO) << "UT test RandomDataOpBasic2";
  101. // Start with an empty execution tree
  102. auto myTree = std::make_shared<ExecutionTree>();
  103. std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  104. auto op_connector_size = cfg->op_connector_size();
  105. std::shared_ptr<RandomDataOp> myRandomDataOp = std::make_shared<RandomDataOp>(1, op_connector_size, 0, nullptr);
  106. rc = myTree->AssociateNode(myRandomDataOp);
  107. EXPECT_TRUE(rc.IsOk());
  108. rc = myTree->AssignRoot(myRandomDataOp);
  109. EXPECT_TRUE(rc.IsOk());
  110. std::ostringstream ss;
  111. ss << *myRandomDataOp;
  112. MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
  113. }
  114. // Test info:
  115. // - json file test with iteration
  116. //
  117. // Tree: single node tree with RandomDataOp
  118. //
  119. // RandomDataOp
  120. //
  121. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
  122. Status rc;
  123. MS_LOG(INFO) << "UT test RandomDataOpBasic3";
  124. // Start with an empty execution tree
  125. auto myTree = std::make_shared<ExecutionTree>();
  126. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  127. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {});
  128. EXPECT_TRUE(rc.IsOk());
  129. std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  130. auto op_connector_size = cfg->op_connector_size();
  131. std::shared_ptr<RandomDataOp> myRandomDataOp =
  132. std::make_shared<RandomDataOp>(1, op_connector_size, 10, std::move(testSchema));
  133. rc = myTree->AssociateNode(myRandomDataOp);
  134. EXPECT_TRUE(rc.IsOk());
  135. rc = myTree->AssignRoot(myRandomDataOp);
  136. EXPECT_TRUE(rc.IsOk());
  137. std::ostringstream ss;
  138. ss << *myRandomDataOp;
  139. MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
  140. MS_LOG(INFO) << "Launching tree and begin iteration";
  141. rc = myTree->Prepare();
  142. EXPECT_TRUE(rc.IsOk());
  143. rc = myTree->Launch();
  144. EXPECT_TRUE(rc.IsOk());
  145. // Start the loop of reading tensors from our pipeline
  146. DatasetIterator dI(myTree);
  147. TensorRow tensorList;
  148. rc = dI.FetchNextTensorRow(&tensorList);
  149. EXPECT_TRUE(rc.IsOk());
  150. int rowCount = 0;
  151. while (!tensorList.empty()) {
  152. // Don't display these rows...too big to show
  153. MS_LOG(INFO) << "Row fetched #: " << rowCount;
  154. rc = dI.FetchNextTensorRow(&tensorList);
  155. EXPECT_TRUE(rc.IsOk());
  156. rowCount++;
  157. }
  158. ASSERT_EQ(rowCount, 10);
  159. }
  160. // Test info:
  161. // - json schema input it's a fairly simple one
  162. // - has an interaction loop
  163. //
  164. // Tree: RepeatOp over RandomDataOp
  165. //
  166. // RepeatOp
  167. // |
  168. // RandomDataOp
  169. //
  170. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
  171. Status rc;
  172. MS_LOG(INFO) << "UT test RandomDataOpBasic4";
  173. // Start with an empty execution tree
  174. auto myTree = std::make_shared<ExecutionTree>();
  175. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  176. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  177. EXPECT_TRUE(rc.IsOk());
  178. std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  179. auto op_connector_size = cfg->op_connector_size();
  180. std::shared_ptr<RandomDataOp> myRandomDataOp =
  181. std::make_shared<RandomDataOp>(1, op_connector_size, 10, std::move(testSchema));
  182. rc = myTree->AssociateNode(myRandomDataOp);
  183. EXPECT_TRUE(rc.IsOk());
  184. uint32_t numRepeats = 2;
  185. std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
  186. rc = myTree->AssociateNode(myRepeatOp);
  187. EXPECT_TRUE(rc.IsOk());
  188. myRandomDataOp->set_total_repeats(numRepeats);
  189. myRandomDataOp->set_num_repeats_per_epoch(numRepeats);
  190. rc = myRepeatOp->AddChild(myRandomDataOp);
  191. EXPECT_TRUE(rc.IsOk());
  192. rc = myTree->AssignRoot(myRepeatOp);
  193. EXPECT_TRUE(rc.IsOk());
  194. MS_LOG(INFO) << "Launching tree and begin iteration";
  195. rc = myTree->Prepare();
  196. EXPECT_TRUE(rc.IsOk());
  197. rc = myTree->Launch();
  198. EXPECT_TRUE(rc.IsOk());
  199. // Start the loop of reading tensors from our pipeline
  200. DatasetIterator dI(myTree);
  201. TensorRow tensorList;
  202. rc = dI.FetchNextTensorRow(&tensorList);
  203. EXPECT_TRUE(rc.IsOk());
  204. int rowCount = 0;
  205. while (!tensorList.empty()) {
  206. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  207. // Display the tensor by calling the printer on it
  208. for (int i = 0; i < tensorList.size(); i++) {
  209. std::ostringstream ss;
  210. ss << *tensorList[i] << std::endl;
  211. MS_LOG(INFO) << "Tensor print: %s" << ss.str();
  212. }
  213. rc = dI.FetchNextTensorRow(&tensorList);
  214. EXPECT_TRUE(rc.IsOk());
  215. rowCount++;
  216. }
  217. ASSERT_EQ(rowCount, 20);
  218. }
  219. // Test info:
  220. // - json schema input it's a fairly simple one
  221. // - has an interaction loop
  222. // - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers
  223. //
  224. // Tree: RepeatOp over RandomDataOp
  225. //
  226. // RepeatOp
  227. // |
  228. // RandomDataOp
  229. //
  230. TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
  231. Status rc;
  232. MS_LOG(INFO) << "UT test RandomDataOpBasic5";
  233. // Start with an empty execution tree
  234. auto myTree = std::make_shared<ExecutionTree>();
  235. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  236. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  237. EXPECT_TRUE(rc.IsOk());
  238. std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  239. auto op_connector_size = cfg->op_connector_size();
  240. std::shared_ptr<RandomDataOp> myRandomDataOp =
  241. std::make_shared<RandomDataOp>(4, op_connector_size, 10, std::move(testSchema));
  242. rc = myTree->AssociateNode(myRandomDataOp);
  243. EXPECT_TRUE(rc.IsOk());
  244. uint32_t numRepeats = 3;
  245. std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
  246. rc = myTree->AssociateNode(myRepeatOp);
  247. EXPECT_TRUE(rc.IsOk());
  248. myRandomDataOp->set_total_repeats(numRepeats);
  249. myRandomDataOp->set_num_repeats_per_epoch(numRepeats);
  250. rc = myRepeatOp->AddChild(myRandomDataOp);
  251. EXPECT_TRUE(rc.IsOk());
  252. rc = myTree->AssignRoot(myRepeatOp);
  253. EXPECT_TRUE(rc.IsOk());
  254. MS_LOG(INFO) << "Launching tree and begin iteration";
  255. rc = myTree->Prepare();
  256. EXPECT_TRUE(rc.IsOk());
  257. rc = myTree->Launch();
  258. EXPECT_TRUE(rc.IsOk());
  259. // Start the loop of reading tensors from our pipeline
  260. DatasetIterator dI(myTree);
  261. TensorRow tensorList;
  262. rc = dI.FetchNextTensorRow(&tensorList);
  263. EXPECT_TRUE(rc.IsOk());
  264. int rowCount = 0;
  265. while (!tensorList.empty()) {
  266. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  267. // Display the tensor by calling the printer on it
  268. for (int i = 0; i < tensorList.size(); i++) {
  269. std::ostringstream ss;
  270. ss << *tensorList[i] << std::endl;
  271. MS_LOG(INFO) << "Tensor print: ", ss.str();
  272. }
  273. rc = dI.FetchNextTensorRow(&tensorList);
  274. EXPECT_TRUE(rc.IsOk());
  275. rowCount++;
  276. }
  277. ASSERT_EQ(rowCount, 30);
  278. }
  279. // Test info:
  280. // - repeat shuffle random
  281. //
  282. // Tree: RepeatOp over RandomDataOp
  283. //
  284. // RepeatOp
  285. // |
  286. // ShuffleOp
  287. // |
  288. // RandomDataOp
  289. //
  290. TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
  291. Status rc;
  292. MS_LOG(INFO) << "UT test RandomDataOpTree1";
  293. // Start with an empty execution tree
  294. auto myTree = std::make_shared<ExecutionTree>();
  295. std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  296. rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  297. EXPECT_TRUE(rc.IsOk());
  298. std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  299. auto op_connector_size = cfg->op_connector_size();
  300. std::shared_ptr<RandomDataOp> myRandomDataOp =
  301. std::make_shared<RandomDataOp>(4, op_connector_size, 10, std::move(testSchema));
  302. rc = myTree->AssociateNode(myRandomDataOp);
  303. EXPECT_TRUE(rc.IsOk());
  304. uint32_t shuffle_seed = GetSeed();
  305. std::shared_ptr<ShuffleOp> myShuffleOp = std::make_shared<ShuffleOp>(4, shuffle_seed, op_connector_size, true);
  306. rc = myTree->AssociateNode(myShuffleOp);
  307. EXPECT_TRUE(rc.IsOk());
  308. uint32_t numRepeats = 3;
  309. std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
  310. rc = myTree->AssociateNode(myRepeatOp);
  311. EXPECT_TRUE(rc.IsOk());
  312. myShuffleOp->set_total_repeats(numRepeats);
  313. myShuffleOp->set_num_repeats_per_epoch(numRepeats);
  314. rc = myRepeatOp->AddChild(myShuffleOp);
  315. EXPECT_TRUE(rc.IsOk());
  316. myRandomDataOp->set_total_repeats(numRepeats);
  317. myRandomDataOp->set_num_repeats_per_epoch(numRepeats);
  318. rc = myShuffleOp->AddChild(myRandomDataOp);
  319. EXPECT_TRUE(rc.IsOk());
  320. rc = myTree->AssignRoot(myRepeatOp);
  321. EXPECT_TRUE(rc.IsOk());
  322. MS_LOG(INFO) << "Launching tree and begin iteration";
  323. rc = myTree->Prepare();
  324. EXPECT_TRUE(rc.IsOk());
  325. rc = myTree->Launch();
  326. EXPECT_TRUE(rc.IsOk());
  327. // Start the loop of reading tensors from our pipeline
  328. DatasetIterator dI(myTree);
  329. TensorRow tensorList;
  330. rc = dI.FetchNextTensorRow(&tensorList);
  331. EXPECT_TRUE(rc.IsOk());
  332. int rowCount = 0;
  333. while (!tensorList.empty()) {
  334. MS_LOG(INFO) << "Row display for row #: " << rowCount;
  335. // Display the tensor by calling the printer on it
  336. for (int i = 0; i < tensorList.size(); i++) {
  337. std::ostringstream ss;
  338. ss << *tensorList[i] << std::endl;
  339. MS_LOG(INFO) << "Tensor print: " << ss.str();
  340. }
  341. rc = dI.FetchNextTensorRow(&tensorList);
  342. EXPECT_TRUE(rc.IsOk());
  343. rowCount++;
  344. }
  345. ASSERT_EQ(rowCount, 30);
  346. }