You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

tfReader_op_test.cc 23 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <iostream>
  17. #include <memory>
  18. #include <vector>
  19. #include "dataset/core/client.h"
  20. #include "dataset/engine/data_schema.h"
  21. #include "common/common.h"
  22. #include "common/utils.h"
  23. #include "gtest/gtest.h"
  24. #include "utils/log_adapter.h"
  25. namespace common = mindspore::common;
  26. using namespace mindspore::dataset;
  27. using mindspore::MsLogLevel::INFO;
  28. using mindspore::ExceptionType::NoExceptionType;
  29. using mindspore::LogStream;
  30. class MindDataTestTFReaderOp : public UT::DatasetOpTesting {
  31. };
  32. TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) {
  33. // Start with an empty execution tree
  34. auto my_tree = std::make_shared<ExecutionTree>();
  35. std::string dataset_path;
  36. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  37. std::shared_ptr<TFReaderOp> my_tfreader_op;
  38. TFReaderOp::Builder builder;
  39. builder.SetDatasetFilesList({dataset_path})
  40. .SetRowsPerBuffer(16)
  41. .SetNumWorkers(16);
  42. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  43. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  44. builder.SetDataSchema(std::move(schema));
  45. Status rc = builder.Build(&my_tfreader_op);
  46. ASSERT_TRUE(rc.IsOk());
  47. rc = my_tree->AssociateNode(my_tfreader_op);
  48. ASSERT_TRUE(rc.IsOk());
  49. rc = my_tree->AssignRoot(my_tfreader_op);
  50. ASSERT_TRUE(rc.IsOk());
  51. MS_LOG(INFO) << "Launching tree and begin iteration.";
  52. rc = my_tree->Prepare();
  53. ASSERT_TRUE(rc.IsOk());
  54. rc = my_tree->Launch();
  55. ASSERT_TRUE(rc.IsOk());
  56. // Start the loop of reading tensors from our pipeline
  57. DatasetIterator di(my_tree);
  58. TensorRow tensor_list;
  59. rc = di.FetchNextTensorRow(&tensor_list);
  60. ASSERT_TRUE(rc.IsOk());
  61. int row_count = 0;
  62. while (!tensor_list.empty()) {
  63. // Display the tensor by calling the printer on it
  64. for (int i = 0; i < tensor_list.size(); i++) {
  65. std::ostringstream ss;
  66. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  67. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  68. }
  69. rc = di.FetchNextTensorRow(&tensor_list);
  70. ASSERT_TRUE(rc.IsOk());
  71. row_count++;
  72. }
  73. ASSERT_EQ(row_count, 12);
  74. }
  75. TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeRowsPerBuffer) {
  76. // Start with an empty execution tree
  77. auto my_tree = std::make_shared<ExecutionTree>();
  78. std::string dataset_path;
  79. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  80. std::shared_ptr<TFReaderOp> my_tfreader_op;
  81. TFReaderOp::Builder builder;
  82. builder.SetDatasetFilesList({dataset_path})
  83. .SetRowsPerBuffer(500)
  84. .SetNumWorkers(16);
  85. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  86. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  87. builder.SetDataSchema(std::move(schema));
  88. Status rc = builder.Build(&my_tfreader_op);
  89. ASSERT_TRUE(rc.IsOk());
  90. rc = my_tree->AssociateNode(my_tfreader_op);
  91. ASSERT_TRUE(rc.IsOk());
  92. rc = my_tree->AssignRoot(my_tfreader_op);
  93. ASSERT_TRUE(rc.IsOk());
  94. MS_LOG(INFO) << "Launching tree and begin iteration.";
  95. rc = my_tree->Prepare();
  96. ASSERT_TRUE(rc.IsOk());
  97. rc = my_tree->Launch();
  98. ASSERT_TRUE(rc.IsOk());
  99. // Start the loop of reading tensors from our pipeline
  100. DatasetIterator di(my_tree);
  101. TensorRow tensor_list;
  102. rc = di.FetchNextTensorRow(&tensor_list);
  103. ASSERT_TRUE(rc.IsOk());
  104. int row_count = 0;
  105. while (!tensor_list.empty()) {
  106. // Display the tensor by calling the printer on it
  107. for (int i = 0; i < tensor_list.size(); i++) {
  108. std::ostringstream ss;
  109. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  110. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  111. }
  112. rc = di.FetchNextTensorRow(&tensor_list);
  113. ASSERT_TRUE(rc.IsOk());
  114. row_count++;
  115. }
  116. ASSERT_EQ(row_count, 12);
  117. }
  118. TEST_F(MindDataTestTFReaderOp, TestTFReaderSmallRowsPerBuffer) {
  119. // Start with an empty execution tree
  120. auto my_tree = std::make_shared<ExecutionTree>();
  121. std::string dataset_path;
  122. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  123. std::shared_ptr<TFReaderOp> my_tfreader_op;
  124. TFReaderOp::Builder builder;
  125. builder.SetDatasetFilesList({dataset_path})
  126. .SetRowsPerBuffer(1)
  127. .SetNumWorkers(16);
  128. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  129. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  130. builder.SetDataSchema(std::move(schema));
  131. Status rc = builder.Build(&my_tfreader_op);
  132. ASSERT_TRUE(rc.IsOk());
  133. rc = my_tree->AssociateNode(my_tfreader_op);
  134. ASSERT_TRUE(rc.IsOk());
  135. rc = my_tree->AssignRoot(my_tfreader_op);
  136. ASSERT_TRUE(rc.IsOk());
  137. MS_LOG(INFO) << "Launching tree and begin iteration.";
  138. rc = my_tree->Prepare();
  139. ASSERT_TRUE(rc.IsOk());
  140. rc = my_tree->Launch();
  141. ASSERT_TRUE(rc.IsOk());
  142. // Start the loop of reading tensors from our pipeline
  143. DatasetIterator di(my_tree);
  144. TensorRow tensor_list;
  145. rc = di.FetchNextTensorRow(&tensor_list);
  146. ASSERT_TRUE(rc.IsOk());
  147. int row_count = 0;
  148. while (!tensor_list.empty()) {
  149. // Display the tensor by calling the printer on it
  150. for (int i = 0; i < tensor_list.size(); i++) {
  151. std::ostringstream ss;
  152. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  153. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  154. }
  155. rc = di.FetchNextTensorRow(&tensor_list);
  156. ASSERT_TRUE(rc.IsOk());
  157. row_count++;
  158. }
  159. ASSERT_EQ(row_count, 12);
  160. }
  161. TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeQueueSize) {
  162. // Start with an empty execution tree
  163. auto my_tree = std::make_shared<ExecutionTree>();
  164. std::string dataset_path;
  165. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  166. std::shared_ptr<TFReaderOp> my_tfreader_op;
  167. TFReaderOp::Builder builder;
  168. builder.SetDatasetFilesList({dataset_path})
  169. .SetWorkerConnectorSize(1)
  170. .SetRowsPerBuffer(16)
  171. .SetNumWorkers(16);
  172. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  173. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  174. builder.SetDataSchema(std::move(schema));
  175. Status rc = builder.Build(&my_tfreader_op);
  176. ASSERT_TRUE(rc.IsOk());
  177. rc = my_tree->AssociateNode(my_tfreader_op);
  178. ASSERT_TRUE(rc.IsOk());
  179. rc = my_tree->AssignRoot(my_tfreader_op);
  180. ASSERT_TRUE(rc.IsOk());
  181. MS_LOG(INFO) << "Launching tree and begin iteration.";
  182. rc = my_tree->Prepare();
  183. ASSERT_TRUE(rc.IsOk());
  184. rc = my_tree->Launch();
  185. ASSERT_TRUE(rc.IsOk());
  186. // Start the loop of reading tensors from our pipeline
  187. DatasetIterator di(my_tree);
  188. TensorRow tensor_list;
  189. rc = di.FetchNextTensorRow(&tensor_list);
  190. ASSERT_TRUE(rc.IsOk());
  191. int row_count = 0;
  192. while (!tensor_list.empty()) {
  193. // Display the tensor by calling the printer on it
  194. for (int i = 0; i < tensor_list.size(); i++) {
  195. std::ostringstream ss;
  196. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  197. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  198. }
  199. rc = di.FetchNextTensorRow(&tensor_list);
  200. ASSERT_TRUE(rc.IsOk());
  201. row_count++;
  202. }
  203. ASSERT_EQ(row_count, 12);
  204. }
  205. TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) {
  206. // Start with an empty execution tree
  207. auto my_tree = std::make_shared<ExecutionTree>();
  208. std::string dataset_path;
  209. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  210. std::shared_ptr<TFReaderOp> my_tfreader_op;
  211. TFReaderOp::Builder builder;
  212. builder.SetDatasetFilesList({dataset_path})
  213. .SetRowsPerBuffer(16)
  214. .SetNumWorkers(1);
  215. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  216. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  217. builder.SetDataSchema(std::move(schema));
  218. Status rc = builder.Build(&my_tfreader_op);
  219. ASSERT_TRUE(rc.IsOk());
  220. rc = my_tree->AssociateNode(my_tfreader_op);
  221. ASSERT_TRUE(rc.IsOk());
  222. rc = my_tree->AssignRoot(my_tfreader_op);
  223. ASSERT_TRUE(rc.IsOk());
  224. MS_LOG(INFO) << "Launching tree and begin iteration.";
  225. rc = my_tree->Prepare();
  226. ASSERT_TRUE(rc.IsOk());
  227. rc = my_tree->Launch();
  228. ASSERT_TRUE(rc.IsOk());
  229. // Start the loop of reading tensors from our pipeline
  230. DatasetIterator di(my_tree);
  231. TensorRow tensor_list;
  232. rc = di.FetchNextTensorRow(&tensor_list);
  233. ASSERT_TRUE(rc.IsOk());
  234. int row_count = 0;
  235. while (!tensor_list.empty()) {
  236. // Display the tensor by calling the printer on it
  237. for (int i = 0; i < tensor_list.size(); i++) {
  238. std::ostringstream ss;
  239. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  240. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  241. }
  242. rc = di.FetchNextTensorRow(&tensor_list);
  243. ASSERT_TRUE(rc.IsOk());
  244. row_count++;
  245. }
  246. ASSERT_EQ(row_count, 12);
  247. }
  248. TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) {
  249. // Start with an empty execution tree
  250. auto my_tree = std::make_shared<ExecutionTree>();
  251. std::string dataset_path;
  252. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  253. // TFReaderOp
  254. std::shared_ptr<TFReaderOp> my_tfreader_op;
  255. TFReaderOp::Builder builder;
  256. builder.SetDatasetFilesList({dataset_path})
  257. .SetRowsPerBuffer(16)
  258. .SetWorkerConnectorSize(16)
  259. .SetNumWorkers(16);
  260. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  261. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  262. builder.SetDataSchema(std::move(schema));
  263. Status rc= builder.Build(&my_tfreader_op);
  264. ASSERT_TRUE(rc.IsOk());
  265. rc = my_tree->AssociateNode(my_tfreader_op);
  266. ASSERT_TRUE(rc.IsOk());
  267. // RepeatOp
  268. std::shared_ptr<RepeatOp> my_repeat_op = std::make_shared<RepeatOp>(3);
  269. rc = my_tree->AssociateNode(my_repeat_op);
  270. ASSERT_TRUE(rc.IsOk());
  271. // Set children/root layout.
  272. rc = my_repeat_op->AddChild(my_tfreader_op);
  273. ASSERT_TRUE(rc.IsOk());
  274. rc = my_tree->AssignRoot(my_repeat_op);
  275. ASSERT_TRUE(rc.IsOk());
  276. MS_LOG(INFO) << "Launching tree and begin iteration.";
  277. rc = my_tree->Prepare();
  278. ASSERT_TRUE(rc.IsOk());
  279. rc = my_tree->Launch();
  280. ASSERT_TRUE(rc.IsOk());
  281. // Start the loop of reading tensors from our pipeline
  282. DatasetIterator di(my_tree);
  283. TensorRow tensor_list;
  284. rc = di.FetchNextTensorRow(&tensor_list);
  285. ASSERT_TRUE(rc.IsOk());
  286. int row_count = 0;
  287. while (!tensor_list.empty()) {
  288. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  289. // Display the tensor by calling the printer on it
  290. for (int i = 0; i < tensor_list.size(); i++) {
  291. std::ostringstream ss;
  292. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  293. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  294. }
  295. rc = di.FetchNextTensorRow(&tensor_list);
  296. ASSERT_TRUE(rc.IsOk());
  297. row_count++;
  298. }
  299. ASSERT_EQ(row_count, 12 * 3);
  300. }
  301. TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) {
  302. // Start with an empty execution tree
  303. auto my_tree = std::make_shared<ExecutionTree>();
  304. std::string dataset_path;
  305. dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  306. std::unique_ptr<DataSchema> data_schema = std::make_unique<DataSchema>();
  307. std::vector<std::string> columns_to_load;
  308. columns_to_load.push_back("col_sint32");
  309. columns_to_load.push_back("col_binary");
  310. data_schema->LoadSchemaFile(dataset_path + "/datasetSchema.json", columns_to_load);
  311. std::shared_ptr<TFReaderOp> my_tfreader_op;
  312. TFReaderOp::Builder builder;
  313. builder.SetDatasetFilesList({dataset_path+"/test.data"})
  314. .SetRowsPerBuffer(16)
  315. .SetNumWorkers(16)
  316. .SetDataSchema(std::move(data_schema));
  317. Status rc = builder.Build(&my_tfreader_op);
  318. ASSERT_TRUE(rc.IsOk());
  319. rc = my_tree->AssociateNode(my_tfreader_op);
  320. ASSERT_TRUE(rc.IsOk());
  321. rc = my_tree->AssignRoot(my_tfreader_op);
  322. ASSERT_TRUE(rc.IsOk());
  323. MS_LOG(INFO) << "Launching tree and begin iteration.";
  324. rc = my_tree->Prepare();
  325. ASSERT_TRUE(rc.IsOk());
  326. rc = my_tree->Launch();
  327. ASSERT_TRUE(rc.IsOk());
  328. // Start the loop of reading tensors from our pipeline
  329. DatasetIterator di(my_tree);
  330. TensorRow tensor_list;
  331. rc = di.FetchNextTensorRow(&tensor_list);
  332. ASSERT_TRUE(rc.IsOk());
  333. int row_count = 0;
  334. while (!tensor_list.empty()) {
  335. // Display the tensor by calling the printer on it
  336. ASSERT_EQ(tensor_list.size(), columns_to_load.size());
  337. for (int i = 0; i < tensor_list.size(); i++) {
  338. std::ostringstream ss;
  339. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  340. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  341. }
  342. rc = di.FetchNextTensorRow(&tensor_list);
  343. ASSERT_TRUE(rc.IsOk());
  344. row_count++;
  345. }
  346. ASSERT_EQ(row_count, 12);
  347. }
  348. TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Row) {
  349. // Start with an empty execution tree
  350. auto my_tree = std::make_shared<ExecutionTree>();
  351. std::string dataset_path;
  352. dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  353. std::string data_schema_filepath = dataset_path + "/datasetSchema1Row.json";
  354. // TFReaderOp
  355. std::shared_ptr<TFReaderOp> my_tfreader_op;
  356. TFReaderOp::Builder builder;
  357. builder.SetDatasetFilesList({dataset_path + "/test.data"}).SetRowsPerBuffer(5).SetNumWorkers(16);
  358. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  359. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {});
  360. builder.SetDataSchema(std::move(schema));
  361. Status rc= builder.Build(&my_tfreader_op);
  362. ASSERT_TRUE(rc.IsOk());
  363. rc = my_tree->AssociateNode(my_tfreader_op);
  364. ASSERT_TRUE(rc.IsOk());
  365. rc = my_tree->AssignRoot(my_tfreader_op);
  366. ASSERT_TRUE(rc.IsOk());
  367. MS_LOG(INFO) << "Launching tree and begin iteration.";
  368. rc = my_tree->Prepare();
  369. ASSERT_TRUE(rc.IsOk());
  370. rc = my_tree->Launch();
  371. ASSERT_TRUE(rc.IsOk());
  372. // Start the loop of reading tensors from our pipeline
  373. DatasetIterator di(my_tree);
  374. TensorRow tensor_list;
  375. rc = di.FetchNextTensorRow(&tensor_list);
  376. ASSERT_TRUE(rc.IsOk());
  377. int row_count = 0;
  378. while (!tensor_list.empty()) {
  379. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  380. // Display the tensor by calling the printer on it
  381. for (int i = 0; i < tensor_list.size(); i++) {
  382. std::ostringstream ss;
  383. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  384. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  385. }
  386. rc = di.FetchNextTensorRow(&tensor_list);
  387. ASSERT_TRUE(rc.IsOk());
  388. row_count++;
  389. }
  390. ASSERT_EQ(row_count, 1);
  391. }
  392. TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Buffer) {
  393. // Start with an empty execution tree
  394. auto my_tree = std::make_shared<ExecutionTree>();
  395. std::string dataset_path;
  396. dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  397. std::string data_schema_filepath = dataset_path + "/datasetSchema5Rows.json";
  398. // TFReaderOp
  399. std::shared_ptr<TFReaderOp> my_tfreader_op;
  400. TFReaderOp::Builder builder;
  401. builder.SetDatasetFilesList({dataset_path + "/test.data"}).SetRowsPerBuffer(5).SetNumWorkers(16);
  402. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  403. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema5Rows.json", {});
  404. builder.SetDataSchema(std::move(schema));
  405. Status rc= builder.Build(&my_tfreader_op);
  406. ASSERT_TRUE(rc.IsOk());
  407. rc = my_tree->AssociateNode(my_tfreader_op);
  408. ASSERT_TRUE(rc.IsOk());
  409. rc = my_tree->AssignRoot(my_tfreader_op);
  410. ASSERT_TRUE(rc.IsOk());
  411. MS_LOG(INFO) << "Launching tree and begin iteration.";
  412. rc = my_tree->Prepare();
  413. ASSERT_TRUE(rc.IsOk());
  414. rc = my_tree->Launch();
  415. ASSERT_TRUE(rc.IsOk());
  416. // Start the loop of reading tensors from our pipeline
  417. DatasetIterator di(my_tree);
  418. TensorRow tensor_list;
  419. rc = di.FetchNextTensorRow(&tensor_list);
  420. ASSERT_TRUE(rc.IsOk());
  421. int row_count = 0;
  422. while (!tensor_list.empty()) {
  423. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  424. // Display the tensor by calling the printer on it
  425. for (int i = 0; i < tensor_list.size(); i++) {
  426. std::ostringstream ss;
  427. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  428. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  429. }
  430. rc = di.FetchNextTensorRow(&tensor_list);
  431. ASSERT_TRUE(rc.IsOk());
  432. row_count++;
  433. }
  434. ASSERT_EQ(row_count, 5);
  435. }
  436. TEST_F(MindDataTestTFReaderOp, TestTFReaderTake7Rows) {
  437. // Start with an empty execution tree
  438. auto my_tree = std::make_shared<ExecutionTree>();
  439. std::string dataset_path;
  440. dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  441. std::string data_schema_filepath = dataset_path + "/datasetSchema7Rows.json";
  442. // TFReaderOp
  443. std::shared_ptr<TFReaderOp> my_tfreader_op;
  444. TFReaderOp::Builder builder;
  445. builder.SetDatasetFilesList({dataset_path + "/test.data"}).SetRowsPerBuffer(5).SetNumWorkers(16);
  446. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  447. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema7Rows.json", {});
  448. builder.SetDataSchema(std::move(schema));
  449. Status rc= builder.Build(&my_tfreader_op);
  450. ASSERT_TRUE(rc.IsOk());
  451. rc = my_tree->AssociateNode(my_tfreader_op);
  452. ASSERT_TRUE(rc.IsOk());
  453. rc = my_tree->AssignRoot(my_tfreader_op);
  454. ASSERT_TRUE(rc.IsOk());
  455. MS_LOG(INFO) << "Launching tree and begin iteration.";
  456. rc = my_tree->Prepare();
  457. ASSERT_TRUE(rc.IsOk());
  458. rc = my_tree->Launch();
  459. ASSERT_TRUE(rc.IsOk());
  460. // Start the loop of reading tensors from our pipeline
  461. DatasetIterator di(my_tree);
  462. TensorRow tensor_list;
  463. rc = di.FetchNextTensorRow(&tensor_list);
  464. ASSERT_TRUE(rc.IsOk());
  465. int row_count = 0;
  466. while (!tensor_list.empty()) {
  467. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  468. // Display the tensor by calling the printer on it
  469. for (int i = 0; i < tensor_list.size(); i++) {
  470. std::ostringstream ss;
  471. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  472. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  473. }
  474. rc = di.FetchNextTensorRow(&tensor_list);
  475. ASSERT_TRUE(rc.IsOk());
  476. row_count++;
  477. }
  478. ASSERT_EQ(row_count, 7);
  479. }
  480. TEST_F(MindDataTestTFReaderOp, TestTFReaderBasicNoSchema) {
  481. // Start with an empty execution tree
  482. auto my_tree = std::make_shared<ExecutionTree>();
  483. std::string dataset_path;
  484. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  485. std::shared_ptr<TFReaderOp> my_tfreader_op;
  486. TFReaderOp::Builder builder;
  487. builder.SetDatasetFilesList({dataset_path})
  488. .SetRowsPerBuffer(16)
  489. .SetNumWorkers(16);
  490. Status rc = builder.Build(&my_tfreader_op);
  491. ASSERT_TRUE(rc.IsOk());
  492. rc = my_tree->AssociateNode(my_tfreader_op);
  493. ASSERT_TRUE(rc.IsOk());
  494. rc = my_tree->AssignRoot(my_tfreader_op);
  495. ASSERT_TRUE(rc.IsOk());
  496. MS_LOG(INFO) << "Launching tree and begin iteration.";
  497. rc = my_tree->Prepare();
  498. ASSERT_TRUE(rc.IsOk());
  499. rc = my_tree->Launch();
  500. ASSERT_TRUE(rc.IsOk());
  501. // Start the loop of reading tensors from our pipeline
  502. DatasetIterator di(my_tree);
  503. TensorRow tensor_list;
  504. rc = di.FetchNextTensorRow(&tensor_list);
  505. ASSERT_TRUE(rc.IsOk());
  506. int row_count = 0;
  507. while (!tensor_list.empty()) {
  508. // Display the tensor by calling the printer on it
  509. ASSERT_EQ(tensor_list.size(), 9);
  510. for (int i = 0; i < tensor_list.size(); i++) {
  511. std::ostringstream ss;
  512. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  513. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  514. }
  515. rc = di.FetchNextTensorRow(&tensor_list);
  516. ASSERT_TRUE(rc.IsOk());
  517. row_count++;
  518. }
  519. ASSERT_EQ(row_count, 12);
  520. }
  521. TEST_F(MindDataTestTFReaderOp, TestTotalRowsBasic) {
  522. std::string tf_file = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  523. std::vector<std::string> filenames;
  524. for (int i = 0; i < 5; i++) {
  525. filenames.push_back(tf_file);
  526. }
  527. int64_t total_rows = 0;
  528. TFReaderOp::CountTotalRows(&total_rows, filenames, 1);
  529. ASSERT_EQ(total_rows, 60);
  530. TFReaderOp::CountTotalRows(&total_rows, filenames, 2);
  531. ASSERT_EQ(total_rows, 60);
  532. TFReaderOp::CountTotalRows(&total_rows, filenames, 3);
  533. ASSERT_EQ(total_rows, 60);
  534. TFReaderOp::CountTotalRows(&total_rows, filenames, 4);
  535. ASSERT_EQ(total_rows, 60);
  536. TFReaderOp::CountTotalRows(&total_rows, filenames, 5);
  537. ASSERT_EQ(total_rows, 60);
  538. TFReaderOp::CountTotalRows(&total_rows, filenames, 6);
  539. ASSERT_EQ(total_rows, 60);
  540. TFReaderOp::CountTotalRows(&total_rows, filenames, 729);
  541. ASSERT_EQ(total_rows, 60);
  542. TFReaderOp::CountTotalRows(&total_rows, filenames, 1, true);
  543. ASSERT_EQ(total_rows, 60);
  544. TFReaderOp::CountTotalRows(&total_rows, filenames, 2, true);
  545. ASSERT_EQ(total_rows, 60);
  546. TFReaderOp::CountTotalRows(&total_rows, filenames, 3, true);
  547. ASSERT_EQ(total_rows, 60);
  548. TFReaderOp::CountTotalRows(&total_rows, filenames, 4, true);
  549. ASSERT_EQ(total_rows, 60);
  550. TFReaderOp::CountTotalRows(&total_rows, filenames, 5, true);
  551. ASSERT_EQ(total_rows, 60);
  552. TFReaderOp::CountTotalRows(&total_rows, filenames, 6, true);
  553. ASSERT_EQ(total_rows, 60);
  554. TFReaderOp::CountTotalRows(&total_rows, filenames, 729, true);
  555. ASSERT_EQ(total_rows, 60);
  556. }
  557. TEST_F(MindDataTestTFReaderOp, TestTFReaderInvalidFiles) {
  558. // Start with an empty execution tree
  559. auto my_tree = std::make_shared<ExecutionTree>();
  560. std::string valid_file = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  561. std::string schema_file = datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json";
  562. std::string invalid_file = datasets_root_path_ + "/testTFTestAllTypes/invalidFile.txt";
  563. std::string nonexistent_file = "this/file/doesnt/exist";
  564. std::shared_ptr<TFReaderOp> my_tfreader_op;
  565. TFReaderOp::Builder builder;
  566. builder.SetDatasetFilesList({invalid_file, valid_file, schema_file})
  567. .SetRowsPerBuffer(16)
  568. .SetNumWorkers(16);
  569. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  570. schema->LoadSchemaFile(schema_file, {});
  571. builder.SetDataSchema(std::move(schema));
  572. Status rc = builder.Build(&my_tfreader_op);
  573. ASSERT_TRUE(!rc.IsOk());
  574. builder.SetDatasetFilesList({invalid_file, valid_file, schema_file, nonexistent_file})
  575. .SetRowsPerBuffer(16)
  576. .SetNumWorkers(16);
  577. schema = std::make_unique<DataSchema>();
  578. schema->LoadSchemaFile(schema_file, {});
  579. builder.SetDataSchema(std::move(schema));
  580. rc = builder.Build(&my_tfreader_op);
  581. ASSERT_TRUE(!rc.IsOk());
  582. }