You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

tfReader_op_test.cc 26 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
  1. /**
  2. * Copyright 2019-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <iostream>
  17. #include <memory>
  18. #include <vector>
  19. #include "minddata/dataset/core/client.h"
  20. #include "minddata/dataset/engine/data_schema.h"
  21. #include "minddata/dataset/engine/jagged_connector.h"
  22. #include "common/common.h"
  23. #include "gtest/gtest.h"
  24. #include "utils/log_adapter.h"
  25. namespace common = mindspore::common;
  26. using namespace mindspore::dataset;
  27. using mindspore::LogStream;
  28. using mindspore::ExceptionType::NoExceptionType;
  29. using mindspore::MsLogLevel::INFO;
  30. class MindDataTestTFReaderOp : public UT::DatasetOpTesting {};
  31. TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) {
  32. // Start with an empty execution tree
  33. auto my_tree = std::make_shared<ExecutionTree>();
  34. Status rc;
  35. std::string dataset_path;
  36. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  37. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  38. int32_t op_connector_size = config_manager->op_connector_size();
  39. int32_t num_workers = 1;
  40. int32_t worker_connector_size = config_manager->worker_connector_size();
  41. std::vector<std::string> files = {dataset_path};
  42. std::vector<std::string> columns_to_load = {};
  43. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  44. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  45. std::shared_ptr<TFReaderOp> my_tfreader_op =
  46. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  47. columns_to_load, false, 1, 0, false);
  48. rc = my_tfreader_op->Init();
  49. ASSERT_TRUE(rc.IsOk());
  50. rc = my_tree->AssociateNode(my_tfreader_op);
  51. ASSERT_TRUE(rc.IsOk());
  52. rc = my_tree->AssignRoot(my_tfreader_op);
  53. ASSERT_TRUE(rc.IsOk());
  54. MS_LOG(INFO) << "Launching tree and begin iteration.";
  55. rc = my_tree->Prepare();
  56. ASSERT_TRUE(rc.IsOk());
  57. rc = my_tree->Launch();
  58. ASSERT_TRUE(rc.IsOk());
  59. // Start the loop of reading tensors from our pipeline
  60. DatasetIterator di(my_tree);
  61. TensorRow tensor_list;
  62. rc = di.FetchNextTensorRow(&tensor_list);
  63. ASSERT_TRUE(rc.IsOk());
  64. int row_count = 0;
  65. while (!tensor_list.empty()) {
  66. // Display the tensor by calling the printer on it
  67. for (int i = 0; i < tensor_list.size(); i++) {
  68. std::ostringstream ss;
  69. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  70. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  71. }
  72. rc = di.FetchNextTensorRow(&tensor_list);
  73. ASSERT_TRUE(rc.IsOk());
  74. row_count++;
  75. }
  76. ASSERT_EQ(row_count, 12);
  77. }
  78. TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeRowsPerBuffer) {
  79. // Start with an empty execution tree
  80. auto my_tree = std::make_shared<ExecutionTree>();
  81. Status rc;
  82. std::string dataset_path;
  83. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  84. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  85. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  86. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  87. int32_t op_connector_size = config_manager->op_connector_size();
  88. int32_t num_workers = 1;
  89. int32_t worker_connector_size = config_manager->worker_connector_size();
  90. std::vector<std::string> files = {dataset_path};
  91. std::vector<std::string> columns_to_load = {};
  92. std::shared_ptr<TFReaderOp> my_tfreader_op =
  93. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  94. columns_to_load, false, 1, 0, false);
  95. rc = my_tfreader_op->Init();
  96. ASSERT_TRUE(rc.IsOk());
  97. rc = my_tree->AssociateNode(my_tfreader_op);
  98. ASSERT_TRUE(rc.IsOk());
  99. rc = my_tree->AssignRoot(my_tfreader_op);
  100. ASSERT_TRUE(rc.IsOk());
  101. MS_LOG(INFO) << "Launching tree and begin iteration.";
  102. rc = my_tree->Prepare();
  103. ASSERT_TRUE(rc.IsOk());
  104. rc = my_tree->Launch();
  105. ASSERT_TRUE(rc.IsOk());
  106. // Start the loop of reading tensors from our pipeline
  107. DatasetIterator di(my_tree);
  108. TensorRow tensor_list;
  109. rc = di.FetchNextTensorRow(&tensor_list);
  110. ASSERT_TRUE(rc.IsOk());
  111. int row_count = 0;
  112. while (!tensor_list.empty()) {
  113. // Display the tensor by calling the printer on it
  114. for (int i = 0; i < tensor_list.size(); i++) {
  115. std::ostringstream ss;
  116. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  117. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  118. }
  119. rc = di.FetchNextTensorRow(&tensor_list);
  120. ASSERT_TRUE(rc.IsOk());
  121. row_count++;
  122. }
  123. ASSERT_EQ(row_count, 12);
  124. }
  125. TEST_F(MindDataTestTFReaderOp, TestTFReaderSmallRowsPerBuffer) {
  126. // Start with an empty execution tree
  127. auto my_tree = std::make_shared<ExecutionTree>();
  128. Status rc;
  129. std::string dataset_path;
  130. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  131. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  132. int32_t op_connector_size = config_manager->op_connector_size();
  133. int32_t num_workers = 1;
  134. int32_t worker_connector_size = config_manager->worker_connector_size();
  135. std::vector<std::string> files = {dataset_path};
  136. std::vector<std::string> columns_to_load = {};
  137. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  138. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  139. std::shared_ptr<TFReaderOp> my_tfreader_op =
  140. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  141. columns_to_load, false, 1, 0, false);
  142. rc = my_tfreader_op->Init();
  143. ASSERT_TRUE(rc.IsOk());
  144. rc = my_tree->AssociateNode(my_tfreader_op);
  145. ASSERT_TRUE(rc.IsOk());
  146. rc = my_tree->AssignRoot(my_tfreader_op);
  147. ASSERT_TRUE(rc.IsOk());
  148. MS_LOG(INFO) << "Launching tree and begin iteration.";
  149. rc = my_tree->Prepare();
  150. ASSERT_TRUE(rc.IsOk());
  151. rc = my_tree->Launch();
  152. ASSERT_TRUE(rc.IsOk());
  153. // Start the loop of reading tensors from our pipeline
  154. DatasetIterator di(my_tree);
  155. TensorRow tensor_list;
  156. rc = di.FetchNextTensorRow(&tensor_list);
  157. ASSERT_TRUE(rc.IsOk());
  158. int row_count = 0;
  159. while (!tensor_list.empty()) {
  160. // Display the tensor by calling the printer on it
  161. for (int i = 0; i < tensor_list.size(); i++) {
  162. std::ostringstream ss;
  163. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  164. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  165. }
  166. rc = di.FetchNextTensorRow(&tensor_list);
  167. ASSERT_TRUE(rc.IsOk());
  168. row_count++;
  169. }
  170. ASSERT_EQ(row_count, 12);
  171. }
  172. TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeQueueSize) {
  173. // Start with an empty execution tree
  174. auto my_tree = std::make_shared<ExecutionTree>();
  175. Status rc;
  176. std::string dataset_path;
  177. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  178. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  179. int32_t op_connector_size = config_manager->op_connector_size();
  180. int32_t num_workers = 1;
  181. int32_t worker_connector_size = 1;
  182. std::vector<std::string> files = {dataset_path};
  183. std::vector<std::string> columns_to_load = {};
  184. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  185. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  186. std::shared_ptr<TFReaderOp> my_tfreader_op =
  187. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  188. columns_to_load, false, 1, 0, false);
  189. rc = my_tfreader_op->Init();
  190. ASSERT_TRUE(rc.IsOk());
  191. rc = my_tree->AssociateNode(my_tfreader_op);
  192. ASSERT_TRUE(rc.IsOk());
  193. rc = my_tree->AssignRoot(my_tfreader_op);
  194. ASSERT_TRUE(rc.IsOk());
  195. MS_LOG(INFO) << "Launching tree and begin iteration.";
  196. rc = my_tree->Prepare();
  197. ASSERT_TRUE(rc.IsOk());
  198. rc = my_tree->Launch();
  199. ASSERT_TRUE(rc.IsOk());
  200. // Start the loop of reading tensors from our pipeline
  201. DatasetIterator di(my_tree);
  202. TensorRow tensor_list;
  203. rc = di.FetchNextTensorRow(&tensor_list);
  204. ASSERT_TRUE(rc.IsOk());
  205. int row_count = 0;
  206. while (!tensor_list.empty()) {
  207. // Display the tensor by calling the printer on it
  208. for (int i = 0; i < tensor_list.size(); i++) {
  209. std::ostringstream ss;
  210. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  211. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  212. }
  213. rc = di.FetchNextTensorRow(&tensor_list);
  214. ASSERT_TRUE(rc.IsOk());
  215. row_count++;
  216. }
  217. ASSERT_EQ(row_count, 12);
  218. }
  219. TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) {
  220. // Start with an empty execution tree
  221. auto my_tree = std::make_shared<ExecutionTree>();
  222. Status rc;
  223. std::string dataset_path;
  224. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  225. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  226. int32_t op_connector_size = config_manager->op_connector_size();
  227. int32_t num_workers = 1;
  228. int32_t worker_connector_size = config_manager->worker_connector_size();
  229. std::vector<std::string> files = {dataset_path};
  230. std::vector<std::string> columns_to_load = {};
  231. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  232. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  233. std::shared_ptr<TFReaderOp> my_tfreader_op =
  234. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  235. columns_to_load, false, 1, 0, false);
  236. rc = my_tfreader_op->Init();
  237. ASSERT_TRUE(rc.IsOk());
  238. rc = my_tree->AssociateNode(my_tfreader_op);
  239. ASSERT_TRUE(rc.IsOk());
  240. rc = my_tree->AssignRoot(my_tfreader_op);
  241. ASSERT_TRUE(rc.IsOk());
  242. MS_LOG(INFO) << "Launching tree and begin iteration.";
  243. rc = my_tree->Prepare();
  244. ASSERT_TRUE(rc.IsOk());
  245. rc = my_tree->Launch();
  246. ASSERT_TRUE(rc.IsOk());
  247. // Start the loop of reading tensors from our pipeline
  248. DatasetIterator di(my_tree);
  249. TensorRow tensor_list;
  250. rc = di.FetchNextTensorRow(&tensor_list);
  251. ASSERT_TRUE(rc.IsOk());
  252. int row_count = 0;
  253. while (!tensor_list.empty()) {
  254. // Display the tensor by calling the printer on it
  255. for (int i = 0; i < tensor_list.size(); i++) {
  256. std::ostringstream ss;
  257. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  258. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  259. }
  260. rc = di.FetchNextTensorRow(&tensor_list);
  261. ASSERT_TRUE(rc.IsOk());
  262. row_count++;
  263. }
  264. ASSERT_EQ(row_count, 12);
  265. }
  266. TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) {
  267. // Start with an empty execution tree
  268. auto my_tree = std::make_shared<ExecutionTree>();
  269. Status rc;
  270. std::string dataset_path;
  271. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  272. // TFReaderOp
  273. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  274. int32_t op_connector_size = config_manager->op_connector_size();
  275. int32_t num_workers = 1;
  276. int32_t worker_connector_size = 16;
  277. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  278. std::vector<std::string> files = {dataset_path};
  279. std::vector<std::string> columns_to_load = {};
  280. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
  281. std::shared_ptr<TFReaderOp> my_tfreader_op =
  282. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  283. columns_to_load, false, 1, 0, false);
  284. rc = my_tfreader_op->Init();
  285. ASSERT_TRUE(rc.IsOk());
  286. rc = my_tree->AssociateNode(my_tfreader_op);
  287. ASSERT_TRUE(rc.IsOk());
  288. // RepeatOp
  289. uint32_t num_repeats = 3;
  290. std::shared_ptr<RepeatOp> my_repeat_op = std::make_shared<RepeatOp>(num_repeats);
  291. rc = my_tree->AssociateNode(my_repeat_op);
  292. ASSERT_TRUE(rc.IsOk());
  293. // Set children/root layout.
  294. my_tfreader_op->set_total_repeats(num_repeats);
  295. my_tfreader_op->set_num_repeats_per_epoch(num_repeats);
  296. rc = my_repeat_op->AddChild(my_tfreader_op);
  297. ASSERT_TRUE(rc.IsOk());
  298. rc = my_tree->AssignRoot(my_repeat_op);
  299. ASSERT_TRUE(rc.IsOk());
  300. MS_LOG(INFO) << "Launching tree and begin iteration.";
  301. rc = my_tree->Prepare();
  302. ASSERT_TRUE(rc.IsOk());
  303. rc = my_tree->Launch();
  304. ASSERT_TRUE(rc.IsOk());
  305. // Start the loop of reading tensors from our pipeline
  306. DatasetIterator di(my_tree);
  307. TensorRow tensor_list;
  308. rc = di.FetchNextTensorRow(&tensor_list);
  309. ASSERT_TRUE(rc.IsOk());
  310. int row_count = 0;
  311. while (!tensor_list.empty()) {
  312. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  313. // Display the tensor by calling the printer on it
  314. for (int i = 0; i < tensor_list.size(); i++) {
  315. std::ostringstream ss;
  316. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  317. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  318. }
  319. rc = di.FetchNextTensorRow(&tensor_list);
  320. ASSERT_TRUE(rc.IsOk());
  321. row_count++;
  322. }
  323. ASSERT_EQ(row_count, 12 * 3);
  324. }
  325. TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) {
  326. // Start with an empty execution tree
  327. auto my_tree = std::make_shared<ExecutionTree>();
  328. Status rc;
  329. std::string dataset_path;
  330. dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  331. std::vector<std::string> files = {dataset_path + "/test.data"};
  332. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  333. std::vector<std::string> columns_to_load;
  334. columns_to_load.push_back("col_sint32");
  335. columns_to_load.push_back("col_binary");
  336. schema->LoadSchemaFile(dataset_path + "/datasetSchema.json", columns_to_load);
  337. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  338. int32_t op_connector_size = config_manager->op_connector_size();
  339. int32_t worker_connector_size = config_manager->worker_connector_size();
  340. int32_t num_workers = 1;
  341. std::shared_ptr<TFReaderOp> my_tfreader_op =
  342. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  343. columns_to_load, false, 1, 0, false);
  344. rc = my_tfreader_op->Init();
  345. ASSERT_TRUE(rc.IsOk());
  346. rc = my_tree->AssociateNode(my_tfreader_op);
  347. ASSERT_TRUE(rc.IsOk());
  348. rc = my_tree->AssignRoot(my_tfreader_op);
  349. ASSERT_TRUE(rc.IsOk());
  350. MS_LOG(INFO) << "Launching tree and begin iteration.";
  351. rc = my_tree->Prepare();
  352. ASSERT_TRUE(rc.IsOk());
  353. rc = my_tree->Launch();
  354. ASSERT_TRUE(rc.IsOk());
  355. // Start the loop of reading tensors from our pipeline
  356. DatasetIterator di(my_tree);
  357. TensorRow tensor_list;
  358. rc = di.FetchNextTensorRow(&tensor_list);
  359. ASSERT_TRUE(rc.IsOk());
  360. int row_count = 0;
  361. while (!tensor_list.empty()) {
  362. // Display the tensor by calling the printer on it
  363. ASSERT_EQ(tensor_list.size(), columns_to_load.size());
  364. for (int i = 0; i < tensor_list.size(); i++) {
  365. std::ostringstream ss;
  366. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  367. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  368. }
  369. rc = di.FetchNextTensorRow(&tensor_list);
  370. ASSERT_TRUE(rc.IsOk());
  371. row_count++;
  372. }
  373. ASSERT_EQ(row_count, 12);
  374. }
  375. TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Row) {
  376. // Start with an empty execution tree
  377. auto my_tree = std::make_shared<ExecutionTree>();
  378. Status rc;
  379. std::string dataset_path;
  380. dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  381. std::string data_schema_filepath = dataset_path + "/datasetSchema1Row.json";
  382. // TFReaderOp
  383. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  384. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {});
  385. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  386. int32_t op_connector_size = config_manager->op_connector_size();
  387. int32_t num_workers = 1;
  388. int32_t worker_connector_size = config_manager->worker_connector_size();
  389. std::vector<std::string> files = {dataset_path + "/test.data"};
  390. std::vector<std::string> columns_to_load = {};
  391. std::shared_ptr<TFReaderOp> my_tfreader_op =
  392. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  393. columns_to_load, false, 1, 0, false);
  394. rc = my_tfreader_op->Init();
  395. ASSERT_TRUE(rc.IsOk());
  396. rc = my_tree->AssociateNode(my_tfreader_op);
  397. ASSERT_TRUE(rc.IsOk());
  398. rc = my_tree->AssignRoot(my_tfreader_op);
  399. ASSERT_TRUE(rc.IsOk());
  400. MS_LOG(INFO) << "Launching tree and begin iteration.";
  401. rc = my_tree->Prepare();
  402. ASSERT_TRUE(rc.IsOk());
  403. rc = my_tree->Launch();
  404. ASSERT_TRUE(rc.IsOk());
  405. // Start the loop of reading tensors from our pipeline
  406. DatasetIterator di(my_tree);
  407. TensorRow tensor_list;
  408. rc = di.FetchNextTensorRow(&tensor_list);
  409. ASSERT_TRUE(rc.IsOk());
  410. int row_count = 0;
  411. while (!tensor_list.empty()) {
  412. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  413. // Display the tensor by calling the printer on it
  414. for (int i = 0; i < tensor_list.size(); i++) {
  415. std::ostringstream ss;
  416. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  417. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  418. }
  419. rc = di.FetchNextTensorRow(&tensor_list);
  420. ASSERT_TRUE(rc.IsOk());
  421. row_count++;
  422. }
  423. ASSERT_EQ(row_count, 1);
  424. }
  425. TEST_F(MindDataTestTFReaderOp, TestTFReaderTake1Buffer) {
  426. // Start with an empty execution tree
  427. auto my_tree = std::make_shared<ExecutionTree>();
  428. Status rc;
  429. std::string dataset_path;
  430. dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  431. std::string data_schema_filepath = dataset_path + "/datasetSchema5Rows.json";
  432. // TFReaderOp
  433. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  434. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema5Rows.json", {});
  435. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  436. int32_t op_connector_size = config_manager->op_connector_size();
  437. int32_t num_workers = 1;
  438. int32_t worker_connector_size = config_manager->worker_connector_size();
  439. std::vector<std::string> files = {dataset_path + "/test.data"};
  440. std::vector<std::string> columns_to_load = {};
  441. std::shared_ptr<TFReaderOp> my_tfreader_op =
  442. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  443. columns_to_load, false, 1, 0, false);
  444. rc = my_tfreader_op->Init();
  445. ASSERT_TRUE(rc.IsOk());
  446. rc = my_tree->AssociateNode(my_tfreader_op);
  447. ASSERT_TRUE(rc.IsOk());
  448. rc = my_tree->AssignRoot(my_tfreader_op);
  449. ASSERT_TRUE(rc.IsOk());
  450. MS_LOG(INFO) << "Launching tree and begin iteration.";
  451. rc = my_tree->Prepare();
  452. ASSERT_TRUE(rc.IsOk());
  453. rc = my_tree->Launch();
  454. ASSERT_TRUE(rc.IsOk());
  455. // Start the loop of reading tensors from our pipeline
  456. DatasetIterator di(my_tree);
  457. TensorRow tensor_list;
  458. rc = di.FetchNextTensorRow(&tensor_list);
  459. ASSERT_TRUE(rc.IsOk());
  460. int row_count = 0;
  461. while (!tensor_list.empty()) {
  462. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  463. // Display the tensor by calling the printer on it
  464. for (int i = 0; i < tensor_list.size(); i++) {
  465. std::ostringstream ss;
  466. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  467. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  468. }
  469. rc = di.FetchNextTensorRow(&tensor_list);
  470. ASSERT_TRUE(rc.IsOk());
  471. row_count++;
  472. }
  473. ASSERT_EQ(row_count, 5);
  474. }
  475. TEST_F(MindDataTestTFReaderOp, TestTFReaderTake7Rows) {
  476. // Start with an empty execution tree
  477. auto my_tree = std::make_shared<ExecutionTree>();
  478. Status rc;
  479. std::string dataset_path;
  480. dataset_path = datasets_root_path_ + "/testTFTestAllTypes";
  481. std::string data_schema_filepath = dataset_path + "/datasetSchema7Rows.json";
  482. // TFReaderOp
  483. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  484. schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema7Rows.json", {});
  485. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  486. int32_t op_connector_size = config_manager->op_connector_size();
  487. int32_t num_workers = 1;
  488. int32_t worker_connector_size = config_manager->worker_connector_size();
  489. std::vector<std::string> files = {dataset_path + "/test.data"};
  490. std::vector<std::string> columns_to_load = {};
  491. std::shared_ptr<TFReaderOp> my_tfreader_op =
  492. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  493. columns_to_load, false, 1, 0, false);
  494. rc = my_tfreader_op->Init();
  495. ASSERT_TRUE(rc.IsOk());
  496. rc = my_tree->AssociateNode(my_tfreader_op);
  497. ASSERT_TRUE(rc.IsOk());
  498. rc = my_tree->AssignRoot(my_tfreader_op);
  499. ASSERT_TRUE(rc.IsOk());
  500. MS_LOG(INFO) << "Launching tree and begin iteration.";
  501. rc = my_tree->Prepare();
  502. ASSERT_TRUE(rc.IsOk());
  503. rc = my_tree->Launch();
  504. ASSERT_TRUE(rc.IsOk());
  505. // Start the loop of reading tensors from our pipeline
  506. DatasetIterator di(my_tree);
  507. TensorRow tensor_list;
  508. rc = di.FetchNextTensorRow(&tensor_list);
  509. ASSERT_TRUE(rc.IsOk());
  510. int row_count = 0;
  511. while (!tensor_list.empty()) {
  512. MS_LOG(INFO) << "Row display for row #: " << row_count << ".";
  513. // Display the tensor by calling the printer on it
  514. for (int i = 0; i < tensor_list.size(); i++) {
  515. std::ostringstream ss;
  516. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  517. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  518. }
  519. rc = di.FetchNextTensorRow(&tensor_list);
  520. ASSERT_TRUE(rc.IsOk());
  521. row_count++;
  522. }
  523. ASSERT_EQ(row_count, 7);
  524. }
  525. TEST_F(MindDataTestTFReaderOp, TestTFReaderBasicNoSchema) {
  526. // Start with an empty execution tree
  527. auto my_tree = std::make_shared<ExecutionTree>();
  528. Status rc;
  529. std::string dataset_path;
  530. dataset_path = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  531. std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  532. int32_t op_connector_size = config_manager->op_connector_size();
  533. int32_t num_workers = 1;
  534. std::vector<std::string> columns_to_load = {};
  535. std::vector<std::string> files = {dataset_path};
  536. int32_t worker_connector_size = config_manager->worker_connector_size();
  537. std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  538. std::shared_ptr<TFReaderOp> my_tfreader_op =
  539. std::make_shared<TFReaderOp>(num_workers, worker_connector_size, 0, files, std::move(schema), op_connector_size,
  540. columns_to_load, false, 1, 0, false);
  541. rc = my_tfreader_op->Init();
  542. ASSERT_TRUE(rc.IsOk());
  543. rc = my_tree->AssociateNode(my_tfreader_op);
  544. ASSERT_TRUE(rc.IsOk());
  545. rc = my_tree->AssignRoot(my_tfreader_op);
  546. ASSERT_TRUE(rc.IsOk());
  547. MS_LOG(INFO) << "Launching tree and begin iteration.";
  548. rc = my_tree->Prepare();
  549. ASSERT_TRUE(rc.IsOk());
  550. rc = my_tree->Launch();
  551. ASSERT_TRUE(rc.IsOk());
  552. // Start the loop of reading tensors from our pipeline
  553. DatasetIterator di(my_tree);
  554. TensorRow tensor_list;
  555. rc = di.FetchNextTensorRow(&tensor_list);
  556. ASSERT_TRUE(rc.IsOk());
  557. int row_count = 0;
  558. while (!tensor_list.empty()) {
  559. // Display the tensor by calling the printer on it
  560. ASSERT_EQ(tensor_list.size(), 9);
  561. for (int i = 0; i < tensor_list.size(); i++) {
  562. std::ostringstream ss;
  563. ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
  564. MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
  565. }
  566. rc = di.FetchNextTensorRow(&tensor_list);
  567. ASSERT_TRUE(rc.IsOk());
  568. row_count++;
  569. }
  570. ASSERT_EQ(row_count, 12);
  571. }
  572. TEST_F(MindDataTestTFReaderOp, TestTotalRowsBasic) {
  573. std::string tf_file = datasets_root_path_ + "/testTFTestAllTypes/test.data";
  574. std::vector<std::string> filenames;
  575. for (int i = 0; i < 5; i++) {
  576. filenames.push_back(tf_file);
  577. }
  578. int64_t total_rows = 0;
  579. TFReaderOp::CountTotalRows(&total_rows, filenames, 1);
  580. ASSERT_EQ(total_rows, 60);
  581. TFReaderOp::CountTotalRows(&total_rows, filenames, 2);
  582. ASSERT_EQ(total_rows, 60);
  583. TFReaderOp::CountTotalRows(&total_rows, filenames, 3);
  584. ASSERT_EQ(total_rows, 60);
  585. TFReaderOp::CountTotalRows(&total_rows, filenames, 4);
  586. ASSERT_EQ(total_rows, 60);
  587. TFReaderOp::CountTotalRows(&total_rows, filenames, 5);
  588. ASSERT_EQ(total_rows, 60);
  589. TFReaderOp::CountTotalRows(&total_rows, filenames, 6);
  590. ASSERT_EQ(total_rows, 60);
  591. TFReaderOp::CountTotalRows(&total_rows, filenames, 729);
  592. ASSERT_EQ(total_rows, 60);
  593. TFReaderOp::CountTotalRows(&total_rows, filenames, 1, true);
  594. ASSERT_EQ(total_rows, 60);
  595. TFReaderOp::CountTotalRows(&total_rows, filenames, 2, true);
  596. ASSERT_EQ(total_rows, 60);
  597. TFReaderOp::CountTotalRows(&total_rows, filenames, 3, true);
  598. ASSERT_EQ(total_rows, 60);
  599. TFReaderOp::CountTotalRows(&total_rows, filenames, 4, true);
  600. ASSERT_EQ(total_rows, 60);
  601. TFReaderOp::CountTotalRows(&total_rows, filenames, 5, true);
  602. ASSERT_EQ(total_rows, 60);
  603. TFReaderOp::CountTotalRows(&total_rows, filenames, 6, true);
  604. ASSERT_EQ(total_rows, 60);
  605. TFReaderOp::CountTotalRows(&total_rows, filenames, 729, true);
  606. ASSERT_EQ(total_rows, 60);
  607. }