You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

barrier_op.cc 9.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "dataset/engine/datasetops/barrier_op.h"
  17. #include <iomanip>
  18. #include <utility>
  19. #include "dataset/core/constants.h"
  20. #include "dataset/engine/data_buffer.h"
  21. #include "dataset/engine/db_connector.h"
  22. #include "dataset/core/config_manager.h"
  23. #include "dataset/core/global_context.h"
  24. #include "utils/log_adapter.h"
  25. namespace mindspore {
  26. namespace dataset {
  27. BarrierOp::Builder::Builder() {
  28. // Some arguments to the BarrierOp constructor have a default argument that is taken
  29. // from the client config.
  30. // The user may choose to change these values for the construction of the BarrierOp by
  31. // using the various builder set methods.
  32. std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  33. builder_rows_per_buffer_ = cfg->rows_per_buffer();
  34. builder_op_connector_size_ = cfg->op_connector_size();
  35. }
  36. Status BarrierOp::Builder::SanityCheck() const { return Status::OK(); }
  37. Status BarrierOp::Builder::Build(std::shared_ptr<BarrierOp> *ptr) {
  38. RETURN_IF_NOT_OK(SanityCheck());
  39. *ptr = std::make_shared<BarrierOp>(builder_rows_per_buffer_, builder_op_connector_size_, builder_condition_name_,
  40. builder_condition_func_);
  41. return Status::OK();
  42. }
  43. // Construct BarrierOp here, local variables initialized in operator due to tree construction restrictions
  44. BarrierOp::BarrierOp(int32_t rows_per_buffer, int32_t op_connector_size, const std::string &condition_name,
  45. py::function condition_func)
  46. : PipelineOp(op_connector_size),
  47. rows_per_buffer_(rows_per_buffer),
  48. buffer_id_(0),
  49. clean_up_(false),
  50. eof_(false),
  51. condition_name_(condition_name),
  52. condition_function_(condition_func) {}
  53. // destructor
  54. BarrierOp::~BarrierOp() {}
  55. // Entry point for Barrier, called by launch()
  56. Status BarrierOp::operator()() {
  57. // The children_num_ parameter needs to be put here
  58. // Synchronize with TaskManager once the thread is created.
  59. TaskManager::FindMe()->Post();
  60. // create child iterator, right now this barrier is a pipeline operator
  61. const int32_t worker_id = 0;
  62. const int32_t child_idx = 0;
  63. child_iterator_ = std::make_unique<ChildIterator>(this, worker_id, child_idx);
  64. // Loop until eof is true
  65. while (!eof_) {
  66. // Create new table to put the new tensor rows
  67. std::unique_ptr<TensorQTable> curr_table = std::make_unique<TensorQTable>();
  68. RETURN_IF_NOT_OK(prepare(curr_table.get()));
  69. // If an eof got picked up during the above prepare, then we're done
  70. if (eof_) {
  71. break;
  72. }
  73. // we have to output new buffer with possibly different buffer size, possibly one row
  74. while (!clean_up_) {
  75. // 1. If a previous loop iteration sent the current table out, then create a new one.
  76. if (curr_table == nullptr) {
  77. curr_table = std::make_unique<TensorQTable>();
  78. }
  79. // 2 fill the table. Note: clean_up mode might get turned on if epoch is finished
  80. RETURN_IF_NOT_OK(fillBuffer(curr_table.get()));
  81. // 3 create and update buffer and send it to the out connector
  82. if (!curr_table->empty()) {
  83. std::unique_ptr<DataBuffer> curr_buffer = std::make_unique<DataBuffer>(buffer_id_, DataBuffer::kDeBFlagNone);
  84. curr_buffer->set_tensor_table(std::move(curr_table));
  85. MS_LOG(DEBUG) << "Barrier operator finished one buffer, pushing, rows " << curr_buffer->NumRows() << ", cols "
  86. << curr_buffer->NumCols() << ", map " << column_name_id_map_.size() << ".";
  87. RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(curr_buffer)));
  88. buffer_id_++;
  89. }
  90. }
  91. // 4 handle drain state.
  92. if (clean_up_) {
  93. MS_LOG(DEBUG) << "Barrier operator sending epoch ending signal.";
  94. // Send the eoe up.
  95. RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))));
  96. }
  97. }
  98. // 5 handle eof
  99. // propagate eof here.
  100. MS_LOG(INFO) << "Barrier operator got EOF, propagating.";
  101. RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))));
  102. return Status::OK();
  103. }
  104. // Handles preprocessing of the main loop, used when starting new epoch
  105. Status BarrierOp::prepare(TensorQTable *const table) {
  106. MS_LOG(DEBUG) << "Barrier operator prepares for new epoch.";
  107. clean_up_ = false;
  108. buffer_id_ = 0;
  109. if (table == nullptr) {
  110. return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "BarrierOp prepare phase requires a tensor table.");
  111. }
  112. // fill initial row
  113. TensorRow new_row = {};
  114. // use iterator to get next row and invoke pyfunc wait
  115. RETURN_IF_NOT_OK(getNextTensorRow(&new_row));
  116. // If the first row fetching resulted in eof, then we are done.
  117. if (eof_) {
  118. return Status::OK();
  119. }
  120. if (new_row.empty()) {
  121. // This epoch is empty
  122. return Status::OK();
  123. }
  124. // Pack this first row into our tensor table
  125. // first row we also have to check if we should block
  126. RETURN_IF_NOT_OK(blockCond());
  127. table->push_back(std::move(new_row));
  128. // the update code below shouldn't do anything bad if the column name already exists.
  129. return Status::OK();
  130. }
  131. // fillBuffer always expects a new table to fill
  132. Status BarrierOp::fillBuffer(TensorQTable *const table) {
  133. if (table == nullptr) {
  134. return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "BarrierOp fillBuffer null table pointer.");
  135. }
  136. TensorRow new_row = {};
  137. while (table->size() < static_cast<size_t>(rows_per_buffer_)) {
  138. RETURN_IF_NOT_OK(getNextTensorRow(&new_row));
  139. // Early exit the loop if we got empty row from any of our child iterations
  140. if (new_row.empty()) {
  141. return Status::OK();
  142. }
  143. // else we got a row so pack it into the tensor table.
  144. RETURN_IF_NOT_OK(blockCond());
  145. table->push_back(std::move(new_row));
  146. }
  147. return Status::OK();
  148. }
  149. // function executes a py_func and blocks until condition becomes true.
  150. Status BarrierOp::blockCond() {
  151. {
  152. py::gil_scoped_acquire gil_acquire;
  153. if (Py_IsInitialized() == 0) {
  154. return Status(StatusCode::kPythonInterpreterFailure, "Python Interpreter is finalized");
  155. }
  156. // we have condition name, however the flexibility is in python today
  157. try {
  158. // Invoke python function
  159. py::object ret_py_obj = condition_function_();
  160. // Process the return value
  161. if (!py::isinstance<py::bool_>(ret_py_obj)) {
  162. return Status(StatusCode::kPyFuncException, "Condition wait function should return true/false");
  163. }
  164. } catch (const py::error_already_set &e) {
  165. return Status(StatusCode::kPyFuncException, e.what());
  166. }
  167. }
  168. return Status::OK();
  169. }
  170. // fetches next Barrier buffer row
  171. Status BarrierOp::getNextTensorRow(TensorRow *new_row) {
  172. // iterate over all iterators and generate a row
  173. RETURN_IF_NOT_OK((child_iterator_)->FetchNextTensorRow(new_row));
  174. // add each new row to iterator, check if row is empty, if row from iterator is empty return empty row
  175. if (new_row->empty()) {
  176. // If we did not get a row from any of the children, then it's the end of an epoch and we can move
  177. // to drain state.
  178. MS_LOG(INFO) << "Barrier operator child iterator produced empty row.";
  179. clean_up_ = true;
  180. // If we picked up an eof here, then we are completely done.
  181. if ((child_iterator_)->eof_handled()) {
  182. MS_LOG(INFO) << "Barrier operator iterator got EOF.";
  183. eof_ = true;
  184. }
  185. return Status::OK();
  186. }
  187. return Status::OK();
  188. }
  189. // A function that prints info about the Operator
  190. void BarrierOp::Print(std::ostream &out, bool show_all) const {
  191. // Always show the id and name as first line regardless if this summary or detailed print
  192. out << "(" << std::setw(2) << operator_id_ << ") <BarrierOp>:";
  193. if (!show_all) {
  194. // Call the super class for displaying any common 1-liner info
  195. PipelineOp::Print(out, show_all);
  196. // Then show any custom derived-internal 1-liner info for this op
  197. out << "\n";
  198. } else {
  199. // Call the super class for displaying any common detailed info
  200. PipelineOp::Print(out, show_all);
  201. // Then show any custom derived-internal stuff
  202. out << "\nCondition: " << condition_name_ << "\n\n";
  203. }
  204. }
  205. // overwrite function and handle eof
  206. Status BarrierOp::EofReceived(int32_t) {
  207. MS_LOG(DEBUG) << "Barrier operator EOF received, do nothing now.";
  208. return Status::OK();
  209. }
  210. // overwrite function and handle eoe
  211. Status BarrierOp::EoeReceived(int32_t) {
  212. state_ = OpState::kDeOpIdle;
  213. return Status::OK();
  214. }
  215. } // namespace dataset
  216. } // namespace mindspore