You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

zip_op.h 5.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef DATASET_ENGINE_DATASETOPS_ZIP_OP_H_
  17. #define DATASET_ENGINE_DATASETOPS_ZIP_OP_H_
  18. #include <memory>
  19. #include <queue>
  20. #include <string>
  21. #include <unordered_map>
  22. #include <vector>
  23. #include "dataset/core/tensor.h"
  24. #include "dataset/engine/dataset_iterator.h"
  25. #include "dataset/engine/datasetops/pipeline_op.h"
  26. #include "dataset/util/status.h"
  27. namespace mindspore {
  28. namespace dataset {
// Forward declaration of DataBuffer; no declaration visible in this header needs its complete type.
class DataBuffer;
  31. class ZipOp : public PipelineOp {
  32. public:
  33. // The nested builder class inside of the ZipOp is used to help manage all of
  34. // the arguments for constructing it. Use the builder by setting each argument
  35. // with the provided set methods, and then finally call the build method to execute
  36. // the actual construction.
  37. // NOTE: the rows per buffer with initial value 0 means to default to the number of rows from the first child
  38. class Builder {
  39. public:
  40. // Builder constructor. Creates the builder object.
  41. // @note No default args
  42. // @return This is a constructor.
  43. Builder();
  44. // Default destructor
  45. ~Builder() = default;
  46. // Setter method.
  47. // @return Builder setter method returns reference to the builder.
  48. Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
  49. builder_rows_per_buffer_ = rows_per_buffer;
  50. return *this;
  51. }
  52. // Setter method.
  53. // @return Builder setter method returns reference to the builder.
  54. Builder &SetOpConnectorSize(int32_t op_connector_size) {
  55. builder_op_connector_size_ = op_connector_size;
  56. return *this;
  57. }
  58. // The builder "build" method creates the ZipOp dataset Operator.
  59. // @return shared_ptr to the new ZipOp object
  60. Status Build(std::shared_ptr<ZipOp> *);
  61. private:
  62. int32_t builder_rows_per_buffer_;
  63. int32_t builder_op_connector_size_;
  64. Status SanityCheck() const;
  65. };
  66. // Constructor for ZipOp
  67. // @param rows_per_buffer - number of rows in output buffer
  68. // @param op_connector_size - connector size
  69. ZipOp(int32_t rows_per_buffer, int32_t op_connector_size);
  70. // Destructor
  71. ~ZipOp();
  72. Status EofReceived(int32_t) override;
  73. Status EoeReceived(int32_t) override;
  74. // Print function for Zip
  75. // @param out - output stream to print to
  76. // @param show_all - if it should print everything
  77. void Print(std::ostream &out, bool show_all) const override;
  78. // Provide stream operator for displaying it
  79. friend std::ostream &operator<<(std::ostream &out, const ZipOp &zo) {
  80. zo.Print(out, false);
  81. return out;
  82. }
  83. // Class functor operator () override.
  84. // All dataset ops operate by launching a thread (see ExecutionTree). This class functor will
  85. // provide the master loop that drives the logic for performing the work
  86. // @return Status - The error code return
  87. Status operator()() override;
  88. // Base-class override for NodePass visitor acceptor.
  89. // @param p - Pointer to the NodePass to be accepted.
  90. // @param modified - Whether this node visit modified the pipeline.
  91. // @return - Status of the node visit.
  92. Status Accept(NodePass *p, bool *modified) override;
  93. // Op name getter
  94. // @return Name of the current Op
  95. std::string Name() const override { return "ZipOp"; }
  96. private:
  97. // Handles preprocessing of the main loop, used when starting new epoch
  98. Status prepare(TensorQTable *const table);
  99. // This function calls takes a table repeatedly adds rows to it.
  100. // @param table a table of tensors to be moved into a buffer
  101. Status fillBuffer(TensorQTable *const table);
  102. // Special handle case where an empty row has been received from child iterator
  103. // @note - we need to drain eoe signals from all children connectors.
  104. // @details - when this function is called, then we encountered eoe at child iterator
  105. // we have to drain rows from other child iterators until we hit eoe from all other child iterators
  106. Status drainPipeline();
  107. // Merges 1 row from each childIterator together
  108. // @param new_zip_row - input and output, will be a non-empty row if all rows from childConnectors are non-empty
  109. // @param updateColumnMapping - generates a new column name to index mapping (mColNameIdMap) if set to true
  110. // @details merge rows from iterator together. This is the main functionality for ZipOp
  111. // this function takes one row and fills it with tensors from rows fetched
  112. // from childIterators.
  113. // @example:
  114. // Zips multiple rows at a time, the output is store in newZipRow
  115. // 1 a T
  116. // \ | /
  117. // 1, a, T
  118. Status getNextTensorRow(TensorRow *const new_zip_row);
  119. // Computing the assignment of the column name map.
  120. // @return - Status
  121. Status ComputeColMap() override;
  122. int32_t children_num_;
  123. int32_t rows_per_buffer_;
  124. int32_t buffer_id_;
  125. bool draining_;
  126. bool eof_;
  127. std::vector<std::unique_ptr<ChildIterator>> child_iterators_;
  128. };
  129. } // namespace dataset
  130. } // namespace mindspore
  131. #endif // DATASET_ENGINE_DATASETOPS_ZIP_OP_H_