You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

project_op.h 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef DATASET_ENGINE_DATASETOPS_PROJECT_OP_H_
  17. #define DATASET_ENGINE_DATASETOPS_PROJECT_OP_H_
  18. #include <memory>
  19. #include <string>
  20. #include <vector>
  21. #include "dataset/engine/datasetops/pipeline_op.h"
  22. namespace mindspore {
  23. namespace dataset {
  24. class ProjectOp : public PipelineOp {  // Inlined dataset op: gets buffers from its child node and projects them (see GetNextBuffer).
  25. public:
  26. // The nested builder class inside of the ProjectOp is used to help manage all of the arguments
  27. // for constructing it. This project op is very simple though, so this builder is really just
  28. // provided for a consistent look and feel for creators of Dataset operators overall.
  29. class Builder {
  30. public:
  31. // Builder constructor. Creates the builder object.
  32. // @param columns_to_project - The columns to project, given as a list of strings.
  33. // @return This is a constructor.
  34. explicit Builder(const std::vector<std::string> &columns_to_project);
  35. // Builder destructor.
  36. ~Builder() = default;
  37. // The builder "build" method creates the final object.
  38. // @param (unnamed) - output pointer for the shared_ptr to the new ProjectOp object. @return Status - The error code returned.
  39. Status Build(std::shared_ptr<ProjectOp> *);
  40. private:
  41. std::vector<std::string> builder_columns_to_project_;  // Columns held by the builder; presumably passed on to the ProjectOp by Build() - confirm in the implementation.
  42. Status SanityCheck() const;  // NOTE(review): presumably validates the builder arguments before Build(); the checks are not visible in this header.
  43. };
  44. // Constructor of the ProjectOp.
  45. // @param columns_to_project - The columns to project, given as a list of strings.
  46. explicit ProjectOp(const std::vector<std::string> &columns_to_project);
  47. // Destructor.
  48. ~ProjectOp() = default;
  49. // A print method typically used for debugging.
  50. // @param out - The output stream to write output to.
  51. // @param show_all - A bool to control if you want to show all info or just a summary.
  52. void Print(std::ostream &out, bool show_all) const override;
  53. // << Stream output operator overload.
  54. // @notes This allows you to write the debug print info using stream operators; it calls Print() with show_all = false.
  55. // @param out - reference to the output stream being overloaded.
  56. // @param project_op - reference to the ProjectOp to display.
  57. // @return - the output stream must be returned.
  58. friend std::ostream &operator<<(std::ostream &out, const ProjectOp &project_op) {
  59. project_op.Print(out, false);
  60. return out;
  61. }
  62. // Class functor operator () override.
  63. // Most dataset ops operate by launching a thread (see ExecutionTree).
  64. // However, the ProjectOp is defined as an inlined operator, so it is invalid to launch the
  65. // functor since this op runs inlined inside another operator. The function is overridden to
  66. // ensure that it is not called by mistake (it will generate an error).
  67. // @return Status - The error code returned.
  68. Status operator()() override;
  69. // Gets a buffer from the child node and projects that buffer. The caller is typically our parent node.
  70. // @param p_buffer - output pointer to the projected buffer.
  71. // @param worker_id - The worker id. (retry_if_eoe is not described in this header - TODO confirm its meaning.) @return Status - The error code returned.
  72. Status GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe) override;
  73. // Base-class override. Return the number of workers in the first parent.
  74. // @return int32_t - the count described above (this method takes no parameters).
  75. int32_t num_consumers() const override;
  76. // Base-class override. Return the number of producers in the first child.
  77. // @return int32_t - the count described above (this method takes no parameters).
  78. int32_t num_producers() const override;
  79. // Base-class override for special eoe handler.
  80. // Inline operators must override this because there is no connector to push eoe onto.
  81. // @param worker_id - The worker id. @return Status - The error code returned.
  82. Status EoeReceived(int32_t worker_id) override;
  83. // Base-class override for special eof handler.
  84. // Inline operators must override this because there is no connector to push eof onto.
  85. // @param worker_id - The worker id. @return Status - The error code returned.
  86. Status EofReceived(int32_t worker_id) override;
  87. private:
  88. std::vector<std::string> columns_to_project_;  // The columns this op projects; presumably set from the constructor argument - confirm in the implementation.
  89. Status Project(std::unique_ptr<DataBuffer> *data_buffer);  // NOTE(review): presumably applies the projection to *data_buffer; body not visible in this header.
  90. };
  91. } // namespace dataset
  92. } // namespace mindspore
  93. #endif // DATASET_ENGINE_DATASETOPS_PROJECT_OP_H_