You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

rename_op.cc 6.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "dataset/engine/datasetops/rename_op.h"
  17. #include <vector>
  18. #include <utility>
  19. #include <unordered_map>
  20. #include "dataset/core/config_manager.h"
  21. #include "dataset/core/constants.h"
  22. #include "dataset/core/global_context.h"
  23. #include "dataset/engine/data_buffer.h"
  24. #include "dataset/engine/db_connector.h"
  25. #include "utils/log_adapter.h"
  26. namespace mindspore {
  27. namespace dataset {
  28. // builds
  29. RenameOp::Builder::Builder() {
  30. // Some arguments to the RenameOp constructor have a default argument that is taken
  31. // from the client config.
  32. // The user may choose to change these values for the construction of the RenameOp by
  33. // using the various builder set methods.
  34. std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  35. builder_op_connector_size_ = cfg->op_connector_size();
  36. }
  37. Status RenameOp::Builder::SanityCheck() const { return Status::OK(); }
  38. // build method for RenameOp
  39. Status RenameOp::Builder::Build(std::shared_ptr<RenameOp> *ptr) {
  40. RETURN_IF_NOT_OK(SanityCheck());
  41. *ptr = std::make_shared<RenameOp>(builder_in_columns_, builder_out_columns_, builder_op_connector_size_);
  42. return Status::OK();
  43. }
  44. // constructor
  45. RenameOp::RenameOp(const std::vector<std::string> &in_col_names, const std::vector<std::string> &out_col_names,
  46. int32_t op_connector_size)
  47. : PipelineOp(op_connector_size), in_columns_(in_col_names), out_columns_(out_col_names) {
  48. // check input & output sizes
  49. if (in_columns_.size() != out_columns_.size()) {
  50. MS_LOG(ERROR) << "Rename operator number of in columns != number of out columns.";
  51. }
  52. }
  53. // destructor
  54. RenameOp::~RenameOp() {}
  55. // main entry point for rename
  56. Status RenameOp::operator()() {
  57. TaskManager::FindMe()->Post();
  58. std::unique_ptr<DataBuffer> curr_buffer;
  59. RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
  60. if (curr_buffer->buffer_flags() != DataBuffer::kDeBFlagNone) {
  61. RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(curr_buffer)));
  62. std::string err_msg = "Rename first buffer got was control signal";
  63. // if 1st eoe or eof, pass it on then return
  64. RETURN_STATUS_UNEXPECTED(err_msg);
  65. }
  66. while (curr_buffer->eof() == false) {
  67. while (curr_buffer->eoe() == false) {
  68. // core rename functionality
  69. RETURN_IF_NOT_OK(RenameBuffer(&curr_buffer));
  70. // push the renamed input buffer
  71. MS_LOG(DEBUG) << "Rename operator pushing next buffer.";
  72. RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(curr_buffer)));
  73. RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
  74. } // end of while eoe loop
  75. // we got eoe, now try again until we get eof
  76. MS_LOG(INFO) << "Rename operator EOE Received.";
  77. RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))));
  78. MS_LOG(DEBUG) << "Rename operator fetching buffer after EOE.";
  79. RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
  80. } // end of while eof loop
  81. MS_LOG(INFO) << "Rename opeerator EOF Received.";
  82. RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))));
  83. return Status::OK();
  84. }
  85. // renames buffer
  86. Status RenameOp::RenameBuffer(std::unique_ptr<DataBuffer> *input_buffer) {
  87. // iterate over my index in input vector, find the corresponding position
  88. const std::unordered_map<std::string, int32_t> col_name_id_map = (*input_buffer)->column_name_map();
  89. std::unordered_map<std::string, int32_t> new_col_name_id_map = {};
  90. // parameter for input check
  91. size_t found = 0;
  92. // iterate over all the pairs and if there is a name match with rename, rename the column and add it to new map
  93. // by doing it this way we recreate a new ColNameIdMap and allow for switching
  94. for (const auto &pair : col_name_id_map) {
  95. std::string name = pair.first;
  96. int32_t id = pair.second;
  97. // find name
  98. std::vector<std::string>::iterator it;
  99. it = std::find(in_columns_.begin(), in_columns_.end(), name);
  100. // for c input checks here we have to count the number of times we find the stuff in in_columns_
  101. // because we iterate over the mInputList n times
  102. if (it != in_columns_.end()) {
  103. // found
  104. found += 1;
  105. int index = std::distance(in_columns_.begin(), it);
  106. MS_LOG(INFO) << "Rename operator index found " << index << " value " << id << ".";
  107. new_col_name_id_map[out_columns_[index]] = id;
  108. } else {
  109. // not found
  110. MS_LOG(INFO) << "Rename operator index not found: " << id << " is the column id.";
  111. new_col_name_id_map[name] = id;
  112. }
  113. }
  114. // only checks number of renamed columns have been found, this input check doesn't check everything
  115. if (found != in_columns_.size()) {
  116. MS_LOG(INFO) << "Rename operator column names found: " << found << " out of " << in_columns_.size() << ".";
  117. std::string err_msg = "Renamed column doesn't exist in dataset";
  118. RETURN_STATUS_UNEXPECTED(err_msg);
  119. }
  120. (*input_buffer)->set_column_name_map(new_col_name_id_map);
  121. return Status::OK();
  122. }
  123. // prints rename
  124. void RenameOp::Print(std::ostream &out, // In: The output stream to print to
  125. bool show_all) const { // In: T/F if it should print everything
  126. // Call base class printer first
  127. PipelineOp::Print(out, show_all);
  128. out << "\nRenameOp:\n";
  129. for (size_t i = 0; i < in_columns_.size(); ++i) {
  130. out << "\nin Columns: " << in_columns_[i] << "\nOut Columns: " << out_columns_[i] << "\n\n";
  131. }
  132. }
  133. Status RenameOp::EofReceived(int32_t) {
  134. MS_LOG(INFO) << "Rename operator EOF received, do nothing now.";
  135. return Status::OK();
  136. }
  137. Status RenameOp::EoeReceived(int32_t) {
  138. state_ = OpState::kDeOpIdle;
  139. return Status::OK();
  140. }
  141. } // namespace dataset
  142. } // namespace mindspore