From: @hfarahat Reviewed-by: Signed-off-by:pull/13290/MERGE
| @@ -14,6 +14,7 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||||
| clue_op.cc | clue_op.cc | ||||
| csv_op.cc | csv_op.cc | ||||
| album_op.cc | album_op.cc | ||||
| mappable_leaf_op.cc | |||||
| ) | ) | ||||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | ||||
| @@ -72,17 +72,15 @@ Status AlbumOp::Builder::SanityCheck() { | |||||
| AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode, | AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode, | ||||
| const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema, | const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema, | ||||
| std::shared_ptr<SamplerRT> sampler) | std::shared_ptr<SamplerRT> sampler) | ||||
| : ParallelOp(num_wkrs, queue_size, std::move(sampler)), | |||||
| rows_per_buffer_(rows_per_buffer), | |||||
| : MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer), | |||||
| folder_path_(file_dir), | folder_path_(file_dir), | ||||
| decode_(do_decode), | decode_(do_decode), | ||||
| extensions_(exts), | extensions_(exts), | ||||
| data_schema_(std::move(data_schema)), | data_schema_(std::move(data_schema)), | ||||
| row_cnt_(0), | |||||
| buf_cnt_(0), | |||||
| sampler_ind_(0), | sampler_ind_(0), | ||||
| dirname_offset_(0), | dirname_offset_(0), | ||||
| sample_ids_(nullptr) { | |||||
| sample_ids_(nullptr), | |||||
| curr_row_(0) { | |||||
| // Set the column name map (base class field) | // Set the column name map (base class field) | ||||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | ||||
| column_name_id_map_[data_schema_->column(i).name()] = i; | column_name_id_map_[data_schema_->column(i).name()] = i; | ||||
| @@ -131,97 +129,6 @@ Status AlbumOp::PrescanEntry() { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work | |||||
| Status AlbumOp::operator()() { | |||||
| RETURN_IF_NOT_OK(this->PrescanEntry()); | |||||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| while (true) { // each iterator is 1 epoch | |||||
| std::vector<int64_t> keys; | |||||
| keys.reserve(rows_per_buffer_); | |||||
| while (sampler_buffer->eoe() == false) { | |||||
| TensorRow sample_row; | |||||
| RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row)); | |||||
| TensorPtr sample_ids = sample_row[0]; | |||||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) { | |||||
| if ((*itr) >= num_rows_) continue; // index out of bound, skipping | |||||
| keys.push_back(*itr); | |||||
| row_cnt_++; | |||||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[buf_cnt_++ % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||||
| keys.clear(); | |||||
| } | |||||
| } | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| if (keys.empty() == false) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| if (IsLastIteration()) { | |||||
| std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||||
| std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); | |||||
| for (int32_t i = 0; i < num_workers_; ++i) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| return Status::OK(); | |||||
| } else { // not the last repeat. | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| } | |||||
| if (epoch_sync_flag_) { | |||||
| // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for | |||||
| // the current epoch. | |||||
| RETURN_IF_NOT_OK(WaitForWorkers()); | |||||
| } | |||||
| // If not the last repeat, self-reset and go to loop again. | |||||
| if (!IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK(Reset()); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| UpdateRepeatAndEpochCounter(); | |||||
| } | |||||
| } | |||||
| // contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_ | |||||
| // IMPORTANT: 1 IOBlock produces 1 DataBuffer | |||||
| Status AlbumOp::WorkerEntry(int32_t worker_id) { | |||||
| TaskManager::FindMe()->Post(); | |||||
| int64_t buffer_id = worker_id; | |||||
| std::unique_ptr<IOBlock> io_block; | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| while (io_block != nullptr) { | |||||
| if (io_block->wait() == true) { | |||||
| // Sync io_block is a signal that master thread wants us to pause and sync with other workers. | |||||
| // The last guy who comes to this sync point should reset the counter and wake up the master thread. | |||||
| if (++num_workers_paused_ == num_workers_) { | |||||
| wait_for_workers_post_.Set(); | |||||
| } | |||||
| } else if (io_block->eoe() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||||
| buffer_id = worker_id; | |||||
| } else if (io_block->eof() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||||
| } else { | |||||
| std::vector<int64_t> keys; | |||||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||||
| if (keys.empty() == true) return Status::OK(); // empty key is a quit signal for workers | |||||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||||
| buffer_id += num_workers_; | |||||
| } | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| } | |||||
| RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); | |||||
| } | |||||
| // Only support JPEG/PNG/GIF/BMP | // Only support JPEG/PNG/GIF/BMP | ||||
| // Optimization: Could take in a tensor | // Optimization: Could take in a tensor | ||||
| // This function does not return status because we want to just skip bad input, not crash | // This function does not return status because we want to just skip bad input, not crash | ||||
| @@ -443,7 +350,8 @@ Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, | |||||
| // to take a reference to a column descriptor? | // to take a reference to a column descriptor? | ||||
| // the design of this class is to make the code more readable, forgoing minor performance gain like | // the design of this class is to make the code more readable, forgoing minor performance gain like | ||||
| // getting rid of duplicated checks | // getting rid of duplicated checks | ||||
| Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, TensorRow *row) { | |||||
| Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { | |||||
| std::string file = image_rows_[row_id]; | |||||
| // testing here is to just print out file path | // testing here is to just print out file path | ||||
| (*row) = TensorRow(row_id, {}); | (*row) = TensorRow(row_id, {}); | ||||
| MS_LOG(INFO) << "Image row file: " << file << "."; | MS_LOG(INFO) << "Image row file: " << file << "."; | ||||
| @@ -531,19 +439,6 @@ Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann: | |||||
| } | } | ||||
| } | } | ||||
| // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer | |||||
| Status AlbumOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||||
| TensorRow trow; | |||||
| for (const int64_t &key : keys) { | |||||
| RETURN_IF_NOT_OK(this->LoadTensorRow(key, image_rows_[key], &trow)); | |||||
| deq->push_back(std::move(trow)); | |||||
| } | |||||
| (*db)->set_tensor_table(std::move(deq)); | |||||
| return Status::OK(); | |||||
| } | |||||
| void AlbumOp::Print(std::ostream &out, bool show_all) const { | void AlbumOp::Print(std::ostream &out, bool show_all) const { | ||||
| // Always show the id and name as first line regardless if this summary or detailed print | // Always show the id and name as first line regardless if this summary or detailed print | ||||
| out << "(" << std::setw(2) << operator_id_ << ") <AlbumOp>:"; | out << "(" << std::setw(2) << operator_id_ << ") <AlbumOp>:"; | ||||
| @@ -561,24 +456,12 @@ void AlbumOp::Print(std::ostream &out, bool show_all) const { | |||||
| } | } | ||||
| } | } | ||||
| // Reset Sampler and wakeup Master thread (functor) | |||||
| Status AlbumOp::Reset() { | |||||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | |||||
| RETURN_IF_NOT_OK(sampler_->ResetSampler()); | |||||
| row_cnt_ = 0; | |||||
| return Status::OK(); | |||||
| } | |||||
| // hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows | |||||
| Status AlbumOp::InitSampler() { | |||||
| RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status AlbumOp::LaunchThreadsAndInitOp() { | Status AlbumOp::LaunchThreadsAndInitOp() { | ||||
| if (tree_ == nullptr) { | if (tree_ == nullptr) { | ||||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); | return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); | ||||
| } | } | ||||
| RETURN_IF_NOT_OK(this->PrescanEntry()); | |||||
| // registers QueueList and individual Queues for interrupt services | // registers QueueList and individual Queues for interrupt services | ||||
| RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); | RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); | ||||
| RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks())); | RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks())); | ||||
| @@ -612,13 +495,13 @@ Status AlbumOp::GetNextRow(TensorRow *row) { | |||||
| RETURN_IF_NOT_OK(sample_buffer->PopRow(&sample_row)); | RETURN_IF_NOT_OK(sample_buffer->PopRow(&sample_row)); | ||||
| sample_ids_ = sample_row[0]; | sample_ids_ = sample_row[0]; | ||||
| } | } | ||||
| if (row_cnt_ + 1 > sample_ids_->Size()) { | |||||
| if (curr_row_ + 1 > sample_ids_->Size()) { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| int64_t key; | int64_t key; | ||||
| sample_ids_->GetItemAt(&key, {row_cnt_}); | |||||
| RETURN_IF_NOT_OK(LoadTensorRow(key, image_rows_[key], row)); | |||||
| row_cnt_++; | |||||
| RETURN_IF_NOT_OK(sample_ids_->GetItemAt(&key, {curr_row_})); | |||||
| RETURN_IF_NOT_OK(LoadTensorRow(key, row)); | |||||
| curr_row_++; | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| } // namespace dataset | } // namespace dataset | ||||
| @@ -30,6 +30,7 @@ | |||||
| #include "minddata/dataset/engine/data_buffer.h" | #include "minddata/dataset/engine/data_buffer.h" | ||||
| #include "minddata/dataset/engine/data_schema.h" | #include "minddata/dataset/engine/data_schema.h" | ||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | #include "minddata/dataset/engine/datasetops/parallel_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #include "minddata/dataset/util/path.h" | #include "minddata/dataset/util/path.h" | ||||
| #include "minddata/dataset/util/queue.h" | #include "minddata/dataset/util/queue.h" | ||||
| @@ -47,7 +48,7 @@ class Queue; | |||||
| using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>; | using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>; | ||||
| /// \class AlbumOp album_op.h | /// \class AlbumOp album_op.h | ||||
| class AlbumOp : public ParallelOp, public RandomAccessOp { | |||||
| class AlbumOp : public MappableLeafOp { | |||||
| public: | public: | ||||
| class Builder { | class Builder { | ||||
| public: | public: | ||||
| @@ -171,17 +172,6 @@ class AlbumOp : public ParallelOp, public RandomAccessOp { | |||||
| /// \return Status The status code returned | /// \return Status The status code returned | ||||
| Status PrescanEntry(); | Status PrescanEntry(); | ||||
| /// \brief Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | |||||
| /// \param[in] int32_t workerId - id of each worker | |||||
| /// \return Status The status code returned | |||||
| Status WorkerEntry(int32_t worker_id) override; | |||||
| /// \brief Main Loop of AlbumOp | |||||
| /// Master thread: Fill IOBlockQueue, then goes to sleep | |||||
| /// Worker thread: pulls IOBlock from IOBlockQueue, work on it then put buffer to mOutConnector | |||||
| /// \return Status The status code returned | |||||
| Status operator()() override; | |||||
| /// \brief A print method typically used for debugging | /// \brief A print method typically used for debugging | ||||
| /// \param[in] out | /// \param[in] out | ||||
| /// \param[in] show_all | /// \param[in] show_all | ||||
| @@ -197,10 +187,6 @@ class AlbumOp : public ParallelOp, public RandomAccessOp { | |||||
| std::string Name() const override { return "AlbumOp"; } | std::string Name() const override { return "AlbumOp"; } | ||||
| private: | private: | ||||
| /// \brief Initialize Sampler, calls sampler->Init() within | |||||
| /// \return Status The status code returned | |||||
| Status InitSampler(); | |||||
| /// \brief Load image to tensor row | /// \brief Load image to tensor row | ||||
| /// \param[in] image_file Image name of file | /// \param[in] image_file Image name of file | ||||
| /// \param[in] col_num Column num in schema | /// \param[in] col_num Column num in schema | ||||
| @@ -265,10 +251,9 @@ class AlbumOp : public ParallelOp, public RandomAccessOp { | |||||
| /// \brief Load a tensor row according to a json file | /// \brief Load a tensor row according to a json file | ||||
| /// \param[in] row_id_type row_id - id for this tensor row | /// \param[in] row_id_type row_id - id for this tensor row | ||||
| /// \param[in] ImageColumns file Json file location | |||||
| /// \param[in, out] TensorRow row Json content stored into a tensor row | /// \param[in, out] TensorRow row Json content stored into a tensor row | ||||
| /// \return Status The status code returned | /// \return Status The status code returned | ||||
| Status LoadTensorRow(row_id_type row_id, const std::string &file, TensorRow *row); | |||||
| Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; | |||||
| /// \brief Load a tensor column according to a json file | /// \brief Load a tensor column according to a json file | ||||
| /// \param[in] ImageColumns file Json file location | /// \param[in] ImageColumns file Json file location | ||||
| @@ -278,23 +263,14 @@ class AlbumOp : public ParallelOp, public RandomAccessOp { | |||||
| /// \return Status The status code returned | /// \return Status The status code returned | ||||
| Status loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row); | Status loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row); | ||||
| /// \param[in] const std::vector<int64_t> &keys Keys in ioblock | |||||
| /// \param[in, out] std::unique_ptr<DataBuffer> db Databuffer to push to | |||||
| /// \return Status The status code returned | |||||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||||
| /// \brief Called first when function is called | /// \brief Called first when function is called | ||||
| /// \return Status The status code returned | /// \return Status The status code returned | ||||
| Status LaunchThreadsAndInitOp(); | |||||
| /// \brief reset Op | |||||
| /// \return Status The status code returned | |||||
| Status Reset() override; | |||||
| Status LaunchThreadsAndInitOp() override; | |||||
| Status GetNextRow(TensorRow *row) override; | Status GetNextRow(TensorRow *row) override; | ||||
| // Private function for computing the assignment of the column name map. | |||||
| // @return Status The status code returned | |||||
| /// Private function for computing the assignment of the column name map. | |||||
| /// \return Status The status code returned | |||||
| Status ComputeColMap() override; | Status ComputeColMap() override; | ||||
| int32_t rows_per_buffer_; | int32_t rows_per_buffer_; | ||||
| @@ -303,12 +279,12 @@ class AlbumOp : public ParallelOp, public RandomAccessOp { | |||||
| std::set<std::string> extensions_; // extensions allowed | std::set<std::string> extensions_; // extensions allowed | ||||
| std::unordered_map<std::string, int32_t> col_name_map_; | std::unordered_map<std::string, int32_t> col_name_map_; | ||||
| std::unique_ptr<DataSchema> data_schema_; | std::unique_ptr<DataSchema> data_schema_; | ||||
| int64_t row_cnt_; | |||||
| int64_t buf_cnt_; | |||||
| int64_t sampler_ind_; | int64_t sampler_ind_; | ||||
| int64_t dirname_offset_; | int64_t dirname_offset_; | ||||
| std::vector<std::string> image_rows_; | std::vector<std::string> image_rows_; | ||||
| TensorPtr sample_ids_; | TensorPtr sample_ids_; | ||||
| int32_t curr_row_; | |||||
| }; | }; | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -79,8 +79,7 @@ Status CelebAOp::Builder::SanityCheck() { | |||||
| CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, | CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, | ||||
| bool decode, const std::string &usage, const std::set<std::string> &exts, | bool decode, const std::string &usage, const std::set<std::string> &exts, | ||||
| std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler) | std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler) | ||||
| : ParallelOp(num_workers, queue_size, std::move(sampler)), | |||||
| rows_per_buffer_(rows_per_buffer), | |||||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), | |||||
| folder_path_(dir), | folder_path_(dir), | ||||
| decode_(decode), | decode_(decode), | ||||
| extensions_(exts), | extensions_(exts), | ||||
| @@ -269,121 +268,8 @@ std::vector<std::string> CelebAOp::Split(const std::string &line) { | |||||
| return split; | return split; | ||||
| } | } | ||||
| // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work | |||||
| Status CelebAOp::operator()() { | |||||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||||
| std::unique_ptr<DataBuffer> data_buffer; | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&data_buffer)); | |||||
| RETURN_IF_NOT_OK(AddIOBlock(&data_buffer)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status CelebAOp::AddIOBlock(std::unique_ptr<DataBuffer> *data_buffer) { | |||||
| int64_t buff_count = 0; | |||||
| while (true) { | |||||
| std::vector<int64_t> keys; | |||||
| keys.reserve(rows_per_buffer_); | |||||
| int64_t row_count = 0; | |||||
| while (!(*data_buffer)->eoe()) { | |||||
| TensorRow sample_row; | |||||
| RETURN_IF_NOT_OK((*data_buffer)->PopRow(&sample_row)); | |||||
| std::shared_ptr<Tensor> sample_ids = sample_row[0]; | |||||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) { | |||||
| if ((*itr) >= num_rows_) { | |||||
| MS_LOG(WARNING) << "Sample Id (" << *itr << ") is out of bounds, skipping. Max id is " << num_rows_ << "."; | |||||
| continue; | |||||
| } | |||||
| keys.push_back(*itr); | |||||
| row_count++; | |||||
| if (row_count % rows_per_buffer_ == 0) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[buff_count++ % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||||
| keys.clear(); | |||||
| } | |||||
| } | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(data_buffer)); | |||||
| } | |||||
| if (!keys.empty()) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buff_count++) % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||||
| } | |||||
| if (IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||||
| for (int32_t i = 0; i < num_workers_; i++) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| return Status::OK(); | |||||
| } else { // not the last repeat. | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| } | |||||
| if (epoch_sync_flag_) { | |||||
| // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for | |||||
| // the current epoch. | |||||
| RETURN_IF_NOT_OK(WaitForWorkers()); | |||||
| } | |||||
| // If not the last repeat, self-reset and go to loop again. | |||||
| if (!IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK(Reset()); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(data_buffer)); | |||||
| } | |||||
| UpdateRepeatAndEpochCounter(); | |||||
| } | |||||
| } | |||||
| Status CelebAOp::WorkerEntry(int32_t worker_id) { | |||||
| TaskManager::FindMe()->Post(); | |||||
| int64_t buffer_id = worker_id; | |||||
| std::unique_ptr<IOBlock> io_block; | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| while (io_block != nullptr) { | |||||
| if (io_block->wait() == true) { | |||||
| // Sync io_block is a signal that master thread wants us to pause and sync with other workers. | |||||
| // The last guy who comes to this sync point should reset the counter and wake up the master thread. | |||||
| if (++num_workers_paused_ == num_workers_) { | |||||
| wait_for_workers_post_.Set(); | |||||
| } | |||||
| } else if (io_block->eoe() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||||
| buffer_id = worker_id; | |||||
| } else if (io_block->eof() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||||
| } else { | |||||
| std::vector<int64_t> keys; | |||||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||||
| if (keys.empty()) { | |||||
| return Status::OK(); // empty key is a quit signal for workers | |||||
| } | |||||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||||
| buffer_id += num_workers_; | |||||
| } | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| } | |||||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Unexpected nullptr received in worker."); | |||||
| } | |||||
| Status CelebAOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||||
| for (const auto &key : keys) { | |||||
| TensorRow row; | |||||
| RETURN_IF_NOT_OK(LoadTensorRow(key, image_labels_vec_[key], &row)); | |||||
| deq->push_back(std::move(row)); | |||||
| } | |||||
| (*db)->set_tensor_table(std::move(deq)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status CelebAOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string, std::vector<int32_t>> &image_label, | |||||
| TensorRow *row) { | |||||
| Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { | |||||
| std::pair<std::string, std::vector<int32_t>> &image_label = image_labels_vec_[row_id]; | |||||
| std::shared_ptr<Tensor> image; | std::shared_ptr<Tensor> image; | ||||
| std::shared_ptr<Tensor> label; | std::shared_ptr<Tensor> label; | ||||
| @@ -432,13 +318,6 @@ void CelebAOp::Print(std::ostream &out, bool show_all) const { | |||||
| } | } | ||||
| } | } | ||||
| // Reset Sampler and wakeup Master thread (functor) | |||||
| Status CelebAOp::Reset() { | |||||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | |||||
| RETURN_IF_NOT_OK(sampler_->ResetSampler()); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status CelebAOp::ComputeColMap() { | Status CelebAOp::ComputeColMap() { | ||||
| // Set the column name map (base class field) | // Set the column name map (base class field) | ||||
| if (column_name_id_map_.empty()) { | if (column_name_id_map_.empty()) { | ||||
| @@ -27,6 +27,7 @@ | |||||
| #include "minddata/dataset/util/status.h" | #include "minddata/dataset/util/status.h" | ||||
| #include "minddata/dataset/engine/data_schema.h" | #include "minddata/dataset/engine/data_schema.h" | ||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | #include "minddata/dataset/engine/datasetops/parallel_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #include "minddata/dataset/util/queue.h" | #include "minddata/dataset/util/queue.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/io_block.h" | #include "minddata/dataset/engine/datasetops/source/io_block.h" | ||||
| @@ -41,7 +42,7 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| class CelebAOp : public ParallelOp, RandomAccessOp { | |||||
| class CelebAOp : public MappableLeafOp { | |||||
| public: | public: | ||||
| class Builder { | class Builder { | ||||
| public: | public: | ||||
| @@ -148,27 +149,11 @@ class CelebAOp : public ParallelOp, RandomAccessOp { | |||||
| ~CelebAOp() override = default; | ~CelebAOp() override = default; | ||||
| // Main Loop of CelebAOp | |||||
| // Master thread: Fill IOBlockQueue, then goes to sleep | |||||
| // Worker thread: pulls IOBlock from IOBlockQueue, work on it then put buffer to mOutConnector | |||||
| // @return Status The status code returned | |||||
| Status operator()() override; | |||||
| // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | |||||
| // @param int32_t worker_id - id of each worker | |||||
| // @return Status The status code returned | |||||
| Status WorkerEntry(int32_t worker_id) override; | |||||
| // A print method typically used for debugging | // A print method typically used for debugging | ||||
| // @param out | // @param out | ||||
| // @param show_all | // @param show_all | ||||
| void Print(std::ostream &out, bool show_all) const override; | void Print(std::ostream &out, bool show_all) const override; | ||||
| // Method in operator(), to fill IOBlockQueue | |||||
| // @param std::unique_ptr<DataBuffer> sampler_buffer - to fill IOBlockQueue | |||||
| // @return Status The status code returned | |||||
| Status AddIOBlock(std::unique_ptr<DataBuffer> *data_buffer); | |||||
| // Op name getter | // Op name getter | ||||
| // @return Name of the current Op | // @return Name of the current Op | ||||
| std::string Name() const override { return "CelebAOp"; } | std::string Name() const override { return "CelebAOp"; } | ||||
| @@ -176,7 +161,7 @@ class CelebAOp : public ParallelOp, RandomAccessOp { | |||||
| private: | private: | ||||
| // Called first when function is called | // Called first when function is called | ||||
| // @return | // @return | ||||
| Status LaunchThreadsAndInitOp(); | |||||
| Status LaunchThreadsAndInitOp() override; | |||||
| // Parse attribute file | // Parse attribute file | ||||
| // @return | // @return | ||||
| @@ -191,32 +176,21 @@ class CelebAOp : public ParallelOp, RandomAccessOp { | |||||
| // @return std::vector<std::string> - string after split | // @return std::vector<std::string> - string after split | ||||
| std::vector<std::string> Split(const std::string &line); | std::vector<std::string> Split(const std::string &line); | ||||
| // @param const std::vector<int64_t> &keys - keys in ioblock | |||||
| // @param std::unique_ptr<DataBuffer> db | |||||
| // @return Status The status code returned | |||||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||||
| // Load a tensor row according to a pair | // Load a tensor row according to a pair | ||||
| // @param row_id_type row_id - id for this tensor row | // @param row_id_type row_id - id for this tensor row | ||||
| // @param std::pair - <image_file,<label>> | // @param std::pair - <image_file,<label>> | ||||
| // @param TensorRow row - image & label read into this tensor row | // @param TensorRow row - image & label read into this tensor row | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LoadTensorRow(row_id_type row_id, const std::pair<std::string, std::vector<int32_t>> &image_label, | |||||
| TensorRow *row); | |||||
| Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; | |||||
| // Check if need read according to dataset type | // Check if need read according to dataset type | ||||
| // @return bool - if need read | // @return bool - if need read | ||||
| bool CheckDatasetTypeValid(); | bool CheckDatasetTypeValid(); | ||||
| // reset Op | |||||
| // @return Status The status code returned | |||||
| Status Reset() override; | |||||
| // Private function for computing the assignment of the column name map. | // Private function for computing the assignment of the column name map. | ||||
| // @return - Status | // @return - Status | ||||
| Status ComputeColMap() override; | Status ComputeColMap() override; | ||||
| int32_t rows_per_buffer_; | |||||
| std::string folder_path_; // directory of celeba folder | std::string folder_path_; // directory of celeba folder | ||||
| bool decode_; | bool decode_; | ||||
| std::set<std::string> extensions_; // extensions allowed | std::set<std::string> extensions_; // extensions allowed | ||||
| @@ -88,76 +88,16 @@ Status CifarOp::Builder::SanityCheck() { | |||||
| CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf, | CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf, | ||||
| const std::string &file_dir, int32_t queue_size, std::unique_ptr<DataSchema> data_schema, | const std::string &file_dir, int32_t queue_size, std::unique_ptr<DataSchema> data_schema, | ||||
| std::shared_ptr<SamplerRT> sampler) | std::shared_ptr<SamplerRT> sampler) | ||||
| : ParallelOp(num_works, queue_size, std::move(sampler)), | |||||
| : MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buf), | |||||
| cifar_type_(type), | cifar_type_(type), | ||||
| usage_(usage), | usage_(usage), | ||||
| rows_per_buffer_(rows_per_buf), | |||||
| folder_path_(file_dir), | folder_path_(file_dir), | ||||
| data_schema_(std::move(data_schema)), | |||||
| row_cnt_(0), | |||||
| buf_cnt_(0) { | |||||
| data_schema_(std::move(data_schema)) { | |||||
| constexpr uint64_t kUtilQueueSize = 512; | constexpr uint64_t kUtilQueueSize = 512; | ||||
| cifar_raw_data_block_ = std::make_unique<Queue<std::vector<unsigned char>>>(kUtilQueueSize); | cifar_raw_data_block_ = std::make_unique<Queue<std::vector<unsigned char>>>(kUtilQueueSize); | ||||
| io_block_queues_.Init(num_workers_, queue_size); | io_block_queues_.Init(num_workers_, queue_size); | ||||
| } | } | ||||
| // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work | |||||
| Status CifarOp::operator()() { | |||||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| while (true) { // each iterator is 1 epoch | |||||
| std::vector<int64_t> keys; | |||||
| keys.reserve(rows_per_buffer_); | |||||
| while (sampler_buffer->eoe() == false) { | |||||
| TensorRow sample_row; | |||||
| RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row)); | |||||
| std::shared_ptr<Tensor> sample_ids = sample_row[0]; | |||||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); itr++) { | |||||
| keys.push_back(*itr); | |||||
| row_cnt_++; | |||||
| if ((*itr) >= num_rows_) continue; // index out of bound, skipping | |||||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||||
| keys.clear(); | |||||
| } | |||||
| } | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| if (keys.empty() == false) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||||
| } | |||||
| if (IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||||
| for (int32_t i = 0; i < num_workers_; i++) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| return Status::OK(); | |||||
| } else { // not the last repeat. | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| } | |||||
| if (epoch_sync_flag_) { | |||||
| // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for | |||||
| // the current epoch. | |||||
| RETURN_IF_NOT_OK(WaitForWorkers()); | |||||
| } | |||||
| // If not the last repeat, self-reset and go to loop again. | |||||
| if (!IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK(Reset()); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| UpdateRepeatAndEpochCounter(); | |||||
| } | |||||
| } | |||||
| Status CifarOp::LaunchThreadsAndInitOp() { | Status CifarOp::LaunchThreadsAndInitOp() { | ||||
| if (tree_ == nullptr) { | if (tree_ == nullptr) { | ||||
| RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | ||||
| @@ -175,43 +115,8 @@ Status CifarOp::LaunchThreadsAndInitOp() { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_ | |||||
| // IMPORTANT: 1 IOBlock produces 1 DataBuffer | |||||
| Status CifarOp::WorkerEntry(int32_t worker_id) { | |||||
| TaskManager::FindMe()->Post(); | |||||
| int64_t buffer_id = worker_id; | |||||
| std::unique_ptr<IOBlock> io_block; | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| while (io_block != nullptr) { | |||||
| if (io_block->wait() == true) { | |||||
| // Sync io_block is a signal that master thread wants us to pause and sync with other workers. | |||||
| // The last guy who comes to this sync point should reset the counter and wake up the master thread. | |||||
| if (++num_workers_paused_ == num_workers_) { | |||||
| wait_for_workers_post_.Set(); | |||||
| } | |||||
| } else if (io_block->eoe() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||||
| buffer_id = worker_id; | |||||
| } else if (io_block->eof() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||||
| } else { | |||||
| std::vector<int64_t> keys; | |||||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||||
| if (keys.empty() == true) { | |||||
| return Status::OK(); // empty key is a quit signal for workers | |||||
| } | |||||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||||
| buffer_id += num_workers_; | |||||
| } | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| } | |||||
| RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); | |||||
| } | |||||
| // Load 1 TensorRow (image,label). 1 function call produces 1 TensorTow in a DataBuffer | // Load 1 TensorRow (image,label). 1 function call produces 1 TensorTow in a DataBuffer | ||||
| Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) { | |||||
| Status CifarOp::LoadTensorRow(row_id_type index, TensorRow *trow) { | |||||
| std::shared_ptr<Tensor> label; | std::shared_ptr<Tensor> label; | ||||
| std::shared_ptr<Tensor> fine_label; | std::shared_ptr<Tensor> fine_label; | ||||
| std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first; | std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first; | ||||
| @@ -234,18 +139,6 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer | |||||
| Status CifarOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||||
| for (const int64_t &key : keys) { | |||||
| TensorRow trow; | |||||
| RETURN_IF_NOT_OK(LoadTensorRow(key, &trow)); | |||||
| deq->push_back(std::move(trow)); | |||||
| } | |||||
| (*db)->set_tensor_table(std::move(deq)); | |||||
| return Status::OK(); | |||||
| } | |||||
| void CifarOp::Print(std::ostream &out, bool show_all) const { | void CifarOp::Print(std::ostream &out, bool show_all) const { | ||||
| if (!show_all) { | if (!show_all) { | ||||
| // Call the super class for displaying any common 1-liner info | // Call the super class for displaying any common 1-liner info | ||||
| @@ -260,20 +153,6 @@ void CifarOp::Print(std::ostream &out, bool show_all) const { | |||||
| } | } | ||||
| } | } | ||||
| // Reset Sampler and wakeup Master thread (functor) | |||||
| Status CifarOp::Reset() { | |||||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | |||||
| RETURN_IF_NOT_OK(sampler_->ResetSampler()); | |||||
| row_cnt_ = 0; | |||||
| return Status::OK(); | |||||
| } | |||||
| // hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows | |||||
| Status CifarOp::InitSampler() { | |||||
| RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status CifarOp::ReadCifarBlockDataAsync() { | Status CifarOp::ReadCifarBlockDataAsync() { | ||||
| TaskManager::FindMe()->Post(); | TaskManager::FindMe()->Post(); | ||||
| RETURN_IF_NOT_OK(GetCifarFiles()); | RETURN_IF_NOT_OK(GetCifarFiles()); | ||||
| @@ -26,6 +26,7 @@ | |||||
| #include "minddata/dataset/engine/data_buffer.h" | #include "minddata/dataset/engine/data_buffer.h" | ||||
| #include "minddata/dataset/engine/data_schema.h" | #include "minddata/dataset/engine/data_schema.h" | ||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | #include "minddata/dataset/engine/datasetops/parallel_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #include "minddata/dataset/util/path.h" | #include "minddata/dataset/util/path.h" | ||||
| #include "minddata/dataset/util/queue.h" | #include "minddata/dataset/util/queue.h" | ||||
| @@ -35,7 +36,7 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| class CifarOp : public ParallelOp, public RandomAccessOp { | |||||
| class CifarOp : public MappableLeafOp { | |||||
| public: | public: | ||||
| enum CifarType { kCifar10, kCifar100 }; | enum CifarType { kCifar10, kCifar100 }; | ||||
| @@ -142,17 +143,6 @@ class CifarOp : public ParallelOp, public RandomAccessOp { | |||||
| // Destructor. | // Destructor. | ||||
| ~CifarOp() = default; | ~CifarOp() = default; | ||||
| // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | |||||
| // @param uint32_t workerId - id of each worker | |||||
| // @return Status The status code returned | |||||
| Status WorkerEntry(int32_t worker_id) override; | |||||
| // Main Loop of CifarOp | |||||
| // Master thread: Fill IOBlockQueue, then goes to sleep | |||||
| // Worker thread: pulls IOBlock from IOBlockQueue, work on it then put buffer to mOutConnector | |||||
| // @return Status The status code returned | |||||
| Status operator()() override; | |||||
| // A print method typically used for debugging | // A print method typically used for debugging | ||||
| // @param out | // @param out | ||||
| // @param show_all | // @param show_all | ||||
| @@ -170,32 +160,20 @@ class CifarOp : public ParallelOp, public RandomAccessOp { | |||||
| std::string Name() const override { return "CifarOp"; } | std::string Name() const override { return "CifarOp"; } | ||||
| private: | private: | ||||
| // Initialize Sampler, calls sampler->Init() within | |||||
| // @return Status The status code returned | |||||
| Status InitSampler(); | |||||
| // Load a tensor row according to a pair | // Load a tensor row according to a pair | ||||
| // @param uint64_t index - index need to load | // @param uint64_t index - index need to load | ||||
| // @param TensorRow row - image & label read into this tensor row | // @param TensorRow row - image & label read into this tensor row | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LoadTensorRow(uint64_t index, TensorRow *row); | |||||
| // @param const std::vector<uint64_t> &keys - keys in ioblock | |||||
| // @param std::unique_ptr<DataBuffer> db | |||||
| // @return Status The status code returned | |||||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||||
| Status LoadTensorRow(row_id_type index, TensorRow *trow) override; | |||||
| private: | |||||
| // Read block data from cifar file | // Read block data from cifar file | ||||
| // @return | // @return | ||||
| Status ReadCifarBlockDataAsync(); | Status ReadCifarBlockDataAsync(); | ||||
| // Called first when function is called | // Called first when function is called | ||||
| // @return | // @return | ||||
| Status LaunchThreadsAndInitOp(); | |||||
| // reset Op | |||||
| // @return Status The status code returned | |||||
| Status Reset() override; | |||||
| Status LaunchThreadsAndInitOp() override; | |||||
| // Get cifar files in dir | // Get cifar files in dir | ||||
| // @return | // @return | ||||
| @@ -223,12 +201,9 @@ class CifarOp : public ParallelOp, public RandomAccessOp { | |||||
| Status ComputeColMap() override; | Status ComputeColMap() override; | ||||
| CifarType cifar_type_; | CifarType cifar_type_; | ||||
| int32_t rows_per_buffer_; | |||||
| std::string folder_path_; | std::string folder_path_; | ||||
| std::unique_ptr<DataSchema> data_schema_; | std::unique_ptr<DataSchema> data_schema_; | ||||
| int64_t row_cnt_; | |||||
| int64_t buf_cnt_; | |||||
| const std::string usage_; // can only be either "train" or "test" | const std::string usage_; // can only be either "train" or "test" | ||||
| std::unique_ptr<Queue<std::vector<unsigned char>>> cifar_raw_data_block_; | std::unique_ptr<Queue<std::vector<unsigned char>>> cifar_raw_data_block_; | ||||
| std::vector<std::string> cifar_files_; | std::vector<std::string> cifar_files_; | ||||
| @@ -124,82 +124,15 @@ Status CocoOp::Builder::SanityCheck() { | |||||
| CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, | CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, | ||||
| int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, | int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, | ||||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | ||||
| : ParallelOp(num_workers, queue_size, std::move(sampler)), | |||||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), | |||||
| decode_(decode), | decode_(decode), | ||||
| row_cnt_(0), | |||||
| buf_cnt_(0), | |||||
| task_type_(task_type), | task_type_(task_type), | ||||
| image_folder_path_(image_folder_path), | image_folder_path_(image_folder_path), | ||||
| annotation_path_(annotation_path), | annotation_path_(annotation_path), | ||||
| rows_per_buffer_(rows_per_buffer), | |||||
| data_schema_(std::move(data_schema)) { | data_schema_(std::move(data_schema)) { | ||||
| io_block_queues_.Init(num_workers_, queue_size); | io_block_queues_.Init(num_workers_, queue_size); | ||||
| } | } | ||||
| Status CocoOp::TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys) { | |||||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) { | |||||
| if ((*itr) > num_rows_) continue; | |||||
| keys->push_back(*itr); | |||||
| row_cnt_++; | |||||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); | |||||
| keys->clear(); | |||||
| } | |||||
| } | |||||
| return Status::OK(); | |||||
| } | |||||
| Status CocoOp::operator()() { | |||||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| while (true) { | |||||
| std::vector<int64_t> keys; | |||||
| keys.reserve(rows_per_buffer_); | |||||
| while (sampler_buffer->eoe() == false) { | |||||
| std::shared_ptr<Tensor> sample_ids; | |||||
| RETURN_IF_NOT_OK(sampler_buffer->GetTensor(&sample_ids, 0, 0)); | |||||
| if (sample_ids->type() != DataType(DataType::DE_INT64)) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, data type of Sampler Tensor isn't int64, got " + | |||||
| sample_ids->type().ToString()); | |||||
| } | |||||
| RETURN_IF_NOT_OK(TraverseSampleIds(sample_ids, &keys)); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| if (keys.empty() == false) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||||
| } | |||||
| if (IsLastIteration()) { | |||||
| std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||||
| std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); | |||||
| for (int32_t i = 0; i < num_workers_; i++) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| return Status::OK(); | |||||
| } else { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| } | |||||
| if (epoch_sync_flag_) { | |||||
| // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for | |||||
| // the current epoch. | |||||
| RETURN_IF_NOT_OK(WaitForWorkers()); | |||||
| } | |||||
| // If not the last repeat, self-reset and go to loop again. | |||||
| if (!IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK(Reset()); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| UpdateRepeatAndEpochCounter(); | |||||
| } | |||||
| } | |||||
| void CocoOp::Print(std::ostream &out, bool show_all) const { | void CocoOp::Print(std::ostream &out, bool show_all) const { | ||||
| if (!show_all) { | if (!show_all) { | ||||
| // Call the super class for displaying any common 1-liner info | // Call the super class for displaying any common 1-liner info | ||||
| @@ -215,14 +148,8 @@ void CocoOp::Print(std::ostream &out, bool show_all) const { | |||||
| } | } | ||||
| } | } | ||||
| Status CocoOp::Reset() { | |||||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | |||||
| RETURN_IF_NOT_OK(sampler_->ResetSampler()); | |||||
| row_cnt_ = 0; | |||||
| return Status::OK(); | |||||
| } | |||||
| Status CocoOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, TensorRow *trow) { | |||||
| Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||||
| std::string image_id = image_ids_[row_id]; | |||||
| std::shared_ptr<Tensor> image, coordinate; | std::shared_ptr<Tensor> image, coordinate; | ||||
| auto itr = coordinate_map_.find(image_id); | auto itr = coordinate_map_.find(image_id); | ||||
| if (itr == coordinate_map_.end()) { | if (itr == coordinate_map_.end()) { | ||||
| @@ -374,48 +301,6 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status CocoOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||||
| TensorRow trow; | |||||
| for (const int64_t &key : keys) { | |||||
| RETURN_IF_NOT_OK(this->LoadTensorRow(key, image_ids_[key], &trow)); | |||||
| deq->push_back(std::move(trow)); | |||||
| } | |||||
| (*db)->set_tensor_table(std::move(deq)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status CocoOp::WorkerEntry(int32_t worker_id) { | |||||
| TaskManager::FindMe()->Post(); | |||||
| int64_t buffer_id = worker_id; | |||||
| std::unique_ptr<IOBlock> io_block; | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| while (io_block != nullptr) { | |||||
| if (io_block->wait() == true) { | |||||
| // Sync io_block is a signal that master thread wants us to pause and sync with other workers. | |||||
| // The last guy who comes to this sync point should reset the counter and wake up the master thread. | |||||
| if (++num_workers_paused_ == num_workers_) { | |||||
| wait_for_workers_post_.Set(); | |||||
| } | |||||
| } else if (io_block->eoe() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||||
| buffer_id = worker_id; | |||||
| } else if (io_block->eof() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||||
| } else { | |||||
| std::vector<int64_t> keys; | |||||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||||
| if (keys.empty() == true) return Status::OK(); | |||||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||||
| buffer_id += num_workers_; | |||||
| } | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| } | |||||
| RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); | |||||
| } | |||||
| template <typename T> | template <typename T> | ||||
| Status CocoOp::SearchNodeInJson(const nlohmann::json &input_tree, std::string node_name, T *output_node) { | Status CocoOp::SearchNodeInJson(const nlohmann::json &input_tree, std::string node_name, T *output_node) { | ||||
| auto node = input_tree.find(node_name); | auto node = input_tree.find(node_name); | ||||
| @@ -627,11 +512,6 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status CocoOp::InitSampler() { | |||||
| RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status CocoOp::LaunchThreadsAndInitOp() { | Status CocoOp::LaunchThreadsAndInitOp() { | ||||
| if (tree_ == nullptr) { | if (tree_ == nullptr) { | ||||
| RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | ||||
| @@ -27,6 +27,7 @@ | |||||
| #include "minddata/dataset/engine/data_buffer.h" | #include "minddata/dataset/engine/data_buffer.h" | ||||
| #include "minddata/dataset/engine/data_schema.h" | #include "minddata/dataset/engine/data_schema.h" | ||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | #include "minddata/dataset/engine/datasetops/parallel_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| #include "minddata/dataset/kernels/image/image_utils.h" | #include "minddata/dataset/kernels/image/image_utils.h" | ||||
| @@ -46,7 +47,7 @@ class Queue; | |||||
| using CoordinateRow = std::vector<std::vector<float>>; | using CoordinateRow = std::vector<std::vector<float>>; | ||||
| class CocoOp : public ParallelOp, public RandomAccessOp { | |||||
| class CocoOp : public MappableLeafOp { | |||||
| public: | public: | ||||
| enum class TaskType { Detection = 0, Stuff = 1, Panoptic = 2, Keypoint = 3 }; | enum class TaskType { Detection = 0, Stuff = 1, Panoptic = 2, Keypoint = 3 }; | ||||
| @@ -171,17 +172,6 @@ class CocoOp : public ParallelOp, public RandomAccessOp { | |||||
| // Destructor | // Destructor | ||||
| ~CocoOp() = default; | ~CocoOp() = default; | ||||
| // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | |||||
| // @param int32_t workerId - id of each worker | |||||
| // @return Status The status code returned | |||||
| Status WorkerEntry(int32_t worker_id) override; | |||||
| // Main Loop of CocoOp | |||||
| // Master thread: Fill IOBlockQueue, then goes to sleep | |||||
| // Worker thread: pulls IOBlock from IOBlockQueue, work on it the put buffer to mOutConnector | |||||
| // @return Status The status code returned | |||||
| Status operator()() override; | |||||
| // A print method typically used for debugging | // A print method typically used for debugging | ||||
| // @param out | // @param out | ||||
| // @param show_all | // @param show_all | ||||
| @@ -212,16 +202,12 @@ class CocoOp : public ParallelOp, public RandomAccessOp { | |||||
| Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) override; | Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) override; | ||||
| private: | private: | ||||
| // Initialize Sampler, calls sampler->Init() within | |||||
| // @return Status The status code returned | |||||
| Status InitSampler(); | |||||
| // Load a tensor row according to image id | // Load a tensor row according to image id | ||||
| // @param row_id_type row_id - id for this tensor row | // @param row_id_type row_id - id for this tensor row | ||||
| // @param std::string image_id - image id | // @param std::string image_id - image id | ||||
| // @param TensorRow row - image & target read into this tensor row | // @param TensorRow row - image & target read into this tensor row | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LoadTensorRow(row_id_type row_id, const std::string &image_id, TensorRow *row); | |||||
| Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; | |||||
| // Load a tensor row with vector which a vector to a tensor | // Load a tensor row with vector which a vector to a tensor | ||||
| // @param row_id_type row_id - id for this tensor row | // @param row_id_type row_id - id for this tensor row | ||||
| @@ -259,27 +245,13 @@ class CocoOp : public ParallelOp, public RandomAccessOp { | |||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor); | Status ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor); | ||||
| // @param const std::vector<uint64_t> &keys - keys in ioblock | |||||
| // @param std::unique_ptr<DataBuffer> db | |||||
| // @return Status The status code returned | |||||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||||
| // Read annotation from Annotation folder | // Read annotation from Annotation folder | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status ParseAnnotationIds(); | Status ParseAnnotationIds(); | ||||
| // @param const std::shared_ptr<Tensor> &sample_ids - sample ids of tensor | |||||
| // @param std::vector<int64_t> *keys - image id | |||||
| // @return Status The status code returned | |||||
| Status TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys); | |||||
| // Called first when function is called | // Called first when function is called | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LaunchThreadsAndInitOp(); | |||||
| // Reset dataset state | |||||
| // @return Status The status code returned | |||||
| Status Reset() override; | |||||
| Status LaunchThreadsAndInitOp() override; | |||||
| // @param nlohmann::json image_tree - image tree of json | // @param nlohmann::json image_tree - image tree of json | ||||
| // @param std::vector<std::string> *image_vec - image id list of json | // @param std::vector<std::string> *image_vec - image id list of json | ||||
| @@ -323,12 +295,9 @@ class CocoOp : public ParallelOp, public RandomAccessOp { | |||||
| Status ComputeColMap() override; | Status ComputeColMap() override; | ||||
| bool decode_; | bool decode_; | ||||
| int64_t row_cnt_; | |||||
| int64_t buf_cnt_; | |||||
| std::string image_folder_path_; | std::string image_folder_path_; | ||||
| std::string annotation_path_; | std::string annotation_path_; | ||||
| TaskType task_type_; | TaskType task_type_; | ||||
| int32_t rows_per_buffer_; | |||||
| std::unique_ptr<DataSchema> data_schema_; | std::unique_ptr<DataSchema> data_schema_; | ||||
| std::vector<std::string> image_ids_; | std::vector<std::string> image_ids_; | ||||
| @@ -68,16 +68,13 @@ ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::str | |||||
| bool recursive, bool do_decode, const std::set<std::string> &exts, | bool recursive, bool do_decode, const std::set<std::string> &exts, | ||||
| const std::map<std::string, int32_t> &map, std::unique_ptr<DataSchema> data_schema, | const std::map<std::string, int32_t> &map, std::unique_ptr<DataSchema> data_schema, | ||||
| std::shared_ptr<SamplerRT> sampler) | std::shared_ptr<SamplerRT> sampler) | ||||
| : ParallelOp(num_wkrs, queue_size, std::move(sampler)), | |||||
| rows_per_buffer_(rows_per_buffer), | |||||
| : MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer), | |||||
| folder_path_(file_dir), | folder_path_(file_dir), | ||||
| recursive_(recursive), | recursive_(recursive), | ||||
| decode_(do_decode), | decode_(do_decode), | ||||
| extensions_(exts), | extensions_(exts), | ||||
| class_index_(map), | class_index_(map), | ||||
| data_schema_(std::move(data_schema)), | data_schema_(std::move(data_schema)), | ||||
| row_cnt_(0), | |||||
| buf_cnt_(0), | |||||
| sampler_ind_(0), | sampler_ind_(0), | ||||
| dirname_offset_(0) { | dirname_offset_(0) { | ||||
| folder_name_queue_ = std::make_unique<Queue<std::string>>(num_wkrs * queue_size); | folder_name_queue_ = std::make_unique<Queue<std::string>>(num_wkrs * queue_size); | ||||
| @@ -125,98 +122,9 @@ Status ImageFolderOp::PrescanMasterEntry(const std::string &filedir) { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work | |||||
| Status ImageFolderOp::operator()() { | |||||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| while (true) { // each iterator is 1 epoch | |||||
| std::vector<int64_t> keys; | |||||
| keys.reserve(rows_per_buffer_); | |||||
| while (sampler_buffer->eoe() == false) { | |||||
| TensorRow sample_row; | |||||
| RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row)); | |||||
| std::shared_ptr<Tensor> sample_ids = sample_row[0]; | |||||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) { | |||||
| if ((*itr) >= num_rows_) continue; // index out of bound, skipping | |||||
| keys.push_back(*itr); | |||||
| row_cnt_++; | |||||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[buf_cnt_++ % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||||
| keys.clear(); | |||||
| } | |||||
| } | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| if (keys.empty() == false) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| if (IsLastIteration()) { | |||||
| std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||||
| std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); | |||||
| for (int32_t i = 0; i < num_workers_; ++i) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| return Status::OK(); | |||||
| } else { // not the last repeat. | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| } | |||||
| if (epoch_sync_flag_) { | |||||
| // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for | |||||
| // the current epoch. | |||||
| RETURN_IF_NOT_OK(WaitForWorkers()); | |||||
| } | |||||
| // If not the last repeat, self-reset and go to loop again. | |||||
| if (!IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK(Reset()); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| UpdateRepeatAndEpochCounter(); | |||||
| } | |||||
| } | |||||
| // contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_ | |||||
| // IMPORTANT: 1 IOBlock produces 1 DataBuffer | |||||
| Status ImageFolderOp::WorkerEntry(int32_t worker_id) { | |||||
| TaskManager::FindMe()->Post(); | |||||
| int64_t buffer_id = worker_id; | |||||
| std::unique_ptr<IOBlock> io_block; | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| while (io_block != nullptr) { | |||||
| if (io_block->wait() == true) { | |||||
| // Sync io_block is a signal that master thread wants us to pause and sync with other workers. | |||||
| // The last guy who comes to this sync point should reset the counter and wake up the master thread. | |||||
| if (++num_workers_paused_ == num_workers_) { | |||||
| wait_for_workers_post_.Set(); | |||||
| } | |||||
| } else if (io_block->eoe() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||||
| buffer_id = worker_id; | |||||
| } else if (io_block->eof() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||||
| } else { | |||||
| std::vector<int64_t> keys; | |||||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||||
| if (keys.empty() == true) return Status::OK(); // empty key is a quit signal for workers | |||||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||||
| buffer_id += num_workers_; | |||||
| } | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| } | |||||
| RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); | |||||
| } | |||||
| // Load 1 TensorRow (image,label) using 1 ImageLabelPair. 1 function call produces 1 TensorTow in a DataBuffer | // Load 1 TensorRow (image,label) using 1 ImageLabelPair. 1 function call produces 1 TensorTow in a DataBuffer | ||||
| Status ImageFolderOp::LoadTensorRow(row_id_type row_id, ImageLabelPair pairPtr, TensorRow *trow) { | |||||
| Status ImageFolderOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||||
| ImageLabelPair pairPtr = image_label_pairs_[row_id]; | |||||
| std::shared_ptr<Tensor> image, label; | std::shared_ptr<Tensor> image, label; | ||||
| RETURN_IF_NOT_OK(Tensor::CreateScalar(pairPtr->second, &label)); | RETURN_IF_NOT_OK(Tensor::CreateScalar(pairPtr->second, &label)); | ||||
| RETURN_IF_NOT_OK(Tensor::CreateFromFile(folder_path_ + (pairPtr->first), &image)); | RETURN_IF_NOT_OK(Tensor::CreateFromFile(folder_path_ + (pairPtr->first), &image)); | ||||
| @@ -233,18 +141,6 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, ImageLabelPair pairPtr, | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer | |||||
| Status ImageFolderOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||||
| TensorRow trow; | |||||
| for (const int64_t &key : keys) { | |||||
| RETURN_IF_NOT_OK(this->LoadTensorRow(key, image_label_pairs_[key], &trow)); | |||||
| deq->push_back(std::move(trow)); | |||||
| } | |||||
| (*db)->set_tensor_table(std::move(deq)); | |||||
| return Status::OK(); | |||||
| } | |||||
| void ImageFolderOp::Print(std::ostream &out, bool show_all) const { | void ImageFolderOp::Print(std::ostream &out, bool show_all) const { | ||||
| if (!show_all) { | if (!show_all) { | ||||
| // Call the super class for displaying any common 1-liner info | // Call the super class for displaying any common 1-liner info | ||||
| @@ -260,20 +156,6 @@ void ImageFolderOp::Print(std::ostream &out, bool show_all) const { | |||||
| } | } | ||||
| } | } | ||||
| // Reset Sampler and wakeup Master thread (functor) | |||||
| Status ImageFolderOp::Reset() { | |||||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | |||||
| RETURN_IF_NOT_OK(sampler_->ResetSampler()); | |||||
| row_cnt_ = 0; | |||||
| return Status::OK(); | |||||
| } | |||||
| // hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows | |||||
| Status ImageFolderOp::InitSampler() { | |||||
| RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // Derived from RandomAccessOp | // Derived from RandomAccessOp | ||||
| Status ImageFolderOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | Status ImageFolderOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | ||||
| if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { | if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { | ||||
| @@ -29,6 +29,7 @@ | |||||
| #include "minddata/dataset/engine/data_buffer.h" | #include "minddata/dataset/engine/data_buffer.h" | ||||
| #include "minddata/dataset/engine/data_schema.h" | #include "minddata/dataset/engine/data_schema.h" | ||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | #include "minddata/dataset/engine/datasetops/parallel_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| #include "minddata/dataset/kernels/image/image_utils.h" | #include "minddata/dataset/kernels/image/image_utils.h" | ||||
| @@ -50,7 +51,7 @@ class Queue; | |||||
| using ImageLabelPair = std::shared_ptr<std::pair<std::string, int32_t>>; | using ImageLabelPair = std::shared_ptr<std::pair<std::string, int32_t>>; | ||||
| using FolderImagesPair = std::shared_ptr<std::pair<std::string, std::queue<ImageLabelPair>>>; | using FolderImagesPair = std::shared_ptr<std::pair<std::string, std::queue<ImageLabelPair>>>; | ||||
| class ImageFolderOp : public ParallelOp, public RandomAccessOp { | |||||
| class ImageFolderOp : public MappableLeafOp { | |||||
| public: | public: | ||||
| class Builder { | class Builder { | ||||
| public: | public: | ||||
| @@ -175,22 +176,11 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { | |||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status PrescanMasterEntry(const std::string &dir); | Status PrescanMasterEntry(const std::string &dir); | ||||
| // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | |||||
| // @param int32_t workerId - id of each worker | |||||
| // @return Status The status code returned | |||||
| Status WorkerEntry(int32_t worker_id) override; | |||||
| // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | ||||
| // @param int32_t workerId - id of each worker | // @param int32_t workerId - id of each worker | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status PrescanWorkerEntry(int32_t worker_id); | Status PrescanWorkerEntry(int32_t worker_id); | ||||
| // Main Loop of ImageFolderOp | |||||
| // Master thread: Fill IOBlockQueue, then goes to sleep | |||||
| // Worker thread: pulls IOBlock from IOBlockQueue, work on it then put buffer to mOutConnector | |||||
| // @return Status The status code returned | |||||
| Status operator()() override; | |||||
| // Method derived from RandomAccess Op, enable Sampler to get all ids for each class | // Method derived from RandomAccess Op, enable Sampler to get all ids for each class | ||||
| // @param (std::map<int64_t, std::vector<int64_t >> * map - key label, val all ids for this class | // @param (std::map<int64_t, std::vector<int64_t >> * map - key label, val all ids for this class | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| @@ -217,21 +207,12 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { | |||||
| Status GetNumClasses(int64_t *num_classes) override; | Status GetNumClasses(int64_t *num_classes) override; | ||||
| private: | private: | ||||
| // Initialize Sampler, calls sampler->Init() within | |||||
| // @return Status The status code returned | |||||
| Status InitSampler(); | |||||
| // Load a tensor row according to a pair | // Load a tensor row according to a pair | ||||
| // @param row_id_type row_id - id for this tensor row | // @param row_id_type row_id - id for this tensor row | ||||
| // @param ImageLabelPair pair - <imagefile,label> | // @param ImageLabelPair pair - <imagefile,label> | ||||
| // @param TensorRow row - image & label read into this tensor row | // @param TensorRow row - image & label read into this tensor row | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LoadTensorRow(row_id_type row_id, ImageLabelPair pair, TensorRow *row); | |||||
| // @param const std::vector<int64_t> &keys - keys in ioblock | |||||
| // @param std::unique_ptr<DataBuffer> db | |||||
| // @return Status The status code returned | |||||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||||
| Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; | |||||
| // @param std::string & dir - dir to walk all images | // @param std::string & dir - dir to walk all images | ||||
| // @param int64_t * cnt - number of non folder files under the current dir | // @param int64_t * cnt - number of non folder files under the current dir | ||||
| @@ -244,25 +225,18 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp { | |||||
| // Called first when function is called | // Called first when function is called | ||||
| // @return | // @return | ||||
| Status LaunchThreadsAndInitOp(); | |||||
| // reset Op | |||||
| // @return Status The status code returned | |||||
| Status Reset() override; | |||||
| Status LaunchThreadsAndInitOp() override; | |||||
| // Private function for computing the assignment of the column name map. | // Private function for computing the assignment of the column name map. | ||||
| // @return - Status | // @return - Status | ||||
| Status ComputeColMap() override; | Status ComputeColMap() override; | ||||
| int32_t rows_per_buffer_; | |||||
| std::string folder_path_; // directory of image folder | std::string folder_path_; // directory of image folder | ||||
| bool recursive_; | bool recursive_; | ||||
| bool decode_; | bool decode_; | ||||
| std::set<std::string> extensions_; // extensions allowed | std::set<std::string> extensions_; // extensions allowed | ||||
| std::map<std::string, int32_t> class_index_; | std::map<std::string, int32_t> class_index_; | ||||
| std::unique_ptr<DataSchema> data_schema_; | std::unique_ptr<DataSchema> data_schema_; | ||||
| int64_t row_cnt_; | |||||
| int64_t buf_cnt_; | |||||
| int64_t sampler_ind_; | int64_t sampler_ind_; | ||||
| int64_t dirname_offset_; | int64_t dirname_offset_; | ||||
| std::vector<ImageLabelPair> image_label_pairs_; | std::vector<ImageLabelPair> image_label_pairs_; | ||||
| @@ -67,82 +67,18 @@ Status ManifestOp::Builder::SanityCheck() { | |||||
| ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode, | ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode, | ||||
| const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema, | const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema, | ||||
| std::shared_ptr<SamplerRT> sampler, std::string usage) | std::shared_ptr<SamplerRT> sampler, std::string usage) | ||||
| : ParallelOp(num_works, queue_size, std::move(sampler)), | |||||
| rows_per_buffer_(rows_per_buffer), | |||||
| : MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buffer), | |||||
| io_block_pushed_(0), | io_block_pushed_(0), | ||||
| row_cnt_(0), | |||||
| sampler_ind_(0), | sampler_ind_(0), | ||||
| data_schema_(std::move(data_schema)), | data_schema_(std::move(data_schema)), | ||||
| file_(file), | file_(file), | ||||
| class_index_(class_index), | class_index_(class_index), | ||||
| decode_(decode), | decode_(decode), | ||||
| usage_(usage), | |||||
| buf_cnt_(0) { | |||||
| usage_(usage) { | |||||
| io_block_queues_.Init(num_workers_, queue_size); | io_block_queues_.Init(num_workers_, queue_size); | ||||
| (void)std::transform(usage_.begin(), usage_.end(), usage_.begin(), ::tolower); | (void)std::transform(usage_.begin(), usage_.end(), usage_.begin(), ::tolower); | ||||
| } | } | ||||
| // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work | |||||
| Status ManifestOp::operator()() { | |||||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| return AddIoBlock(&sampler_buffer); | |||||
| } | |||||
| Status ManifestOp::AddIoBlock(std::unique_ptr<DataBuffer> *sampler_buffer) { | |||||
| while (true) { // each iterator is 1 epoch | |||||
| std::vector<int64_t> keys; | |||||
| keys.reserve(rows_per_buffer_); | |||||
| while (!(*sampler_buffer)->eoe()) { | |||||
| TensorRow sample_row; | |||||
| RETURN_IF_NOT_OK((*sampler_buffer)->PopRow(&sample_row)); | |||||
| std::shared_ptr<Tensor> sample_ids = sample_row[0]; | |||||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) { | |||||
| if ((*itr) >= num_rows_) continue; // index out of bound, skipping | |||||
| keys.push_back(*itr); | |||||
| row_cnt_++; | |||||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||||
| keys.clear(); | |||||
| } | |||||
| } | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(sampler_buffer)); | |||||
| } | |||||
| if (keys.empty() == false) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||||
| } | |||||
| if (IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||||
| for (int32_t i = 0; i < num_workers_; i++) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| return Status::OK(); | |||||
| } else { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| } | |||||
| if (epoch_sync_flag_) { | |||||
| // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for | |||||
| // the current epoch. | |||||
| RETURN_IF_NOT_OK(WaitForWorkers()); | |||||
| } | |||||
| // If not the last repeat, self-reset and go to loop again. | |||||
| if (!IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK(Reset()); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(sampler_buffer)); | |||||
| } | |||||
| UpdateRepeatAndEpochCounter(); | |||||
| } | |||||
| } | |||||
| Status ManifestOp::LaunchThreadsAndInitOp() { | Status ManifestOp::LaunchThreadsAndInitOp() { | ||||
| if (tree_ == nullptr) { | if (tree_ == nullptr) { | ||||
| RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | ||||
| @@ -159,44 +95,9 @@ Status ManifestOp::LaunchThreadsAndInitOp() { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_ | |||||
| // IMPORTANT: 1 IOBlock produces 1 DataBuffer | |||||
| Status ManifestOp::WorkerEntry(int32_t worker_id) { | |||||
| TaskManager::FindMe()->Post(); | |||||
| int64_t buffer_id = worker_id; | |||||
| std::unique_ptr<IOBlock> io_block; | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| while (io_block != nullptr) { | |||||
| if (io_block->wait() == true) { | |||||
| // Sync io_block is a signal that master thread wants us to pause and sync with other workers. | |||||
| // The last guy who comes to this sync point should reset the counter and wake up the master thread. | |||||
| if (++num_workers_paused_ == num_workers_) { | |||||
| wait_for_workers_post_.Set(); | |||||
| } | |||||
| } else if (io_block->eoe() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||||
| buffer_id = worker_id; | |||||
| } else if (io_block->eof() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||||
| } else { | |||||
| std::vector<int64_t> keys; | |||||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||||
| if (keys.empty()) { | |||||
| return Status::OK(); // empty key is a quit signal for workers | |||||
| } | |||||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||||
| buffer_id += num_workers_; | |||||
| } | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| } | |||||
| RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); | |||||
| } | |||||
| // Load 1 TensorRow (image,label) using 1 ImageLabelPair. 1 function call produces 1 TensorTow in a DataBuffer | // Load 1 TensorRow (image,label) using 1 ImageLabelPair. 1 function call produces 1 TensorTow in a DataBuffer | ||||
| Status ManifestOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string, std::vector<std::string>> &data, | |||||
| TensorRow *trow) { | |||||
| Status ManifestOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||||
| std::pair<std::string, std::vector<std::string>> data = image_labelname_[static_cast<size_t>(row_id)]; | |||||
| std::shared_ptr<Tensor> image; | std::shared_ptr<Tensor> image; | ||||
| std::shared_ptr<Tensor> label; | std::shared_ptr<Tensor> label; | ||||
| std::vector<int32_t> label_index(data.second.size()); | std::vector<int32_t> label_index(data.second.size()); | ||||
| @@ -222,18 +123,6 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer | |||||
| Status ManifestOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||||
| for (const auto &key : keys) { | |||||
| TensorRow trow; | |||||
| RETURN_IF_NOT_OK(LoadTensorRow(key, image_labelname_[static_cast<size_t>(key)], &trow)); | |||||
| deq->push_back(std::move(trow)); | |||||
| } | |||||
| (*db)->set_tensor_table(std::move(deq)); | |||||
| return Status::OK(); | |||||
| } | |||||
| void ManifestOp::Print(std::ostream &out, bool show_all) const { | void ManifestOp::Print(std::ostream &out, bool show_all) const { | ||||
| if (!show_all) { | if (!show_all) { | ||||
| // Call the super class for displaying any common 1-liner info | // Call the super class for displaying any common 1-liner info | ||||
| @@ -249,20 +138,6 @@ void ManifestOp::Print(std::ostream &out, bool show_all) const { | |||||
| } | } | ||||
| } | } | ||||
| // Reset Sampler and wakeup Master thread (functor) | |||||
| Status ManifestOp::Reset() { | |||||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | |||||
| RETURN_IF_NOT_OK(sampler_->ResetSampler()); | |||||
| row_cnt_ = 0; | |||||
| return Status::OK(); | |||||
| } | |||||
| // hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows | |||||
| Status ManifestOp::InitSampler() { | |||||
| RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // Derived from RandomAccessOp | // Derived from RandomAccessOp | ||||
| Status ManifestOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | Status ManifestOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | ||||
| if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) { | if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) { | ||||
| @@ -26,6 +26,7 @@ | |||||
| #include "minddata/dataset/engine/data_buffer.h" | #include "minddata/dataset/engine/data_buffer.h" | ||||
| #include "minddata/dataset/engine/data_schema.h" | #include "minddata/dataset/engine/data_schema.h" | ||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | #include "minddata/dataset/engine/datasetops/parallel_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #include "minddata/dataset/kernels/image/image_utils.h" | #include "minddata/dataset/kernels/image/image_utils.h" | ||||
| #include "minddata/dataset/util/queue.h" | #include "minddata/dataset/util/queue.h" | ||||
| @@ -35,7 +36,7 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| class ManifestOp : public ParallelOp, public RandomAccessOp { | |||||
| class ManifestOp : public MappableLeafOp { | |||||
| public: | public: | ||||
| class Builder { | class Builder { | ||||
| public: | public: | ||||
| @@ -143,17 +144,6 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { | |||||
| // Destructor. | // Destructor. | ||||
| ~ManifestOp() = default; | ~ManifestOp() = default; | ||||
| // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | |||||
| // @param int32_t worker_id - id of each worker | |||||
| // @return Status The status code returned | |||||
| Status WorkerEntry(int32_t worker_id) override; | |||||
| // Main Loop of ManifestOp | |||||
| // Master thread: Fill IOBlockQueue, then goes to sleep | |||||
| // Worker thread: pulls IOBlock from IOBlockQueue, work on it then put buffer to mOutConnector | |||||
| // @return Status The status code returned | |||||
| Status operator()() override; | |||||
| // Method derived from RandomAccess Op, enable Sampler to get all ids for each class | // Method derived from RandomAccess Op, enable Sampler to get all ids for each class | ||||
| // @param (std::map<int64_t, std::vector<int64_t >> * map - key label, val all ids for this class | // @param (std::map<int64_t, std::vector<int64_t >> * map - key label, val all ids for this class | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| @@ -194,27 +184,12 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { | |||||
| Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) override; | Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) override; | ||||
| private: | private: | ||||
| // Initialize Sampler, calls sampler->Init() within | |||||
| // @return Status The status code returned | |||||
| Status InitSampler(); | |||||
| // Method in operator(), to fill IOBlockQueue | |||||
| // @param std::unique_ptr<DataBuffer> sampler_buffer - to fill IOBlockQueue | |||||
| // @return Status The status code returned | |||||
| Status AddIoBlock(std::unique_ptr<DataBuffer> *sampler_buffer); | |||||
| // Load a tensor row according to a pair | // Load a tensor row according to a pair | ||||
| // @param row_id_type row_id - id for this tensor row | // @param row_id_type row_id - id for this tensor row | ||||
| // @param std::pair<std::string, std::vector<std::string>> - <imagefile, <label1, label2...>> | // @param std::pair<std::string, std::vector<std::string>> - <imagefile, <label1, label2...>> | ||||
| // @param TensorRow row - image & label read into this tensor row | // @param TensorRow row - image & label read into this tensor row | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LoadTensorRow(row_id_type row_id, const std::pair<std::string, std::vector<std::string>> &data, | |||||
| TensorRow *row); | |||||
| // @param const std::vector<int64_t> &keys - keys in ioblock | |||||
| // @param std::unique_ptr<DataBuffer> db | |||||
| // @return Status The status code returned | |||||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||||
| Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; | |||||
| // Parse manifest file to get image path and label and so on. | // Parse manifest file to get image path and label and so on. | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| @@ -222,11 +197,7 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { | |||||
| // Called first when function is called | // Called first when function is called | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LaunchThreadsAndInitOp(); | |||||
| // reset Op | |||||
| // @return Status The status code returned | |||||
| Status Reset() override; | |||||
| Status LaunchThreadsAndInitOp() override; | |||||
| // Check if image ia valid.Only support JPEG/PNG/GIF/BMP | // Check if image ia valid.Only support JPEG/PNG/GIF/BMP | ||||
| // @return | // @return | ||||
| @@ -240,16 +211,13 @@ class ManifestOp : public ParallelOp, public RandomAccessOp { | |||||
| // @return - Status | // @return - Status | ||||
| Status ComputeColMap() override; | Status ComputeColMap() override; | ||||
| int32_t rows_per_buffer_; | |||||
| int64_t io_block_pushed_; | int64_t io_block_pushed_; | ||||
| int64_t row_cnt_; | |||||
| int64_t sampler_ind_; | int64_t sampler_ind_; | ||||
| std::unique_ptr<DataSchema> data_schema_; | std::unique_ptr<DataSchema> data_schema_; | ||||
| std::string file_; // file that store the information of images | std::string file_; // file that store the information of images | ||||
| std::map<std::string, int32_t> class_index_; | std::map<std::string, int32_t> class_index_; | ||||
| bool decode_; | bool decode_; | ||||
| std::string usage_; | std::string usage_; | ||||
| int64_t buf_cnt_; | |||||
| std::map<std::string, int32_t> label_index_; | std::map<std::string, int32_t> label_index_; | ||||
| std::vector<std::pair<std::string, std::vector<std::string>>> image_labelname_; | std::vector<std::pair<std::string, std::vector<std::string>>> image_labelname_; | ||||
| @@ -0,0 +1,152 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include <fstream> | |||||
| #include <unordered_set> | |||||
| #include "utils/ms_utils.h" | |||||
| #include "minddata/dataset/core/config_manager.h" | |||||
| #include "minddata/dataset/core/tensor_shape.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||||
| #include "minddata/dataset/engine/db_connector.h" | |||||
| #include "minddata/dataset/engine/execution_tree.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler, | |||||
| int32_t rows_per_buffer) | |||||
| : ParallelOp(num_wkrs, queue_size, std::move(sampler)), | |||||
| row_cnt_(0), | |||||
| buf_cnt_(0), | |||||
| rows_per_buffer_(rows_per_buffer) {} | |||||
| // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work | |||||
| Status MappableLeafOp::operator()() { | |||||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| while (true) { // each iterator is 1 epoch | |||||
| std::vector<int64_t> keys; | |||||
| keys.reserve(rows_per_buffer_); | |||||
| while (sampler_buffer->eoe() == false) { | |||||
| TensorRow sample_row; | |||||
| RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row)); | |||||
| std::shared_ptr<Tensor> sample_ids = sample_row[0]; | |||||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) { | |||||
| if ((*itr) >= num_rows_) continue; // index out of bound, skipping | |||||
| keys.push_back(*itr); | |||||
| row_cnt_++; | |||||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[buf_cnt_++ % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||||
| keys.clear(); | |||||
| } | |||||
| } | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| if (keys.empty() == false) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| if (IsLastIteration()) { | |||||
| std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||||
| std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); | |||||
| for (int32_t i = 0; i < num_workers_; ++i) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| return Status::OK(); | |||||
| } else { // not the last repeat. | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| } | |||||
| if (epoch_sync_flag_) { | |||||
| // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for | |||||
| // the current epoch. | |||||
| RETURN_IF_NOT_OK(WaitForWorkers()); | |||||
| } | |||||
| // If not the last repeat, self-reset and go to loop again. | |||||
| if (!IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK(Reset()); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| UpdateRepeatAndEpochCounter(); | |||||
| } | |||||
| } | |||||
| // Reset Sampler and wakeup Master thread (functor) | |||||
| Status MappableLeafOp::Reset() { | |||||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | |||||
| RETURN_IF_NOT_OK(sampler_->ResetSampler()); | |||||
| return Status::OK(); | |||||
| } | |||||
// hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows.
// This op derives RandomAccessOp, so `this` is the random-access provider the sampler queries.
Status MappableLeafOp::InitSampler() {
  RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));
  return Status::OK();
}
// contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_
// IMPORTANT: 1 IOBlock produces 1 DataBuffer
// @param int32_t worker_id - id of this worker (also selects its IOBlock queue)
// @return Status The status code returned
Status MappableLeafOp::WorkerEntry(int32_t worker_id) {
  TaskManager::FindMe()->Post();
  int64_t buffer_id = worker_id;
  std::unique_ptr<IOBlock> io_block;
  RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block));
  while (io_block != nullptr) {
    if (io_block->wait() == true) {
      // Sync io_block is a signal that master thread wants us to pause and sync with other workers.
      // The last guy who comes to this sync point should reset the counter and wake up the master thread.
      if (++num_workers_paused_ == num_workers_) {
        wait_for_workers_post_.Set();
      }
    } else if (io_block->eoe() == true) {
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)));
      // Restart buffer ids at the worker id for the next epoch.
      buffer_id = worker_id;
    } else if (io_block->eof() == true) {
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)));
    } else {
      std::vector<int64_t> keys;
      RETURN_IF_NOT_OK(io_block->GetKeys(&keys));
      if (keys.empty() == true) return Status::OK();  // empty key is a quit signal for workers
      std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone);
      RETURN_IF_NOT_OK(LoadBuffer(keys, &db));
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db)));
      // Stride by the worker count so buffer ids stay unique across workers.
      buffer_id += num_workers_;
    }
    RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block));
  }
  RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker");
}
| // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer | |||||
| Status MappableLeafOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||||
| TensorRow trow; | |||||
| for (const int64_t &key : keys) { | |||||
| RETURN_IF_NOT_OK(this->LoadTensorRow(key, &trow)); | |||||
| deq->push_back(std::move(trow)); | |||||
| } | |||||
| (*db)->set_tensor_table(std::move(deq)); | |||||
| return Status::OK(); | |||||
| } | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,110 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_MAPPABLE_LEAF_OP_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_MAPPABLE_LEAF_OP_H_ | |||||
| #include <deque> | |||||
| #include <memory> | |||||
| #include <queue> | |||||
| #include <string> | |||||
| #include <algorithm> | |||||
| #include <map> | |||||
| #include <set> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "minddata/dataset/core/tensor.h" | |||||
| #include "minddata/dataset/engine/data_buffer.h" | |||||
| #include "minddata/dataset/engine/data_schema.h" | |||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | |||||
| #ifndef ENABLE_ANDROID | |||||
| #include "minddata/dataset/kernels/image/image_utils.h" | |||||
| #else | |||||
| #include "minddata/dataset/kernels/image/lite_image_utils.h" | |||||
| #endif | |||||
| #include "minddata/dataset/util/path.h" | |||||
| #include "minddata/dataset/util/queue.h" | |||||
| #include "minddata/dataset/util/services.h" | |||||
| #include "minddata/dataset/util/status.h" | |||||
| #include "minddata/dataset/util/wait_post.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Forward declares | |||||
| template <typename T> | |||||
| class Queue; | |||||
| using ImageLabelPair = std::shared_ptr<std::pair<std::string, int32_t>>; | |||||
| using FolderImagesPair = std::shared_ptr<std::pair<std::string, std::queue<ImageLabelPair>>>; | |||||
| class MappableLeafOp : public ParallelOp, public RandomAccessOp { | |||||
| public: | |||||
| // Constructor | |||||
| // @param int32_t num_wkrs - Num of workers reading images in parallel | |||||
| // @param int32_t - rows_per_buffer Number of images (rows) in each buffer | |||||
| // @param std::string - dir directory of ImageNetFolder | |||||
| // @param int32_t queue_size - connector queue size | |||||
| // @param std::set<std::string> exts - set of file extensions to read, if empty, read everything under the dir | |||||
| // @param td::unique_ptr<Sampler> sampler - sampler tells the source what to read | |||||
| MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler, int32_t rows_per_buffer); | |||||
| // Destructor. | |||||
| ~MappableLeafOp() = default; | |||||
| // Main Loop of MappableLeaf | |||||
| // Master thread: Fill IOBlockQueue, then goes to sleep | |||||
| // Worker thread: pulls IOBlock from IOBlockQueue, work on it then put buffer to mOutConnector | |||||
| // @return Status The status code returned | |||||
| Status operator()() override; | |||||
| // Op name getter | |||||
| // @return Name of the current Op | |||||
| std::string Name() const override { return "MappableLeafPp"; } | |||||
| protected: | |||||
| // Initialize Sampler, calls sampler->Init() within | |||||
| // @return Status The status code returned | |||||
| Status InitSampler(); | |||||
| // // Called first when function is called | |||||
| // // @return | |||||
| virtual Status LaunchThreadsAndInitOp() = 0; | |||||
| Status WorkerEntry(int32_t workerId) override; | |||||
| // @param const std::vector<int64_t> &keys - keys in ioblock | |||||
| // @param std::unique_ptr<DataBuffer> db | |||||
| // @return Status The status code returned | |||||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||||
| // Load a tensor row according to a pair | |||||
| // @param row_id_type row_id - id for this tensor row | |||||
| // @param ImageLabelPair pair - <imagefile,label> | |||||
| // @param TensorRow row - loaded row | |||||
| // @return Status The status code returned | |||||
| virtual Status LoadTensorRow(row_id_type row_id, TensorRow *row) = 0; | |||||
| // reset Op | |||||
| // @return Status The status code returned | |||||
| Status Reset() override; | |||||
| int32_t rows_per_buffer_; | |||||
| int64_t row_cnt_; | |||||
| int64_t buf_cnt_; | |||||
| }; | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_MAPPABLE_LEAF_OP_H_ | |||||
| @@ -27,6 +27,7 @@ | |||||
| #include "minddata/dataset/core/global_context.h" | #include "minddata/dataset/core/global_context.h" | ||||
| #include "minddata/dataset/engine/data_buffer.h" | #include "minddata/dataset/engine/data_buffer.h" | ||||
| #include "minddata/dataset/engine/datasetops/dataset_op.h" | #include "minddata/dataset/engine/datasetops/dataset_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||||
| #include "minddata/dataset/engine/db_connector.h" | #include "minddata/dataset/engine/db_connector.h" | ||||
| #include "minddata/dataset/engine/execution_tree.h" | #include "minddata/dataset/engine/execution_tree.h" | ||||
| #include "minddata/dataset/util/log_adapter.h" | #include "minddata/dataset/util/log_adapter.h" | ||||
| @@ -115,16 +116,14 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buf | |||||
| const std::vector<std::string> &columns_to_load, | const std::vector<std::string> &columns_to_load, | ||||
| const std::vector<std::shared_ptr<ShardOperator>> &operators, int64_t num_padded, | const std::vector<std::shared_ptr<ShardOperator>> &operators, int64_t num_padded, | ||||
| const mindrecord::json &sample_json, const std::map<std::string, std::string> &sample_bytes) | const mindrecord::json &sample_json, const std::map<std::string, std::string> &sample_bytes) | ||||
| : ParallelOp(num_mind_record_workers, op_connector_queue_size), | |||||
| rows_per_buffer_(rows_per_buffer), | |||||
| : MappableLeafOp(num_mind_record_workers, op_connector_queue_size, std::make_shared<SequentialSamplerRT>(0, 0), | |||||
| rows_per_buffer), | |||||
| dataset_file_(dataset_file), | dataset_file_(dataset_file), | ||||
| load_dataset_(load_dataset), | load_dataset_(load_dataset), | ||||
| columns_to_load_(columns_to_load), | columns_to_load_(columns_to_load), | ||||
| operators_(operators), | operators_(operators), | ||||
| num_mind_record_workers_(num_mind_record_workers), | num_mind_record_workers_(num_mind_record_workers), | ||||
| num_rows_(0), | |||||
| buffers_needed_(0), | buffers_needed_(0), | ||||
| buf_cnt_(0), | |||||
| ended_worker_(0), | ended_worker_(0), | ||||
| num_padded_(num_padded), | num_padded_(num_padded), | ||||
| sample_json_(sample_json), | sample_json_(sample_json), | ||||
| @@ -379,61 +378,19 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // Class functor operator () override. | |||||
| // All dataset ops operate by launching a thread (see ExecutionTree). This class functor will | |||||
| // provide the master loop that drives the logic for performing the work | |||||
| // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work | |||||
// Master loop: launch workers, compute how many buffers one epoch needs, then fill
// the worker IOBlock queues round-robin; one outer iteration is one epoch.
Status MindRecordOp::operator()() {
  RETURN_IF_NOT_OK(LaunchThreadAndInitOp());
  num_rows_ = shard_reader_->GetNumRows();
  // Compute how many buffers we would need to accomplish rowsPerBuffer
  buffers_needed_ = (num_rows_ + rows_per_buffer_ - 1) / rows_per_buffer_;
  while (true) {  // each iterator is 1 epoch
    for (int32_t i = 0; i < buffers_needed_; ++i) {
      // Each IOBlock carries a single buffer index; the worker resolves it to rows.
      std::vector<int64_t> keys(1, i);
      RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add(
        std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone))));
    }
    if (IsLastIteration()) {
      // Final repeat: emit EOE then EOF, then an empty IOBlock per worker as a quit signal.
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe)));
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof)));
      for (int32_t i = 0; i < num_workers_; i++) {
        RETURN_IF_NOT_OK(io_block_queues_[i]->Add(
          std::move(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))));
      }
      return Status::OK();
    } else {
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe)));
    }
    if (epoch_sync_flag_) {
      // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for
      // the current epoch.
      RETURN_IF_NOT_OK(WaitForWorkers());
    }
    // If not the last repeat, self-reset and go to loop again.
    if (!IsLastIteration()) RETURN_IF_NOT_OK(Reset());
    UpdateRepeatAndEpochCounter();
  }
}
| // Overrides base class reset method. When an operator does a reset, it cleans up any state | // Overrides base class reset method. When an operator does a reset, it cleans up any state | ||||
| // info from it's previous execution and then initializes itself so that it can be executed | // info from it's previous execution and then initializes itself so that it can be executed | ||||
| // again. | // again. | ||||
| Status MindRecordOp::Reset() { | Status MindRecordOp::Reset() { | ||||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | MS_LOG(DEBUG) << Name() << " performing a self-reset."; | ||||
| RETURN_IF_NOT_OK(ParallelOp::Reset()); // Call our super class reset first. | |||||
| RETURN_IF_NOT_OK(MappableLeafOp::Reset()); // Call our super class reset first. | |||||
| shard_reader_->ShuffleTask(); | shard_reader_->ShuffleTask(); | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status MindRecordOp::LaunchThreadAndInitOp() { | |||||
| Status MindRecordOp::LaunchThreadsAndInitOp() { | |||||
| if (tree_ == nullptr) { | if (tree_ == nullptr) { | ||||
| RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | ||||
| } | } | ||||
| @@ -446,6 +403,8 @@ Status MindRecordOp::LaunchThreadAndInitOp() { | |||||
| // Launch main workers that load DataBuffers by reading all images | // Launch main workers that load DataBuffers by reading all images | ||||
| RETURN_IF_NOT_OK( | RETURN_IF_NOT_OK( | ||||
| tree_->LaunchWorkers(num_workers_, std::bind(&MindRecordOp::WorkerEntry, this, std::placeholders::_1), "", id())); | tree_->LaunchWorkers(num_workers_, std::bind(&MindRecordOp::WorkerEntry, this, std::placeholders::_1), "", id())); | ||||
| num_rows_ = shard_reader_->GetNumRows(); | |||||
| RETURN_IF_NOT_OK(this->InitSampler()); // pass numRows to Sampler | |||||
| TaskManager::FindMe()->Post(); | TaskManager::FindMe()->Post(); | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -28,7 +28,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "minddata/dataset/engine/data_schema.h" | #include "minddata/dataset/engine/data_schema.h" | ||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include "minddata/dataset/util/queue.h" | #include "minddata/dataset/util/queue.h" | ||||
| #include "minddata/dataset/util/status.h" | #include "minddata/dataset/util/status.h" | ||||
| #include "minddata/mindrecord/include/shard_column.h" | #include "minddata/mindrecord/include/shard_column.h" | ||||
| @@ -50,7 +50,7 @@ using ShardTuple = std::vector<std::tuple<std::vector<uint8_t>, mindrecord::json | |||||
| const int32_t LOG_INTERVAL = 19; | const int32_t LOG_INTERVAL = 19; | ||||
| class MindRecordOp : public ParallelOp { | |||||
| class MindRecordOp : public MappableLeafOp { | |||||
| public: | public: | ||||
| // The nested builder class inside of the MindRecordOp is used to help manage all of the arguments | // The nested builder class inside of the MindRecordOp is used to help manage all of the arguments | ||||
| // for constructing it. Use the builder by setting each argument with the provided set methods, | // for constructing it. Use the builder by setting each argument with the provided set methods, | ||||
| @@ -167,15 +167,9 @@ class MindRecordOp : public ParallelOp { | |||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status WorkerEntry(int32_t worker_id) override; | Status WorkerEntry(int32_t worker_id) override; | ||||
| // Class functor operator () override. | |||||
| // All DatasetOps operate by launching a thread (see ExecutionTree). This class functor will | |||||
| // provide the master loop that drives the logic for performing the work. | |||||
| // @return Status The status code returned | |||||
| Status operator()() override; | |||||
| // Called first when function is called | // Called first when function is called | ||||
| // @return | // @return | ||||
| Status LaunchThreadAndInitOp(); | |||||
| Status LaunchThreadsAndInitOp() override; | |||||
| // Overrides base class reset method. When an operator does a reset, it cleans up any state | // Overrides base class reset method. When an operator does a reset, it cleans up any state | ||||
| // info from it's previous execution and then initializes itself so that it can be executed | // info from it's previous execution and then initializes itself so that it can be executed | ||||
| @@ -183,15 +177,9 @@ class MindRecordOp : public ParallelOp { | |||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status Reset() override; | Status Reset() override; | ||||
| // Getter method | |||||
| int32_t num_rows() const { return num_rows_; } | |||||
| static Status CountTotalRows(const std::vector<std::string> dataset_path, bool load_dataset, | static Status CountTotalRows(const std::vector<std::string> dataset_path, bool load_dataset, | ||||
| const std::shared_ptr<ShardOperator> &op, int64_t *count, int64_t num_padded); | const std::shared_ptr<ShardOperator> &op, int64_t *count, int64_t num_padded); | ||||
| // Getter method | |||||
| int32_t rows_per_buffer() const { return rows_per_buffer_; } | |||||
| // Getter method | // Getter method | ||||
| std::vector<std::string> dataset_file() const { return dataset_file_; } | std::vector<std::string> dataset_file() const { return dataset_file_; } | ||||
| @@ -216,19 +204,19 @@ class MindRecordOp : public ParallelOp { | |||||
| Status LoadTensorRow(TensorRow *tensor_row, const std::vector<uint8_t> &columns_blob, | Status LoadTensorRow(TensorRow *tensor_row, const std::vector<uint8_t> &columns_blob, | ||||
| const mindrecord::json &columns_json, const mindrecord::TaskType task_type); | const mindrecord::json &columns_json, const mindrecord::TaskType task_type); | ||||
| Status LoadTensorRow(row_id_type row_id, TensorRow *row) override { | |||||
| return Status(StatusCode::kMDSyntaxError, "Cannot call this method."); | |||||
| } | |||||
| // Private function for computing the assignment of the column name map. | // Private function for computing the assignment of the column name map. | ||||
| // @return - Status | // @return - Status | ||||
| Status ComputeColMap() override; | Status ComputeColMap() override; | ||||
| int32_t rows_per_buffer_; // The number of requested rows per buffer. | |||||
| std::vector<std::string> dataset_file_; // dataset files | std::vector<std::string> dataset_file_; // dataset files | ||||
| bool load_dataset_; // load dataset from single file or not | bool load_dataset_; // load dataset from single file or not | ||||
| std::vector<std::string> columns_to_load_; // Columns to load from dataset | std::vector<std::string> columns_to_load_; // Columns to load from dataset | ||||
| std::vector<std::shared_ptr<ShardOperator>> operators_; // ShardOperators to use | std::vector<std::shared_ptr<ShardOperator>> operators_; // ShardOperators to use | ||||
| int32_t num_mind_record_workers_; // number of workers to be spawned by ShardReader | int32_t num_mind_record_workers_; // number of workers to be spawned by ShardReader | ||||
| int32_t buffers_needed_; // Counter for the buffers that were fetched | int32_t buffers_needed_; // Counter for the buffers that were fetched | ||||
| int64_t buf_cnt_; // Buffer counter | |||||
| int32_t num_rows_; // One more than the last row id in the range for this cache | |||||
| std::atomic<int32_t> ended_worker_; | std::atomic<int32_t> ended_worker_; | ||||
| int64_t num_padded_; | int64_t num_padded_; | ||||
| @@ -75,117 +75,18 @@ Status MnistOp::Builder::SanityCheck() { | |||||
| MnistOp::MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path, | MnistOp::MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path, | ||||
| int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | ||||
| : ParallelOp(num_workers, queue_size, std::move(sampler)), | |||||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), | |||||
| usage_(usage), | usage_(usage), | ||||
| buf_cnt_(0), | |||||
| row_cnt_(0), | |||||
| folder_path_(folder_path), | folder_path_(folder_path), | ||||
| rows_per_buffer_(rows_per_buffer), | |||||
| image_path_({}), | image_path_({}), | ||||
| label_path_({}), | label_path_({}), | ||||
| data_schema_(std::move(data_schema)) { | data_schema_(std::move(data_schema)) { | ||||
| io_block_queues_.Init(num_workers, queue_size); | io_block_queues_.Init(num_workers, queue_size); | ||||
| } | } | ||||
// Walk one tensor of sampler ids: append every in-range id to *keys and flush a full
// IOBlock to a worker queue each time rows_per_buffer_ ids have accumulated.
Status MnistOp::TraversalSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys) {
  for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) {
    if ((*itr) >= num_rows_) continue;  // index out of bound, skipping
    keys->push_back(*itr);
    row_cnt_++;
    if (row_cnt_ % rows_per_buffer_ == 0) {
      RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add(
        std::make_unique<IOBlock>(IOBlock(*keys, IOBlock::kDeIoBlockNone))));
      keys->clear();
    }
  }
  return Status::OK();
}
// functor that contains the main logic of MNIST op: master loop converting sampler
// ids into IOBlocks for the workers; one outer iteration is one epoch.
Status MnistOp::operator()() {
  RETURN_IF_NOT_OK(LaunchThreadsAndInitOp());
  std::unique_ptr<DataBuffer> sampler_buffer;
  RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
  while (true) {  // each iterator is 1 epoch
    std::vector<int64_t> keys;
    keys.reserve(rows_per_buffer_);
    while (sampler_buffer->eoe() == false) {
      std::shared_ptr<Tensor> sample_ids;
      RETURN_IF_NOT_OK(sampler_buffer->GetTensor(&sample_ids, 0, 0));
      // Validate before iterating with begin<int64_t>() below (in TraversalSampleIds).
      if (sample_ids->type() != DataType(DataType::DE_INT64)) {
        RETURN_STATUS_UNEXPECTED("Invalid parameter, data type of Sampler Tensor isn't int64, got " +
                                 sample_ids->type().ToString());
      }
      RETURN_IF_NOT_OK(TraversalSampleIds(sample_ids, &keys));
      RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
    }
    // Flush the trailing partial buffer, if any.
    if (keys.empty() == false) {
      RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(
        std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone))));
    }
    if (IsLastIteration()) {
      // Final repeat: emit EOE then EOF, then an empty IOBlock per worker as a quit signal.
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe)));
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof)));
      for (int32_t i = 0; i < num_workers_; ++i) {
        RETURN_IF_NOT_OK(
          io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone)));
      }
      return Status::OK();
    } else {
      RETURN_IF_NOT_OK(
        io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe)));
    }
    if (epoch_sync_flag_) {
      // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for
      // the current epoch.
      RETURN_IF_NOT_OK(WaitForWorkers());
    }
    // If not the last repeat, self-reset and go to loop again.
    if (!IsLastIteration()) {
      RETURN_IF_NOT_OK(Reset());
      RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
    }
    UpdateRepeatAndEpochCounter();
  }
}
// contains the logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_
// One IOBlock produces one DataBuffer.
Status MnistOp::WorkerEntry(int32_t worker_id) {
  TaskManager::FindMe()->Post();
  int64_t buffer_id = worker_id;
  std::unique_ptr<IOBlock> iOBlock;
  RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&iOBlock));
  while (iOBlock != nullptr) {
    if (iOBlock->wait() == true) {
      // Sync io_block is a signal that master thread wants us to pause and sync with other workers.
      // The last guy who comes to this sync point should reset the counter and wake up the master thread.
      if (++num_workers_paused_ == num_workers_) {
        wait_for_workers_post_.Set();
      }
    } else if (iOBlock->eoe() == true) {
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)));
      // Restart buffer ids at the worker id for the next epoch.
      buffer_id = worker_id;
    } else if (iOBlock->eof() == true) {
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)));
    } else {
      std::vector<int64_t> keys;
      RETURN_IF_NOT_OK(iOBlock->GetKeys(&keys));
      if (keys.empty() == true) return Status::OK();  // empty key is a quit signal for workers
      std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone);
      RETURN_IF_NOT_OK(LoadBuffer(keys, &db));
      RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db)));
      // Stride by the worker count so buffer ids stay unique across workers.
      buffer_id += num_workers_;
    }
    RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&iOBlock));
  }
  RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker.");
}
| // Load 1 TensorRow (image,label) using 1 MnistLabelPair. | // Load 1 TensorRow (image,label) using 1 MnistLabelPair. | ||||
| Status MnistOp::LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pair, TensorRow *trow) { | |||||
| Status MnistOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||||
| MnistLabelPair mnist_pair = image_label_pairs_[row_id]; | |||||
| std::shared_ptr<Tensor> image, label; | std::shared_ptr<Tensor> image, label; | ||||
| // make a copy of cached tensor | // make a copy of cached tensor | ||||
| RETURN_IF_NOT_OK(Tensor::CreateFromTensor(mnist_pair.first, &image)); | RETURN_IF_NOT_OK(Tensor::CreateFromTensor(mnist_pair.first, &image)); | ||||
| @@ -196,18 +97,6 @@ Status MnistOp::LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pa | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
// Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer
Status MnistOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) {
  std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>();
  TensorRow trow;
  for (const int64_t &key : keys) {
    // key doubles as the row id and the index into the cached image/label pairs.
    RETURN_IF_NOT_OK(this->LoadTensorRow(key, image_label_pairs_[key], &trow));
    deq->push_back(std::move(trow));
  }
  (*db)->set_tensor_table(std::move(deq));
  return Status::OK();
}
| void MnistOp::Print(std::ostream &out, bool show_all) const { | void MnistOp::Print(std::ostream &out, bool show_all) const { | ||||
| if (!show_all) { | if (!show_all) { | ||||
| // Call the super class for displaying any common 1-liner info | // Call the super class for displaying any common 1-liner info | ||||
| @@ -222,20 +111,6 @@ void MnistOp::Print(std::ostream &out, bool show_all) const { | |||||
| } | } | ||||
| } | } | ||||
// Reset Sampler and wakeup Master thread (functor)
Status MnistOp::Reset() {
  MS_LOG(DEBUG) << Name() << " performing a self-reset.";
  RETURN_IF_NOT_OK(sampler_->ResetSampler());
  // Restart per-epoch row counting so the next epoch's buffers fill from zero.
  row_cnt_ = 0;
  return Status::OK();
}
// hand shake with Sampler, allow Sampler to call RandomAccessOp's functions to get NumRows.
// This op derives RandomAccessOp, so `this` is the random-access provider the sampler queries.
Status MnistOp::InitSampler() {
  RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));
  return Status::OK();
}
| // Derived from RandomAccessOp | // Derived from RandomAccessOp | ||||
| Status MnistOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | Status MnistOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | ||||
| if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { | if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { | ||||
| @@ -27,6 +27,7 @@ | |||||
| #include "minddata/dataset/engine/data_buffer.h" | #include "minddata/dataset/engine/data_buffer.h" | ||||
| #include "minddata/dataset/engine/data_schema.h" | #include "minddata/dataset/engine/data_schema.h" | ||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | #include "minddata/dataset/engine/datasetops/parallel_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #include "minddata/dataset/util/path.h" | #include "minddata/dataset/util/path.h" | ||||
| #include "minddata/dataset/util/queue.h" | #include "minddata/dataset/util/queue.h" | ||||
| @@ -41,7 +42,7 @@ class Queue; | |||||
| using MnistLabelPair = std::pair<std::shared_ptr<Tensor>, uint32_t>; | using MnistLabelPair = std::pair<std::shared_ptr<Tensor>, uint32_t>; | ||||
| class MnistOp : public ParallelOp, public RandomAccessOp { | |||||
| class MnistOp : public MappableLeafOp { | |||||
| public: | public: | ||||
| class Builder { | class Builder { | ||||
| public: | public: | ||||
| @@ -131,17 +132,6 @@ class MnistOp : public ParallelOp, public RandomAccessOp { | |||||
| // Destructor. | // Destructor. | ||||
| ~MnistOp() = default; | ~MnistOp() = default; | ||||
| // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | |||||
| // @param int32_t worker_id - id of each worker | |||||
| // @return Status The status code returned | |||||
| Status WorkerEntry(int32_t worker_id) override; | |||||
| // Main Loop of MnistOp | |||||
| // Master thread: Fill IOBlockQueue, then goes to sleep | |||||
| // Worker thread: pulls IOBlock from IOBlockQueue, work on it then put buffer to mOutConnector | |||||
| // @return Status The status code returned | |||||
| Status operator()() override; | |||||
| // Method derived from RandomAccess Op, enable Sampler to get all ids for each class | // Method derived from RandomAccess Op, enable Sampler to get all ids for each class | ||||
| // @param (std::map<uint64_t, std::vector<uint64_t >> * map - key label, val all ids for this class | // @param (std::map<uint64_t, std::vector<uint64_t >> * map - key label, val all ids for this class | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| @@ -163,27 +153,12 @@ class MnistOp : public ParallelOp, public RandomAccessOp { | |||||
| std::string Name() const override { return "MnistOp"; } | std::string Name() const override { return "MnistOp"; } | ||||
| private: | private: | ||||
| // Initialize Sampler, calls sampler->Init() within | |||||
| // @return Status The status code returned | |||||
| Status InitSampler(); | |||||
| // Load a tensor row according to a pair | // Load a tensor row according to a pair | ||||
| // @param row_id_type row_id - id for this tensor row | // @param row_id_type row_id - id for this tensor row | ||||
| // @param ImageLabelPair pair - <imagefile,label> | // @param ImageLabelPair pair - <imagefile,label> | ||||
| // @param TensorRow row - image & label read into this tensor row | // @param TensorRow row - image & label read into this tensor row | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pair, TensorRow *row); | |||||
| // @param const std::vector<int64_t> &keys - keys in ioblock | |||||
| // @param std::unique_ptr<DataBuffer> db | |||||
| // @return Status The status code returned | |||||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||||
| // Iterate through all members in sampleIds and fill them into IOBlock. | |||||
| // @param std::shared_ptr<Tensor> sample_ids - | |||||
| // @param std::vector<int64_t> *keys - keys in ioblock | |||||
| // @return Status The status code returned | |||||
| Status TraversalSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys); | |||||
| Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; | |||||
| // Check image file stream. | // Check image file stream. | ||||
| // @param const std::string *file_name - image file name | // @param const std::string *file_name - image file name | ||||
| @@ -226,20 +201,13 @@ class MnistOp : public ParallelOp, public RandomAccessOp { | |||||
| // Called first when function is called | // Called first when function is called | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LaunchThreadsAndInitOp(); | |||||
| // reset Op | |||||
| // @return Status The status code returned | |||||
| Status Reset() override; | |||||
| Status LaunchThreadsAndInitOp() override; | |||||
| // Private function for computing the assignment of the column name map. | // Private function for computing the assignment of the column name map. | ||||
| // @return - Status | // @return - Status | ||||
| Status ComputeColMap() override; | Status ComputeColMap() override; | ||||
| int64_t buf_cnt_; | |||||
| int64_t row_cnt_; | |||||
| std::string folder_path_; // directory of image folder | std::string folder_path_; // directory of image folder | ||||
| int32_t rows_per_buffer_; | |||||
| const std::string usage_; // can only be either "train" or "test" | const std::string usage_; // can only be either "train" or "test" | ||||
| std::unique_ptr<DataSchema> data_schema_; | std::unique_ptr<DataSchema> data_schema_; | ||||
| std::vector<MnistLabelPair> image_label_pairs_; | std::vector<MnistLabelPair> image_label_pairs_; | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -26,7 +26,7 @@ Status RandomAccessOp::GetNumRowsInDataset(int64_t *num) const { | |||||
| // after it has interacted with it's storage layers. | // after it has interacted with it's storage layers. | ||||
| // Here, it is just a getter method to return the value. However, it is invalid if there is | // Here, it is just a getter method to return the value. However, it is invalid if there is | ||||
| // not a value set for this count, so generate a failure if that is the case. | // not a value set for this count, so generate a failure if that is the case. | ||||
| if (num == nullptr || num_rows_ == 0) { | |||||
| if (num == nullptr || num_rows_ == -1) { | |||||
| RETURN_STATUS_UNEXPECTED("RandomAccessOp has not computed its num rows yet."); | RETURN_STATUS_UNEXPECTED("RandomAccessOp has not computed its num rows yet."); | ||||
| } | } | ||||
| (*num) = num_rows_; | (*num) = num_rows_; | ||||
| @@ -70,9 +70,6 @@ Status SamplerRT::HandshakeRandomAccessOp(const RandomAccessOp *op) { | |||||
| } | } | ||||
| Status SamplerRT::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements) { | Status SamplerRT::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements) { | ||||
| if (num_elements == 0) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid data, num of elements cannot be 0."); | |||||
| } | |||||
| if (col_desc_ == nullptr) { | if (col_desc_ == nullptr) { | ||||
| // a ColDescriptor for Tensor that holds SampleIds | // a ColDescriptor for Tensor that holds SampleIds | ||||
| col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); | col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -70,7 +70,7 @@ Status SequentialSamplerRT::InitSampler() { | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(start_index_ >= 0, | CHECK_FAIL_RETURN_UNEXPECTED(start_index_ >= 0, | ||||
| "Invalid parameter, start_index must be greater than or equal to 0, but got " + | "Invalid parameter, start_index must be greater than or equal to 0, but got " + | ||||
| std::to_string(start_index_) + ".\n"); | std::to_string(start_index_) + ".\n"); | ||||
| CHECK_FAIL_RETURN_UNEXPECTED(start_index_ < num_rows_, | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(start_index_ < num_rows_ || (num_rows_ == 0 && start_index_ == 0), | |||||
| "Invalid parameter, start_index must be less than num_rows, but got start_index: " + | "Invalid parameter, start_index must be less than num_rows, but got start_index: " + | ||||
| std::to_string(start_index_) + ", num_rows: " + std::to_string(num_rows_) + ".\n"); | std::to_string(start_index_) + ", num_rows: " + std::to_string(num_rows_) + ".\n"); | ||||
| CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ >= 0, | CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ >= 0, | ||||
| @@ -83,7 +83,7 @@ Status SequentialSamplerRT::InitSampler() { | |||||
| num_samples_ = available_row_count; | num_samples_ = available_row_count; | ||||
| } | } | ||||
| CHECK_FAIL_RETURN_UNEXPECTED( | CHECK_FAIL_RETURN_UNEXPECTED( | ||||
| num_samples_ > 0 && samples_per_buffer_ > 0, | |||||
| (num_samples_ > 0 && samples_per_buffer_ > 0) || num_samples_ == 0, | |||||
| "Invalid parameter, samples_per_buffer must be greater than 0, but got " + std::to_string(samples_per_buffer_)); | "Invalid parameter, samples_per_buffer must be greater than 0, but got " + std::to_string(samples_per_buffer_)); | ||||
| samples_per_buffer_ = samples_per_buffer_ > num_samples_ ? num_samples_ : samples_per_buffer_; | samples_per_buffer_ = samples_per_buffer_ > num_samples_ ? num_samples_ : samples_per_buffer_; | ||||
| @@ -99,83 +99,16 @@ VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std: | |||||
| const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer, | const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer, | ||||
| int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema, | int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema, | ||||
| std::shared_ptr<SamplerRT> sampler) | std::shared_ptr<SamplerRT> sampler) | ||||
| : ParallelOp(num_workers, queue_size, std::move(sampler)), | |||||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), | |||||
| decode_(decode), | decode_(decode), | ||||
| row_cnt_(0), | |||||
| buf_cnt_(0), | |||||
| task_type_(task_type), | task_type_(task_type), | ||||
| usage_(task_mode), | usage_(task_mode), | ||||
| folder_path_(folder_path), | folder_path_(folder_path), | ||||
| class_index_(class_index), | class_index_(class_index), | ||||
| rows_per_buffer_(rows_per_buffer), | |||||
| data_schema_(std::move(data_schema)) { | data_schema_(std::move(data_schema)) { | ||||
| io_block_queues_.Init(num_workers_, queue_size); | io_block_queues_.Init(num_workers_, queue_size); | ||||
| } | } | ||||
| Status VOCOp::TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys) { | |||||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) { | |||||
| if ((*itr) > num_rows_) continue; | |||||
| keys->push_back(*itr); | |||||
| row_cnt_++; | |||||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); | |||||
| keys->clear(); | |||||
| } | |||||
| } | |||||
| return Status::OK(); | |||||
| } | |||||
| Status VOCOp::operator()() { | |||||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| while (true) { | |||||
| std::vector<int64_t> keys; | |||||
| keys.reserve(rows_per_buffer_); | |||||
| while (sampler_buffer->eoe() == false) { | |||||
| std::shared_ptr<Tensor> sample_ids; | |||||
| RETURN_IF_NOT_OK(sampler_buffer->GetTensor(&sample_ids, 0, 0)); | |||||
| if (sample_ids->type() != DataType(DataType::DE_INT64)) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, data type of Sampler Tensor isn't int64, got " + | |||||
| sample_ids->type().ToString()); | |||||
| } | |||||
| RETURN_IF_NOT_OK(TraverseSampleIds(sample_ids, &keys)); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| if (keys.empty() == false) { | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||||
| } | |||||
| if (IsLastIteration()) { | |||||
| std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||||
| std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); | |||||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); | |||||
| for (int32_t i = 0; i < num_workers_; i++) { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||||
| } | |||||
| return Status::OK(); | |||||
| } else { | |||||
| RETURN_IF_NOT_OK( | |||||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||||
| } | |||||
| if (epoch_sync_flag_) { | |||||
| // If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for | |||||
| // the current epoch. | |||||
| RETURN_IF_NOT_OK(WaitForWorkers()); | |||||
| } | |||||
| // If not the last repeat, self-reset and go to loop again. | |||||
| if (!IsLastIteration()) { | |||||
| RETURN_IF_NOT_OK(Reset()); | |||||
| RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); | |||||
| } | |||||
| UpdateRepeatAndEpochCounter(); | |||||
| } | |||||
| } | |||||
| void VOCOp::Print(std::ostream &out, bool show_all) const { | void VOCOp::Print(std::ostream &out, bool show_all) const { | ||||
| if (!show_all) { | if (!show_all) { | ||||
| // Call the super class for displaying any common 1-liner info | // Call the super class for displaying any common 1-liner info | ||||
| @@ -191,14 +124,8 @@ void VOCOp::Print(std::ostream &out, bool show_all) const { | |||||
| } | } | ||||
| } | } | ||||
| Status VOCOp::Reset() { | |||||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | |||||
| RETURN_IF_NOT_OK(sampler_->ResetSampler()); | |||||
| row_cnt_ = 0; | |||||
| return Status::OK(); | |||||
| } | |||||
| Status VOCOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, TensorRow *trow) { | |||||
| Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||||
| std::string image_id = image_ids_[row_id]; | |||||
| if (task_type_ == TaskType::Segmentation) { | if (task_type_ == TaskType::Segmentation) { | ||||
| std::shared_ptr<Tensor> image, target; | std::shared_ptr<Tensor> image, target; | ||||
| const std::string kImageFile = | const std::string kImageFile = | ||||
| @@ -226,48 +153,6 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Ten | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status VOCOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||||
| TensorRow trow; | |||||
| for (const uint64_t &key : keys) { | |||||
| RETURN_IF_NOT_OK(this->LoadTensorRow(key, image_ids_[key], &trow)); | |||||
| deq->push_back(std::move(trow)); | |||||
| } | |||||
| (*db)->set_tensor_table(std::move(deq)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status VOCOp::WorkerEntry(int32_t worker_id) { | |||||
| TaskManager::FindMe()->Post(); | |||||
| int64_t buffer_id = worker_id; | |||||
| std::unique_ptr<IOBlock> io_block; | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| while (io_block != nullptr) { | |||||
| if (io_block->wait() == true) { | |||||
| // Sync io_block is a signal that master thread wants us to pause and sync with other workers. | |||||
| // The last guy who comes to this sync point should reset the counter and wake up the master thread. | |||||
| if (++num_workers_paused_ == num_workers_) { | |||||
| wait_for_workers_post_.Set(); | |||||
| } | |||||
| } else if (io_block->eoe() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||||
| buffer_id = worker_id; | |||||
| } else if (io_block->eof() == true) { | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||||
| } else { | |||||
| std::vector<int64_t> keys; | |||||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||||
| if (keys.empty() == true) return Status::OK(); | |||||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||||
| buffer_id += num_workers_; | |||||
| } | |||||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||||
| } | |||||
| RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); | |||||
| } | |||||
| Status VOCOp::ParseImageIds() { | Status VOCOp::ParseImageIds() { | ||||
| std::string image_sets_file; | std::string image_sets_file; | ||||
| if (task_type_ == TaskType::Segmentation) { | if (task_type_ == TaskType::Segmentation) { | ||||
| @@ -378,11 +263,6 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status VOCOp::InitSampler() { | |||||
| RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status VOCOp::LaunchThreadsAndInitOp() { | Status VOCOp::LaunchThreadsAndInitOp() { | ||||
| if (tree_ == nullptr) { | if (tree_ == nullptr) { | ||||
| RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); | ||||
| @@ -27,6 +27,7 @@ | |||||
| #include "minddata/dataset/engine/data_buffer.h" | #include "minddata/dataset/engine/data_buffer.h" | ||||
| #include "minddata/dataset/engine/data_schema.h" | #include "minddata/dataset/engine/data_schema.h" | ||||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | #include "minddata/dataset/engine/datasetops/parallel_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #include "minddata/dataset/kernels/image/image_utils.h" | #include "minddata/dataset/kernels/image/image_utils.h" | ||||
| #include "minddata/dataset/util/path.h" | #include "minddata/dataset/util/path.h" | ||||
| @@ -45,7 +46,7 @@ class Queue; | |||||
| using Annotation = std::vector<std::pair<std::string, std::vector<float>>>; | using Annotation = std::vector<std::pair<std::string, std::vector<float>>>; | ||||
| class VOCOp : public ParallelOp, public RandomAccessOp { | |||||
| class VOCOp : public MappableLeafOp { | |||||
| public: | public: | ||||
| enum class TaskType { Segmentation = 0, Detection = 1 }; | enum class TaskType { Segmentation = 0, Detection = 1 }; | ||||
| @@ -175,17 +176,6 @@ class VOCOp : public ParallelOp, public RandomAccessOp { | |||||
| // Destructor | // Destructor | ||||
| ~VOCOp() = default; | ~VOCOp() = default; | ||||
| // Worker thread pulls a number of IOBlock from IOBlock Queue, make a buffer and push it to Connector | |||||
| // @param int32_t workerId - id of each worker | |||||
| // @return Status The status code returned | |||||
| Status WorkerEntry(int32_t worker_id) override; | |||||
| // Main Loop of VOCOp | |||||
| // Master thread: Fill IOBlockQueue, then goes to sleep | |||||
| // Worker thread: pulls IOBlock from IOBlockQueue, work on it the put buffer to mOutConnector | |||||
| // @return Status The status code returned | |||||
| Status operator()() override; | |||||
| // A print method typically used for debugging | // A print method typically used for debugging | ||||
| // @param out | // @param out | ||||
| // @param show_all | // @param show_all | ||||
| @@ -219,16 +209,12 @@ class VOCOp : public ParallelOp, public RandomAccessOp { | |||||
| Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) override; | Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) override; | ||||
| private: | private: | ||||
| // Initialize Sampler, calls sampler->Init() within | |||||
| // @return Status The status code returned | |||||
| Status InitSampler(); | |||||
| // Load a tensor row according to image id | // Load a tensor row according to image id | ||||
| // @param row_id_type row_id - id for this tensor row | // @param row_id_type row_id - id for this tensor row | ||||
| // @param std::string image_id - image id | // @param std::string image_id - image id | ||||
| // @param TensorRow row - image & target read into this tensor row | // @param TensorRow row - image & target read into this tensor row | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LoadTensorRow(row_id_type row_id, const std::string &image_id, TensorRow *row); | |||||
| Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; | |||||
| // @param const std::string &path - path to the image file | // @param const std::string &path - path to the image file | ||||
| // @param const ColDescriptor &col - contains tensor implementation and datatype | // @param const ColDescriptor &col - contains tensor implementation and datatype | ||||
| @@ -241,11 +227,6 @@ class VOCOp : public ParallelOp, public RandomAccessOp { | |||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status ReadAnnotationToTensor(const std::string &path, TensorRow *row); | Status ReadAnnotationToTensor(const std::string &path, TensorRow *row); | ||||
| // @param const std::vector<uint64_t> &keys - keys in ioblock | |||||
| // @param std::unique_ptr<DataBuffer> db | |||||
| // @return Status The status code returned | |||||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||||
| // Read image list from ImageSets | // Read image list from ImageSets | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status ParseImageIds(); | Status ParseImageIds(); | ||||
| @@ -264,18 +245,9 @@ class VOCOp : public ParallelOp, public RandomAccessOp { | |||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| void ParseNodeValue(XMLElement *bbox_node, const char *name, float *value); | void ParseNodeValue(XMLElement *bbox_node, const char *name, float *value); | ||||
| // @param const std::shared_ptr<Tensor> &sample_ids - sample ids of tensor | |||||
| // @param std::vector<int64_t> *keys - image id | |||||
| // @return Status The status code returned | |||||
| Status TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys); | |||||
| // Called first when function is called | // Called first when function is called | ||||
| // @return Status The status code returned | // @return Status The status code returned | ||||
| Status LaunchThreadsAndInitOp(); | |||||
| // Reset dataset state | |||||
| // @return Status The status code returned | |||||
| Status Reset() override; | |||||
| Status LaunchThreadsAndInitOp() override; | |||||
| // Private function for computing the assignment of the column name map. | // Private function for computing the assignment of the column name map. | ||||
| // @return - Status | // @return - Status | ||||
| @@ -154,6 +154,7 @@ if(BUILD_MINDDATA STREQUAL "full") | |||||
| ${MINDDATA_DIR}/engine/datasetops/map_op/cpu_map_job.cc | ${MINDDATA_DIR}/engine/datasetops/map_op/cpu_map_job.cc | ||||
| ${MINDDATA_DIR}/engine/datasetops/source/album_op.cc | ${MINDDATA_DIR}/engine/datasetops/source/album_op.cc | ||||
| ${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc | ${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc | ||||
| ${MINDDATA_DIR}/engine/datasetops/source/mappable_leaf_op.cc | |||||
| ${MINDDATA_DIR}/engine/datasetops/source/io_block.cc | ${MINDDATA_DIR}/engine/datasetops/source/io_block.cc | ||||
| ${MINDDATA_DIR}/engine/opt/pre/getter_pass.cc | ${MINDDATA_DIR}/engine/opt/pre/getter_pass.cc | ||||