From: @hfarahat Reviewed-by: @robingrosman, @pandoublefeng Signed-off-by: @pandoublefeng (pull/14633/MERGE)
| @@ -42,7 +42,6 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) { | |||
| .def("get_numa_enable", &ConfigManager::numa_enable) | |||
| .def("set_numa_enable", &ConfigManager::set_numa_enable) | |||
| .def("get_op_connector_size", &ConfigManager::op_connector_size) | |||
| .def("get_rows_per_buffer", &ConfigManager::rows_per_buffer) | |||
| .def("get_seed", &ConfigManager::seed) | |||
| .def("set_rank_id", &ConfigManager::set_rank_id) | |||
| .def("get_worker_connector_size", &ConfigManager::worker_connector_size) | |||
| @@ -54,7 +53,6 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) { | |||
| .def("get_profiler_file_status", &ConfigManager::get_profiler_file_status) | |||
| .def("set_num_parallel_workers", &ConfigManager::set_num_parallel_workers) | |||
| .def("set_op_connector_size", &ConfigManager::set_op_connector_size) | |||
| .def("set_rows_per_buffer", &ConfigManager::set_rows_per_buffer) | |||
| .def("set_seed", &ConfigManager::set_seed) | |||
| .def("set_worker_connector_size", &ConfigManager::set_worker_connector_size) | |||
| .def("load", [](ConfigManager &c, std::string s) { THROW_IF_ERROR(c.LoadFile(s)); }); | |||
| @@ -31,8 +31,7 @@ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| ConfigManager::ConfigManager() | |||
| : rows_per_buffer_(kCfgRowsPerBuffer), | |||
| num_parallel_workers_(kCfgParallelWorkers), | |||
| : num_parallel_workers_(kCfgParallelWorkers), | |||
| worker_connector_size_(kCfgWorkerConnectorSize), | |||
| op_connector_size_(kCfgOpConnectorSize), | |||
| rank_id_(kCfgDefaultRankId), | |||
| @@ -70,7 +69,6 @@ void ConfigManager::Print(std::ostream &out) const { | |||
| // Don't show the test/internal ones. Only display the main ones here. | |||
| // fyi, boolalpha tells the output stream to write "true" and "false" for bools | |||
| out << "\nClient config settings :" | |||
| << "\nDataCache Rows per buffer : " << rows_per_buffer_ | |||
| << "\nParallelOp workers : " << num_parallel_workers_ | |||
| << "\nParallelOp worker connector size : " << worker_connector_size_ | |||
| << "\nSize of each Connector : " << op_connector_size_ << std::endl; | |||
| @@ -78,7 +76,6 @@ void ConfigManager::Print(std::ostream &out) const { | |||
| // Private helper function that takes a nlohmann json format and populates the settings | |||
| Status ConfigManager::FromJson(const nlohmann::json &j) { | |||
| set_rows_per_buffer(j.value("rowsPerBuffer", rows_per_buffer_)); | |||
| set_num_parallel_workers(j.value("numParallelWorkers", num_parallel_workers_)); | |||
| set_worker_connector_size(j.value("workerConnectorSize", worker_connector_size_)); | |||
| set_op_connector_size(j.value("opConnectorSize", op_connector_size_)); | |||
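For context, a minimal sketch (not part of the PR) of what a settings file consumed by ConfigManager::LoadFile() can look like after this change. Only the keys still read in the FromJson() hunk above are shown, the numeric values are placeholders, and a leftover "rowsPerBuffer" entry would now simply be ignored.

```cpp
// Sketch only: build the same JSON a settings file would contain and print it.
// Key names come from the FromJson() hunk above; values are made up.
#include <iostream>
#include <nlohmann/json.hpp>

int main() {
  nlohmann::json settings = {
    {"numParallelWorkers", 8},
    {"workerConnectorSize", 16},
    {"opConnectorSize", 16}
    // "rowsPerBuffer" is no longer consumed and can be dropped from existing files.
  };
  std::cout << settings.dump(2) << std::endl;
  return 0;
}
```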
| @@ -115,9 +112,6 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) { | |||
| return rc; | |||
| } | |||
| // Setter function | |||
| void ConfigManager::set_rows_per_buffer(int32_t rows_per_buffer) { rows_per_buffer_ = rows_per_buffer; } | |||
| // Setter function | |||
| void ConfigManager::set_num_parallel_workers(int32_t num_parallel_workers) { | |||
| num_parallel_workers_ = num_parallel_workers; | |||
| @@ -74,10 +74,6 @@ class ConfigManager { | |||
| // @return Status error code | |||
| Status LoadFile(const std::string &settingsFile); | |||
| // getter function | |||
| // @return The rows per buffer setting | |||
| int32_t rows_per_buffer() const { return rows_per_buffer_; } | |||
| // getter function | |||
| // @return The number of workers setting | |||
| int32_t num_parallel_workers() const { return num_parallel_workers_; } | |||
| @@ -112,10 +108,6 @@ class ConfigManager { | |||
| /// \return auto_num_workers_ | |||
| bool auto_num_workers() const { return auto_num_workers_; } | |||
| // setter function | |||
| // @param rows_per_buffer - The setting to apply to the config | |||
| void set_rows_per_buffer(int32_t rows_per_buffer); | |||
| // setter function | |||
| // @param num_parallel_workers - The setting to apply to the config | |||
| void set_num_parallel_workers(int32_t num_parallel_workers); | |||
| @@ -230,7 +222,6 @@ class ConfigManager { | |||
| void set_auto_worker_config_(uint8_t cfg) { auto_worker_config_ = cfg; } | |||
| private: | |||
| int32_t rows_per_buffer_; | |||
| int32_t num_parallel_workers_; | |||
| int32_t worker_connector_size_; | |||
| int32_t op_connector_size_; | |||
| @@ -35,7 +35,7 @@ TensorRow::TensorRow(row_id_type id, const std::initializer_list<value_type> &ls | |||
| TensorRow::TensorRow(const TensorRow &tr) | |||
| : id_(tr.id_), path_(tr.path_), row_(tr.row_), tensor_row_flag_(tr.tensor_row_flag_) {} | |||
| TensorRow::TensorRow(TensorRow::TensorRowFlags flag) : tensor_row_flag_(flag) {} | |||
| TensorRow::TensorRow(TensorRow::TensorRowFlags flag) : id_(kDefaultRowId), path_({}), tensor_row_flag_(flag) {} | |||
| TensorRow &TensorRow::operator=(const TensorRow &tr) { | |||
| if (this == &tr) { | |||
| @@ -540,8 +540,7 @@ Status CachePerfRun::Run() { | |||
| int64_t elapse_time = std::chrono::duration_cast<std::chrono::seconds>(end_tick - start_tick).count(); | |||
| std::cout << "Epoch one (build phase) elapsed time " << elapse_time << " seconds" << std::endl; | |||
| std::cout << "Epoch one (build phase) per pipeline per worker summary. Buffer size = " << cfg_.rows_per_buffer() | |||
| << std::endl; | |||
| std::cout << "Epoch one (build phase) per pipeline per worker summary." << std::endl; | |||
| PrintEpochSummary(); | |||
| // Get some stat but we need to connect. The server will thinks it is the (n+1) pipeline | |||
| @@ -228,16 +228,13 @@ Status CachePipelineRun::RunFirstEpoch() { | |||
| } | |||
| std::vector<row_id_type> keys; | |||
| auto rows_per_buffer = cfg_.rows_per_buffer(); | |||
| keys.reserve(rows_per_buffer); | |||
| keys.reserve(1); | |||
| int32_t worker_id = 0; | |||
| for (auto i = start_row_; i <= end_row_; ++i) { | |||
| keys.push_back(i); | |||
| if (keys.size() == rows_per_buffer) { | |||
| auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)); | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk))); | |||
| keys.clear(); | |||
| } | |||
| auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)); | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk))); | |||
| keys.clear(); | |||
| } | |||
| if (!keys.empty()) { | |||
| auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)); | |||
| @@ -355,9 +352,8 @@ Status CachePipelineRun::WriterWorkerEntry(int32_t worker_id) { | |||
| Status CachePipelineRun::RunReadEpoch() { | |||
| std::vector<row_id_type> keys; | |||
| auto rows_per_buffer = cc_->GetPrefetchSize(); // We will use prefetch size to read. | |||
| auto num_workers = cfg_.num_parallel_workers(); | |||
| keys.reserve(rows_per_buffer); | |||
| keys.reserve(1); | |||
| // Spawn workers | |||
| auto f = std::bind(&CachePipelineRun::ReaderWorkerEntry, this, std::placeholders::_1); | |||
| std::vector<Task *> worker_threads; | |||
| @@ -381,11 +377,9 @@ Status CachePipelineRun::RunReadEpoch() { | |||
| int32_t worker_id = 0; | |||
| for (auto id : all_keys) { | |||
| keys.push_back(id); | |||
| if (keys.size() == rows_per_buffer) { | |||
| auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)); | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk))); | |||
| keys.clear(); | |||
| } | |||
| auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)); | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk))); | |||
| keys.clear(); | |||
| } | |||
| if (!keys.empty()) { | |||
| auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)); | |||
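Both hunks above replace the rows_per_buffer batching with one IOBlock per row, handed out round-robin across the worker queues. Below is a standalone sketch of that dispatch pattern; plain containers stand in for IOBlock and the queues, so this is an illustration of the pattern, not MindSpore code.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int32_t num_workers = 4;
  // Each inner vector stands in for one worker's io_block_queue.
  std::vector<std::vector<int64_t>> io_block_queues(num_workers);
  int32_t worker_id = 0;
  for (int64_t row_id = 0; row_id < 10; ++row_id) {
    // One "block" per row, assigned to the next worker in round-robin order.
    io_block_queues[worker_id++ % num_workers].push_back(row_id);
  }
  for (int32_t w = 0; w < num_workers; ++w) {
    std::cout << "worker " << w << " received " << io_block_queues[w].size() << " blocks\n";
  }
  return 0;
}
```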
| @@ -31,7 +31,6 @@ BarrierOp::Builder::Builder() { | |||
| // using the various builder set methods. | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -39,17 +38,13 @@ Status BarrierOp::Builder::SanityCheck() const { return Status::OK(); } | |||
| Status BarrierOp::Builder::Build(std::shared_ptr<BarrierOp> *ptr) { | |||
| RETURN_IF_NOT_OK(SanityCheck()); | |||
| *ptr = std::make_shared<BarrierOp>(builder_rows_per_buffer_, builder_op_connector_size_, builder_condition_name_, | |||
| builder_condition_func_); | |||
| *ptr = std::make_shared<BarrierOp>(builder_op_connector_size_, builder_condition_name_, builder_condition_func_); | |||
| return Status::OK(); | |||
| } | |||
| // Construct BarrierOp here, local variables initialized in operator due to tree construction restrictions | |||
| BarrierOp::BarrierOp(int32_t rows_per_buffer, int32_t op_connector_size, const std::string &condition_name, | |||
| py::function condition_func) | |||
| BarrierOp::BarrierOp(int32_t op_connector_size, const std::string &condition_name, py::function condition_func) | |||
| : PipelineOp(op_connector_size), | |||
| rows_per_buffer_(rows_per_buffer), | |||
| buffer_id_(0), | |||
| clean_up_(false), | |||
| eof_(false), | |||
| condition_name_(condition_name), | |||
| @@ -98,16 +98,13 @@ class BarrierOp : public PipelineOp { | |||
| }; | |||
| // Constructor for BarrierOp | |||
| // @param rows_per_buffer - number of rows in output buffer | |||
| // @param op_connector_size - connector size | |||
| // @param condition_name - the condition name associated with this operator | |||
| // @param condition_func - the blocking condition check per row | |||
| // @note - currently rows_per_buffer should = 1 for barrier. | |||
| // The reason for this is having other values would complicate how the pipeline behaves with other operators | |||
| // One example of such case is having batch after barrier. Batch would be waiting for data and having | |||
| // rows per buffer in this case can result in hanging | |||
| BarrierOp(int32_t rows_per_buffer, int32_t op_connector_size, const std::string &condition_name, | |||
| py::function condition_func); | |||
| BarrierOp(int32_t op_connector_size, const std::string &condition_name, py::function condition_func); | |||
| // Destructor | |||
| ~BarrierOp(); | |||
| @@ -156,10 +153,6 @@ class BarrierOp : public PipelineOp { | |||
| bool clean_up_; | |||
| // end of file state, we stop reading data and shut down | |||
| bool eof_; | |||
| // rows per buffer | |||
| int32_t rows_per_buffer_; | |||
| // buffer_id | |||
| int32_t buffer_id_; | |||
| // iterator to pull new rows, we only have one child | |||
| std::unique_ptr<ChildIterator> child_iterator_; | |||
| // condition name, to support multiple barriers | |||
| @@ -248,7 +248,7 @@ Status BatchOp::WorkerEntry(int32_t workerId) { | |||
| RETURN_IF_NOT_OK(out_connector_->SendEOF(workerId)); | |||
| } else if (table_pair.second.ctrl_ == batchCtrl::kNoCtrl) { | |||
| TensorRow new_row; | |||
| RETURN_IF_NOT_OK(MakeBatchedBuffer(std::move(table_pair), &new_row)); | |||
| RETURN_IF_NOT_OK(MakeBatchedRow(std::move(table_pair), &new_row)); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(std::move(new_row), workerId)); | |||
| } | |||
| RETURN_IF_NOT_OK(worker_queues_[workerId]->PopFront(&table_pair)); | |||
| @@ -256,7 +256,7 @@ Status BatchOp::WorkerEntry(int32_t workerId) { | |||
| return Status::OK(); | |||
| } | |||
| Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row) { | |||
| Status BatchOp::MakeBatchedRow(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row) { | |||
| RETURN_UNEXPECTED_IF_NULL(table_pair.first); | |||
| #ifdef ENABLE_PYTHON | |||
| if (!in_col_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc | |||
| @@ -225,7 +225,7 @@ class BatchOp : public ParallelOp { | |||
| // Generate buffer with batched tensors | |||
| // @return Status The status code returned | |||
| Status MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row); | |||
| Status MakeBatchedRow(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row); | |||
| #ifdef ENABLE_PYTHON | |||
| // Function that calls pyfunc to perform map on batch | |||
| @@ -45,14 +45,13 @@ Status CacheBase::Reset() { | |||
| MS_LOG(DEBUG) << Name() << " performing a self-reset."; | |||
| return Status::OK(); | |||
| } | |||
| CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, | |||
| std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler) | |||
| CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client, | |||
| std::shared_ptr<SamplerRT> sampler) | |||
| : ParallelOp(num_workers, op_connector_size, std::move(sampler)), | |||
| row_cnt_(0), | |||
| num_cache_miss_(0), | |||
| cache_client_(std::move(cache_client)), | |||
| rows_per_buffer_(rows_per_buf), | |||
| prefetch_size_(rows_per_buffer_), | |||
| prefetch_size_(1), | |||
| num_prefetchers_(num_workers_) { | |||
| // Adjust the prefetch size based on the number of workers. | |||
| auto prefetch_sz_per_thread = cache_client_->GetPrefetchSize() / num_prefetchers_; | |||
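The constructor now starts prefetch_size_ at 1 and derives the real per-prefetcher batch size from the cache client, as in the last line of the hunk above. A toy illustration of that division follows; the numbers are made up, and any further adjustment CacheBase applies is not visible in this diff.

```cpp
#include <cstdint>
#include <iostream>

int main() {
  const int32_t num_prefetchers = 4;        // num_workers_ in the hunk above
  const int32_t client_prefetch_size = 64;  // stand-in for cache_client_->GetPrefetchSize()
  const int32_t prefetch_sz_per_thread = client_prefetch_size / num_prefetchers;
  std::cout << "each prefetcher handles batches of " << prefetch_sz_per_thread << " row ids\n";
  return 0;
}
```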
| @@ -92,7 +91,7 @@ Status CacheBase::FetchSamplesToWorkers() { | |||
| row_cnt_ = 0; | |||
| ++wait_cnt; | |||
| std::vector<row_id_type> keys; | |||
| keys.reserve(rows_per_buffer_); | |||
| keys.reserve(1); | |||
| std::vector<row_id_type> prefetch_keys; | |||
| prefetch_keys.reserve(prefetch_size_); | |||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||
| @@ -107,15 +106,11 @@ Status CacheBase::FetchSamplesToWorkers() { | |||
| // Batch enough rows for performance reason. | |||
| if (row_cnt_ % prefetch_size_ == 0) { | |||
| RETURN_IF_NOT_OK(send_to_que(prefetch_queues_, prefetch_cnt++ % num_prefetchers_, prefetch_keys)); | |||
| // Now we tell the WorkerEntry to wait for them to come back. If prefetch_size_ is a multiple | |||
| // of rows_per_buffer_, the keys vector will always be empty. But it can be partially filled. | |||
| // The only requirement we set up is rows_per_buffer_ is less than or equal to prefetch_size_. | |||
| // Now we tell the WorkerEntry to wait for them to come back. | |||
| for (auto row_id : prefetch_keys) { | |||
| keys.push_back(row_id); | |||
| if (keys.size() == rows_per_buffer_) { | |||
| RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys)); | |||
| keys.clear(); | |||
| } | |||
| RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys)); | |||
| keys.clear(); | |||
| } | |||
| prefetch_keys.clear(); | |||
| } | |||
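A compact sketch of the control flow above (again, not the real CacheBase code): row ids accumulate into a prefetch batch, the batch is flushed every prefetch_size rows, and each id in the flushed batch is then forwarded to the workers one row per IO block.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int32_t prefetch_size = 4;
  std::vector<int64_t> prefetch_keys;
  int64_t row_cnt = 0;
  for (int64_t row_id = 0; row_id < 10; ++row_id) {
    ++row_cnt;
    prefetch_keys.push_back(row_id);
    if (row_cnt % prefetch_size == 0) {
      std::cout << "flush prefetch batch of " << prefetch_keys.size() << " ids\n";
      for (int64_t id : prefetch_keys) {
        std::cout << "  io block for row " << id << "\n";  // one block per row after this PR
      }
      prefetch_keys.clear();
    }
  }
  // A final, partially filled batch would be flushed the same way after the loop.
  std::cout << prefetch_keys.size() << " ids left for the final flush\n";
  return 0;
}
```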
| @@ -127,10 +122,8 @@ Status CacheBase::FetchSamplesToWorkers() { | |||
| RETURN_IF_NOT_OK(send_to_que(prefetch_queues_, prefetch_cnt++ % num_prefetchers_, prefetch_keys)); | |||
| for (auto row_id : prefetch_keys) { | |||
| keys.push_back(row_id); | |||
| if (keys.size() == rows_per_buffer_) { | |||
| RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys)); | |||
| keys.clear(); | |||
| } | |||
| RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys)); | |||
| keys.clear(); | |||
| } | |||
| } | |||
| if (!keys.empty()) { | |||
| @@ -42,11 +42,10 @@ class CacheBase : public ParallelOp { | |||
| /// \brief Base class constructor | |||
| /// \param num_workers Number of parallel workers | |||
| /// \param op_connector_size Connector size | |||
| /// \param rows_per_buf Number of rows per buffer | |||
| /// \param cache_client CacheClient for communication to the CacheServer | |||
| /// \param sampler Sampler which is mandatory | |||
| CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, | |||
| std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler); | |||
| CacheBase(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client, | |||
| std::shared_ptr<SamplerRT> sampler); | |||
| /// \brief Destructor | |||
| ~CacheBase(); | |||
| @@ -87,7 +86,6 @@ class CacheBase : public ParallelOp { | |||
| int64_t row_cnt_; | |||
| std::atomic<int64_t> num_cache_miss_; | |||
| std::shared_ptr<CacheClient> cache_client_; | |||
| int32_t rows_per_buffer_; | |||
| std::unique_ptr<Connector<std::vector<row_id_type>>> keys_miss_; | |||
| /// \brief Common function to register resources for interrupt | |||
| @@ -31,7 +31,6 @@ namespace dataset { | |||
| CacheLookupOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| build_num_workers_ = cfg->num_parallel_workers(); | |||
| rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| build_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -52,8 +51,8 @@ Status CacheLookupOp::Builder::SanityCheck() const { | |||
| // The builder "build" method creates the final object and does some init on it | |||
| Status CacheLookupOp::Builder::Build(std::shared_ptr<CacheLookupOp> *ptr) { | |||
| RETURN_IF_NOT_OK(SanityCheck()); | |||
| *ptr = std::make_shared<CacheLookupOp>(build_num_workers_, build_op_connector_size_, rows_per_buffer_, | |||
| build_cache_client_, build_sampler_); | |||
| *ptr = | |||
| std::make_shared<CacheLookupOp>(build_num_workers_, build_op_connector_size_, build_cache_client_, build_sampler_); | |||
| return Status::OK(); | |||
| } | |||
| Status CacheLookupOp::operator()() { | |||
| @@ -74,7 +74,6 @@ class CacheLookupOp : public CacheBase, public SamplerRT { | |||
| private: | |||
| int32_t build_num_workers_; | |||
| int32_t rows_per_buffer_; | |||
| int32_t build_op_connector_size_; | |||
| std::shared_ptr<CacheClient> build_cache_client_; | |||
| std::shared_ptr<SamplerRT> build_sampler_; | |||
| @@ -86,9 +85,9 @@ class CacheLookupOp : public CacheBase, public SamplerRT { | |||
| /// \brief Constructor | |||
| /// \note It takes the same argument as the base class. | |||
| /// \see CacheBase | |||
| CacheLookupOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, | |||
| std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler) | |||
| : CacheBase(num_workers, op_connector_size, rows_per_buf, cache_client, sampler), SamplerRT(*(sampler.get())) {} | |||
| CacheLookupOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client, | |||
| std::shared_ptr<SamplerRT> sampler) | |||
| : CacheBase(num_workers, op_connector_size, cache_client, sampler), SamplerRT(*(sampler.get())) {} | |||
| ~CacheLookupOp() = default; | |||
| // As a parallel op, we override these two functions | |||
| Status operator()() override; | |||
| @@ -33,7 +33,6 @@ namespace dataset { | |||
| CacheOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| build_num_workers_ = cfg->num_parallel_workers(); | |||
| rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| build_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -54,17 +53,16 @@ Status CacheOp::Builder::SanityCheck() const { | |||
| // The builder "build" method creates the final object and does some init on it | |||
| Status CacheOp::Builder::Build(std::shared_ptr<CacheOp> *ptr) { | |||
| RETURN_IF_NOT_OK(SanityCheck()); | |||
| *ptr = std::make_shared<CacheOp>(build_num_workers_, build_op_connector_size_, rows_per_buffer_, build_cache_client_, | |||
| build_sampler_); | |||
| *ptr = std::make_shared<CacheOp>(build_num_workers_, build_op_connector_size_, build_cache_client_, build_sampler_); | |||
| RETURN_IF_NOT_OK((*ptr)->InitCache()); | |||
| return Status::OK(); | |||
| } | |||
| // Constructor of CacheOp | |||
| CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, | |||
| std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler) | |||
| : CacheBase(num_workers, op_connector_size, rows_per_buf, std::move(cache_client), std::move(sampler)), | |||
| CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client, | |||
| std::shared_ptr<SamplerRT> sampler) | |||
| : CacheBase(num_workers, op_connector_size, std::move(cache_client), std::move(sampler)), | |||
| num_guys_in_(0), | |||
| phase_(Phase::kBuildPhase) {} | |||
| @@ -70,14 +70,6 @@ class CacheOp : public CacheBase, public RandomAccessOp { | |||
| return *this; | |||
| } | |||
| /// \brief Setter method | |||
| /// \param rows_per_buffer | |||
| /// \return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| /// \brief Setter method | |||
| /// \param sampler | |||
| /// \return Builder setter method returns reference to the builder. | |||
| @@ -93,7 +85,6 @@ class CacheOp : public CacheBase, public RandomAccessOp { | |||
| private: | |||
| int32_t build_num_workers_; | |||
| int32_t rows_per_buffer_; | |||
| int32_t build_op_connector_size_; | |||
| std::shared_ptr<CacheClient> build_cache_client_; | |||
| std::shared_ptr<SamplerRT> build_sampler_; | |||
| @@ -107,8 +98,8 @@ class CacheOp : public CacheBase, public RandomAccessOp { | |||
| /// \note The builder class should be used to call it. | |||
| /// \param num_workers The number of worker threads. | |||
| /// \param op_connector_size The size of each queue in the connector. | |||
| CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, | |||
| std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler); | |||
| CacheOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client, | |||
| std::shared_ptr<SamplerRT> sampler); | |||
| // Destructor | |||
| ~CacheOp(); | |||
| @@ -41,7 +41,6 @@ constexpr int32_t ShuffleOp::kShuffleStateDrain; | |||
| ShuffleOp::Builder::Builder() : build_shuffle_size_(0), build_reshuffle_each_epoch_(true) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| build_op_connector_size_ = cfg->op_connector_size(); | |||
| build_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| build_shuffle_seed_ = GetSeed(); | |||
| } | |||
| @@ -56,20 +55,17 @@ Status ShuffleOp::Builder::SanityCheck() const { | |||
| Status ShuffleOp::Builder::Build(std::shared_ptr<ShuffleOp> *ptr) { | |||
| RETURN_IF_NOT_OK(SanityCheck()); | |||
| *ptr = std::make_shared<ShuffleOp>(build_shuffle_size_, build_shuffle_seed_, build_op_connector_size_, | |||
| build_reshuffle_each_epoch_, build_rows_per_buffer_); | |||
| build_reshuffle_each_epoch_); | |||
| return Status::OK(); | |||
| } | |||
| // Constructor of the ShuffleOp | |||
| ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch, | |||
| int32_t rows_per_buffer) | |||
| ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch) | |||
| : PipelineOp(op_connector_size), | |||
| shuffle_size_(shuffle_size), | |||
| shuffle_seed_(shuffle_seed), | |||
| reshuffle_each_epoch_(reset_every_epoch), | |||
| rng_(shuffle_seed), | |||
| buffer_counter_(0), | |||
| rows_per_buffer_(rows_per_buffer), | |||
| shuffle_buffer_(std::make_unique<TensorTable>()), | |||
| shuffle_last_row_idx_(0), | |||
| shuffle_buffer_state_(kShuffleStateInit) {} | |||
| @@ -87,7 +83,6 @@ Status ShuffleOp::SelfReset() { | |||
| } | |||
| shuffle_buffer_ = std::make_unique<TensorTable>(); | |||
| buffer_counter_ = 0; | |||
| shuffle_last_row_idx_ = 0; | |||
| shuffle_buffer_state_ = kShuffleStateInit; | |||
| return Status::OK(); | |||
| @@ -104,8 +99,8 @@ void ShuffleOp::Print(std::ostream &out, bool show_all) const { | |||
| // Call the super class for displaying any common detailed info | |||
| PipelineOp::Print(out, show_all); | |||
| // Then show any custom derived-internal stuff | |||
| out << "\nShuffle size: " << shuffle_size_ << "\nRows per buffer: " << rows_per_buffer_ | |||
| << "\nShuffle buffer state: " << shuffle_buffer_state_ << "\nShuffle seed: " << shuffle_seed_ << "\n\n"; | |||
| out << "\nShuffle size: " << shuffle_size_ << "\nShuffle buffer state: " << shuffle_buffer_state_ | |||
| << "\nShuffle seed: " << shuffle_seed_ << "\n\n"; | |||
| } | |||
| } | |||
| @@ -121,9 +121,7 @@ class ShuffleOp : public PipelineOp { | |||
| // @param shuffle_size - The size for the shuffle buffer | |||
| // @param shuffle_seed - The seed to use for random number generation | |||
| // @param op_connector_size - The output connector queue size | |||
| // @param rows_per_buffer - The requested number of rows per buffer | |||
| ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch, | |||
| int32_t rows_per_buffer); | |||
| ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch); | |||
| // Destructor | |||
| ~ShuffleOp() = default; | |||
| @@ -183,8 +181,6 @@ class ShuffleOp : public PipelineOp { | |||
| // (ie uniform_int_distribution) because we will need to create up to |dataset| instances | |||
| // of the distribution object in the common case of a perfect shuffle | |||
| std::mt19937_64 rng_; | |||
| int32_t buffer_counter_; // For creating new buffer id's | |||
| int32_t rows_per_buffer_; // Number of rows to pack into output buffer | |||
| // A single (potentially large) buffer of tensor rows for performing shuffling. | |||
| std::unique_ptr<TensorTable> shuffle_buffer_; | |||
| int32_t shuffle_last_row_idx_; // Internal tracking of the last slot of our shuffle buffer | |||
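The removed rows_per_buffer_ and buffer_counter_ only governed how shuffled rows were packed into output buffers; the shuffle buffer itself is unchanged. For readers unfamiliar with the pattern the surrounding comments describe, here is a generic streaming-shuffle sketch. It is an assumption about the general technique, not ShuffleOp's actual code: a fixed-size buffer fills up, then each incoming row evicts a randomly chosen occupant, with a fresh uniform_int_distribution drawn per row as the comment above notes (draining the buffer at end of data is omitted).

```cpp
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

int main() {
  std::mt19937_64 rng(42);  // plays the role of rng_ seeded with shuffle_seed_
  const int32_t shuffle_size = 4;
  std::vector<int32_t> buffer;
  for (int32_t row = 0; row < 12; ++row) {
    if (buffer.size() < static_cast<size_t>(shuffle_size)) {
      buffer.push_back(row);  // fill phase: just grow the shuffle buffer
      continue;
    }
    // One distribution instance per incoming row, as the comment above describes.
    std::uniform_int_distribution<int32_t> dist(0, shuffle_size - 1);
    const int32_t slot = dist(rng);
    std::cout << buffer[slot] << ' ';  // emit the randomly selected row
    buffer[slot] = row;                // keep the new row for a later draw
  }
  std::cout << '\n';
  return 0;
}
```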
| @@ -32,7 +32,6 @@ namespace dataset { | |||
| AlbumOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -52,9 +51,8 @@ Status AlbumOp::Builder::Build(std::shared_ptr<AlbumOp> *ptr) { | |||
| MS_LOG(INFO) << "Schema file provided: " << builder_schema_file_ << "."; | |||
| builder_schema_->LoadSchemaFile(builder_schema_file_, builder_columns_to_load_); | |||
| } | |||
| *ptr = std::make_shared<AlbumOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, | |||
| builder_op_connector_size_, builder_decode_, builder_extensions_, | |||
| std::move(builder_schema_), std::move(builder_sampler_)); | |||
| *ptr = std::make_shared<AlbumOp>(builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_decode_, | |||
| builder_extensions_, std::move(builder_schema_), std::move(builder_sampler_)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -69,10 +67,10 @@ Status AlbumOp::Builder::SanityCheck() { | |||
| return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode, | |||
| AlbumOp::AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode, | |||
| const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer), | |||
| : MappableLeafOp(num_wkrs, queue_size, std::move(sampler)), | |||
| folder_path_(file_dir), | |||
| decode_(do_decode), | |||
| extensions_(exts), | |||
| @@ -58,14 +58,6 @@ class AlbumOp : public MappableLeafOp { | |||
| /// \brief Destructor. | |||
| ~Builder() = default; | |||
| /// \brief Setter method | |||
| /// \param[in] rows_per_buffer | |||
| /// \return Builder setter method returns reference to the builder | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| /// \brief Setter method | |||
| /// \param[in] size | |||
| /// \return Builder setter method returns reference to the builder | |||
| @@ -154,16 +146,14 @@ class AlbumOp : public MappableLeafOp { | |||
| /// \brief Constructor | |||
| /// \param[in] num_wkrs - Num of workers reading images in parallel | |||
| /// \param[in] rows_per_buffer Number of images (rows) in each buffer | |||
| /// \param[in] file_dir - directory of Album | |||
| /// \param[in] queue_size - connector size | |||
| /// \param[in] do_decode - decode image files | |||
| /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir | |||
| /// \param[in] data_schema - schema of dataset | |||
| /// \param[in] sampler - sampler tells AlbumOp what to read | |||
| AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode, | |||
| const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler); | |||
| AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode, const std::set<std::string> &exts, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler); | |||
| /// \brief Destructor. | |||
| ~AlbumOp() = default; | |||
| @@ -273,7 +263,6 @@ class AlbumOp : public MappableLeafOp { | |||
| /// \return Status The status code returned | |||
| Status ComputeColMap() override; | |||
| int32_t rows_per_buffer_; | |||
| std::string folder_path_; // directory of image folder | |||
| bool decode_; | |||
| std::set<std::string> extensions_; // extensions allowed | |||
| @@ -34,7 +34,6 @@ namespace dataset { | |||
| CelebAOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -54,9 +53,9 @@ Status CelebAOp::Builder::Build(std::shared_ptr<CelebAOp> *op) { | |||
| // label is like this:0 1 0 0 1...... | |||
| RETURN_IF_NOT_OK( | |||
| builder_schema_->AddColumn(ColDescriptor("attr", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| *op = std::make_shared<CelebAOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, | |||
| builder_op_connector_size_, builder_decode_, builder_usage_, builder_extensions_, | |||
| std::move(builder_schema_), std::move(builder_sampler_)); | |||
| *op = std::make_shared<CelebAOp>(builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_decode_, | |||
| builder_usage_, builder_extensions_, std::move(builder_schema_), | |||
| std::move(builder_sampler_)); | |||
| if (*op == nullptr) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "CelebAOp init failed."); | |||
| } | |||
| @@ -76,10 +75,10 @@ Status CelebAOp::Builder::SanityCheck() { | |||
| return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, | |||
| bool decode, const std::string &usage, const std::set<std::string> &exts, | |||
| std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), | |||
| CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, | |||
| const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema, | |||
| std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler)), | |||
| folder_path_(dir), | |||
| decode_(decode), | |||
| extensions_(exts), | |||
| @@ -53,14 +53,6 @@ class CelebAOp : public MappableLeafOp { | |||
| // Destructor. | |||
| ~Builder() = default; | |||
| // Setter method | |||
| // @param int32_t rows_per_buffer | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| // Setter method | |||
| // @param int32_t size | |||
| // @return Builder setter method returns reference to the builder. | |||
| @@ -139,13 +131,11 @@ class CelebAOp : public MappableLeafOp { | |||
| // Constructor | |||
| // @param int32_t - num_workers - Num of workers reading images in parallel | |||
| // @param int32_t - rows_per_buffer Number of images (rows) in each buffer | |||
| // @param std::string - dir directory of celeba dataset | |||
| // @param int32_t queueSize - connector queue size | |||
| // @param std::unique_ptr<Sampler> sampler - sampler tells CelebAOp what to read | |||
| CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, bool decode, | |||
| const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema, | |||
| std::shared_ptr<SamplerRT> sampler); | |||
| CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, const std::string &usage, | |||
| const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler); | |||
| ~CelebAOp() override = default; | |||
| @@ -39,7 +39,6 @@ constexpr uint32_t kCifarImageSize = kCifarImageHeight * kCifarImageWidth * kCif | |||
| CifarOp::Builder::Builder() : sampler_(nullptr), usage_("") { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| num_workers_ = cfg->num_parallel_workers(); | |||
| rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| op_connect_size_ = cfg->op_connector_size(); | |||
| cifar_type_ = kCifar10; | |||
| } | |||
| @@ -65,8 +64,8 @@ Status CifarOp::Builder::Build(std::shared_ptr<CifarOp> *ptr) { | |||
| ColDescriptor("fine_label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &another_scalar))); | |||
| } | |||
| *ptr = std::make_shared<CifarOp>(cifar_type_, usage_, num_workers_, rows_per_buffer_, dir_, op_connect_size_, | |||
| std::move(schema_), std::move(sampler_)); | |||
| *ptr = std::make_shared<CifarOp>(cifar_type_, usage_, num_workers_, dir_, op_connect_size_, std::move(schema_), | |||
| std::move(sampler_)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -85,10 +84,9 @@ Status CifarOp::Builder::SanityCheck() { | |||
| return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf, | |||
| const std::string &file_dir, int32_t queue_size, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buf), | |||
| CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, const std::string &file_dir, | |||
| int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_works, queue_size, std::move(sampler)), | |||
| cifar_type_(type), | |||
| usage_(usage), | |||
| folder_path_(file_dir), | |||
| @@ -49,14 +49,6 @@ class CifarOp : public MappableLeafOp { | |||
| // Destructor. | |||
| ~Builder() = default; | |||
| // Setter method | |||
| // @param uint32_t rows_per_buffer | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| // Setter method | |||
| // @param uint32_t size | |||
| // @return Builder setter method returns reference to the builder. | |||
| @@ -122,7 +114,6 @@ class CifarOp : public MappableLeafOp { | |||
| std::string dir_; | |||
| std::string usage_; | |||
| int32_t num_workers_; | |||
| int32_t rows_per_buffer_; | |||
| int32_t op_connect_size_; | |||
| std::shared_ptr<SamplerRT> sampler_; | |||
| std::unique_ptr<DataSchema> schema_; | |||
| @@ -133,13 +124,11 @@ class CifarOp : public MappableLeafOp { | |||
| // @param CifarType type - Cifar10 or Cifar100 | |||
| // @param const std::string &usage - Usage of this dataset, can be 'train', 'test' or 'all' | |||
| // @param uint32_t numWorks - Num of workers reading images in parallel | |||
| // @param uint32_t - rowsPerBuffer Number of images (rows) in each buffer | |||
| // @param std::string - dir directory of cifar dataset | |||
| // @param uint32_t - queueSize - connector queue size | |||
| // @param std::unique_ptr<Sampler> sampler - sampler tells ImageFolderOp what to read | |||
| CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf, | |||
| const std::string &file_dir, int32_t queue_size, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler); | |||
| CifarOp(CifarType type, const std::string &usage, int32_t num_works, const std::string &file_dir, int32_t queue_size, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler); | |||
| // Destructor. | |||
| ~CifarOp() = default; | |||
| @@ -36,7 +36,6 @@ ClueOp::Builder::Builder() | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| builder_num_workers_ = config_manager->num_parallel_workers(); | |||
| builder_op_connector_size_ = config_manager->op_connector_size(); | |||
| builder_rows_per_buffer_ = config_manager->rows_per_buffer(); | |||
| builder_worker_connector_size_ = config_manager->worker_connector_size(); | |||
| } | |||
| @@ -67,9 +66,8 @@ Status ClueOp::Builder::Build(std::shared_ptr<ClueOp> *op) { | |||
| } | |||
| std::shared_ptr<ClueOp> clue_op = std::make_shared<ClueOp>( | |||
| builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_, ck_map, | |||
| builder_clue_files_list_, builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, | |||
| builder_device_id_); | |||
| builder_num_workers_, builder_num_samples_, builder_worker_connector_size_, ck_map, builder_clue_files_list_, | |||
| builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, builder_device_id_); | |||
| RETURN_IF_NOT_OK(clue_op->Init()); | |||
| *op = std::move(clue_op); | |||
| @@ -87,11 +85,11 @@ std::vector<std::string> ClueOp::Builder::split(const std::string &s, char delim | |||
| return res; | |||
| } | |||
| ClueOp::ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, | |||
| ColKeyMap cols_to_keyword, std::vector<std::string> clue_files_list, int32_t op_connector_size, | |||
| bool shuffle_files, int32_t num_devices, int32_t device_id) | |||
| : NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, num_samples, op_connector_size, | |||
| shuffle_files, num_devices, device_id), | |||
| ClueOp::ClueOp(int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, ColKeyMap cols_to_keyword, | |||
| std::vector<std::string> clue_files_list, int32_t op_connector_size, bool shuffle_files, | |||
| int32_t num_devices, int32_t device_id) | |||
| : NonMappableLeafOp(num_workers, worker_connector_size, num_samples, op_connector_size, shuffle_files, num_devices, | |||
| device_id), | |||
| clue_files_list_(std::move(clue_files_list)), | |||
| cols_to_keyword_(cols_to_keyword) {} | |||
| @@ -200,8 +198,7 @@ void ClueOp::Print(std::ostream &out, bool show_all) const { | |||
| // Call the super class for displaying any common detailed info | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal stuff | |||
| out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << total_rows_ | |||
| << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ | |||
| out << "\nSample count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ | |||
| << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nClue files list:\n"; | |||
| for (int i = 0; i < clue_files_list_.size(); ++i) { | |||
| out << " " << clue_files_list_[i]; | |||
| @@ -138,9 +138,9 @@ class ClueOp : public NonMappableLeafOp { | |||
| }; | |||
| // Constructor of ClueOp | |||
| ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, | |||
| ColKeyMap cols_to_keyword, std::vector<std::string> clue_files_list, int32_t op_connector_size, | |||
| bool shuffle_files, int32_t num_devices, int32_t device_id); | |||
| ClueOp(int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, ColKeyMap cols_to_keyword, | |||
| std::vector<std::string> clue_files_list, int32_t op_connector_size, bool shuffle_files, int32_t num_devices, | |||
| int32_t device_id); | |||
| // Default destructor | |||
| ~ClueOp() = default; | |||
| @@ -50,7 +50,6 @@ const unsigned int kPadValueZero = 0; | |||
| CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| builder_task_type_ = TaskType::Detection; | |||
| } | |||
| @@ -100,8 +99,8 @@ Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic."); | |||
| } | |||
| *ptr = std::make_shared<CocoOp>(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_, | |||
| builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_, | |||
| std::move(builder_schema_), std::move(builder_sampler_)); | |||
| builder_op_connector_size_, builder_decode_, std::move(builder_schema_), | |||
| std::move(builder_sampler_)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -122,9 +121,9 @@ Status CocoOp::Builder::SanityCheck() { | |||
| } | |||
| CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, | |||
| int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), | |||
| int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler)), | |||
| decode_(decode), | |||
| task_type_(task_type), | |||
| image_folder_path_(image_folder_path), | |||
| @@ -109,14 +109,6 @@ class CocoOp : public MappableLeafOp { | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param int32_t rows_per_buffer | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param std::shared_ptr<Sampler> sampler | |||
| // @return Builder setter method returns reference to the builder. | |||
| @@ -159,15 +151,14 @@ class CocoOp : public MappableLeafOp { | |||
| // @param std::string image_folder_path - image folder path of Coco | |||
| // @param std::string annotation_path - annotation json path of Coco | |||
| // @param int32_t num_workers - number of workers reading images in parallel | |||
| // @param int32_t rows_per_buffer - number of images (rows) in each buffer | |||
| // @param int32_t queue_size - connector queue size | |||
| // @param int64_t num_samples - number of samples to read | |||
| // @param bool decode - whether to decode images | |||
| // @param std::unique_ptr<DataSchema> data_schema - the schema of the Coco dataset | |||
| // @param std::shared_ptr<Sampler> sampler - sampler tells CocoOp what to read | |||
| CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, | |||
| int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler); | |||
| int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler); | |||
| // Destructor | |||
| ~CocoOp() = default; | |||
| @@ -32,7 +32,6 @@ CsvOp::Builder::Builder() | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| builder_num_workers_ = config_manager->num_parallel_workers(); | |||
| builder_op_connector_size_ = config_manager->op_connector_size(); | |||
| builder_rows_per_buffer_ = config_manager->rows_per_buffer(); | |||
| builder_worker_connector_size_ = config_manager->worker_connector_size(); | |||
| } | |||
| @@ -59,8 +58,8 @@ Status CsvOp::Builder::Build(std::shared_ptr<CsvOp> *op) { | |||
| std::shared_ptr<CsvOp> csv_op = std::make_shared<CsvOp>( | |||
| builder_csv_files_list_, builder_field_delim_, builder_column_default_list_, builder_column_name_list_, | |||
| builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_, | |||
| builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, builder_device_id_); | |||
| builder_num_workers_, builder_num_samples_, builder_worker_connector_size_, builder_op_connector_size_, | |||
| builder_shuffle_files_, builder_num_devices_, builder_device_id_); | |||
| RETURN_IF_NOT_OK(csv_op->Init()); | |||
| *op = std::move(csv_op); | |||
| @@ -69,11 +68,11 @@ Status CsvOp::Builder::Build(std::shared_ptr<CsvOp> *op) { | |||
| CsvOp::CsvOp(const std::vector<std::string> &csv_files_list, char field_delim, | |||
| const std::vector<std::shared_ptr<BaseRecord>> &column_default, | |||
| const std::vector<std::string> &column_name, int32_t num_workers, int64_t rows_per_buffer, | |||
| int64_t num_samples, int32_t worker_connector_size, int32_t op_connector_size, bool shuffle_files, | |||
| int32_t num_devices, int32_t device_id) | |||
| : NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, num_samples, op_connector_size, | |||
| shuffle_files, num_devices, device_id), | |||
| const std::vector<std::string> &column_name, int32_t num_workers, int64_t num_samples, | |||
| int32_t worker_connector_size, int32_t op_connector_size, bool shuffle_files, int32_t num_devices, | |||
| int32_t device_id) | |||
| : NonMappableLeafOp(num_workers, worker_connector_size, num_samples, op_connector_size, shuffle_files, num_devices, | |||
| device_id), | |||
| csv_files_list_(std::move(csv_files_list)), | |||
| field_delim_(field_delim), | |||
| column_default_list_(column_default), | |||
| @@ -91,11 +90,10 @@ Status CsvOp::Init() { | |||
| return Status::OK(); | |||
| } | |||
| CsvOp::CsvParser::CsvParser(int32_t worker_id, JaggedConnector *connector, int64_t rows_per_buffer, char field_delim, | |||
| CsvOp::CsvParser::CsvParser(int32_t worker_id, JaggedConnector *connector, char field_delim, | |||
| std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default, std::string file_path) | |||
| : worker_id_(worker_id), | |||
| buffer_connector_(connector), | |||
| csv_rows_per_buffer_(rows_per_buffer), | |||
| csv_field_delim_(field_delim), | |||
| column_default_(column_default), | |||
| file_path_(file_path), | |||
| @@ -469,8 +467,7 @@ Status CsvOp::CsvParser::InitCsvParser() { | |||
| } | |||
| Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) { | |||
| CsvParser csv_parser(worker_id, jagged_buffer_connector_.get(), rows_per_buffer_, field_delim_, column_default_list_, | |||
| file); | |||
| CsvParser csv_parser(worker_id, jagged_buffer_connector_.get(), field_delim_, column_default_list_, file); | |||
| csv_parser.SetStartOffset(start_offset); | |||
| csv_parser.SetEndOffset(end_offset); | |||
| std::ifstream ifs; | |||
| @@ -516,8 +513,7 @@ void CsvOp::Print(std::ostream &out, bool show_all) const { | |||
| // Call the super class for displaying any common detailed info | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal stuff | |||
| out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << total_rows_ | |||
| << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ | |||
| out << "\nSample count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ | |||
| << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nCsv files list:\n"; | |||
| for (int i = 0; i < csv_files_list_.size(); ++i) { | |||
| out << " " << csv_files_list_[i]; | |||
| @@ -592,7 +588,7 @@ Status CsvOp::CalculateNumRowsPerShard() { | |||
| } | |||
| int64_t CsvOp::CountTotalRows(const std::string &file) { | |||
| CsvParser csv_parser(0, jagged_buffer_connector_.get(), rows_per_buffer_, field_delim_, column_default_list_, file); | |||
| CsvParser csv_parser(0, jagged_buffer_connector_.get(), field_delim_, column_default_list_, file); | |||
| std::ifstream ifs; | |||
| ifs.open(file, std::ifstream::in); | |||
| if (!ifs.is_open()) { | |||
| @@ -65,7 +65,7 @@ class CsvOp : public NonMappableLeafOp { | |||
| public: | |||
| CsvParser() = delete; | |||
| CsvParser(int32_t worker_id, JaggedConnector *connector, int64_t rows_per_buffer, char field_delim, | |||
| CsvParser(int32_t worker_id, JaggedConnector *connector, char field_delim, | |||
| std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default, std::string file_path); | |||
| ~CsvParser() = default; | |||
| @@ -128,7 +128,6 @@ class CsvOp : public NonMappableLeafOp { | |||
| int32_t worker_id_; | |||
| JaggedConnector *buffer_connector_; | |||
| int64_t csv_rows_per_buffer_; | |||
| const char csv_field_delim_; | |||
| std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default_; | |||
| State cur_state_; | |||
| @@ -261,8 +260,8 @@ class CsvOp : public NonMappableLeafOp { | |||
| CsvOp(const std::vector<std::string> &csv_files_list, char field_delim, | |||
| const std::vector<std::shared_ptr<BaseRecord>> &column_default, const std::vector<std::string> &column_name, | |||
| int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, | |||
| int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id); | |||
| int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, int32_t op_connector_size, | |||
| bool shuffle_files, int32_t num_devices, int32_t device_id); | |||
| // Default destructor | |||
| ~CsvOp() = default; | |||
| @@ -28,7 +28,6 @@ namespace dataset { | |||
| ImageFolderOp::Builder::Builder() : builder_decode_(false), builder_recursive_(false), builder_sampler_(nullptr) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -45,10 +44,9 @@ Status ImageFolderOp::Builder::Build(std::shared_ptr<ImageFolderOp> *ptr) { | |||
| builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar))); | |||
| *ptr = std::make_shared<ImageFolderOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, | |||
| builder_op_connector_size_, builder_recursive_, builder_decode_, | |||
| builder_extensions_, builder_labels_to_read_, std::move(builder_schema_), | |||
| std::move(builder_sampler_)); | |||
| *ptr = std::make_shared<ImageFolderOp>( | |||
| builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_recursive_, builder_decode_, | |||
| builder_extensions_, builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -64,11 +62,10 @@ Status ImageFolderOp::Builder::SanityCheck() { | |||
| return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, | |||
| bool recursive, bool do_decode, const std::set<std::string> &exts, | |||
| const std::map<std::string, int32_t> &map, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer), | |||
| ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode, | |||
| const std::set<std::string> &exts, const std::map<std::string, int32_t> &map, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_wkrs, queue_size, std::move(sampler)), | |||
| folder_path_(file_dir), | |||
| recursive_(recursive), | |||
| decode_(do_decode), | |||
| @@ -63,14 +63,6 @@ class ImageFolderOp : public MappableLeafOp { | |||
| // Destructor. | |||
| ~Builder() = default; | |||
| // Setter method | |||
| // @param int32_t rows_per_buffer | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| // Setter method | |||
| // @param int32_t size | |||
| // @return Builder setter method returns reference to the builder. | |||
| @@ -159,13 +151,12 @@ class ImageFolderOp : public MappableLeafOp { | |||
| // Constructor | |||
| // @param int32_t num_wkrs - Num of workers reading images in parallel | |||
| // @param int32_t - rows_per_buffer Number of images (rows) in each buffer | |||
| // @param std::string - dir directory of ImageNetFolder | |||
| // @param int32_t queue_size - connector queue size | |||
| // @param std::set<std::string> exts - set of file extensions to read, if empty, read everything under the dir | |||
| // @param td::unique_ptr<Sampler> sampler - sampler tells ImageFolderOp what to read | |||
| ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool recursive, | |||
| bool do_decode, const std::set<std::string> &exts, const std::map<std::string, int32_t> &map, | |||
| ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode, | |||
| const std::set<std::string> &exts, const std::map<std::string, int32_t> &map, | |||
| std::unique_ptr<DataSchema>, std::shared_ptr<SamplerRT> sampler); | |||
| // Destructor. | |||
| @@ -33,7 +33,6 @@ namespace dataset { | |||
| ManifestOp::Builder::Builder() : builder_sampler_(nullptr), builder_decode_(false) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -49,9 +48,9 @@ Status ManifestOp::Builder::Build(std::shared_ptr<ManifestOp> *ptr) { | |||
| builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK( | |||
| builder_schema_->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| *ptr = std::make_shared<ManifestOp>(builder_num_workers_, builder_rows_per_buffer_, builder_file_, | |||
| builder_op_connector_size_, builder_decode_, builder_labels_to_read_, | |||
| std::move(builder_schema_), std::move(builder_sampler_), builder_usage_); | |||
| *ptr = std::make_shared<ManifestOp>(builder_num_workers_, builder_file_, builder_op_connector_size_, builder_decode_, | |||
| builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_), | |||
| builder_usage_); | |||
| return Status::OK(); | |||
| } | |||
| @@ -64,10 +63,10 @@ Status ManifestOp::Builder::SanityCheck() { | |||
| return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode, | |||
| ManifestOp::ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode, | |||
| const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler, std::string usage) | |||
| : MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buffer), | |||
| : MappableLeafOp(num_works, queue_size, std::move(sampler)), | |||
| io_block_pushed_(0), | |||
| sampler_ind_(0), | |||
| data_schema_(std::move(data_schema)), | |||
| @@ -46,14 +46,6 @@ class ManifestOp : public MappableLeafOp { | |||
| // Destructor | |||
| ~Builder() = default; | |||
| // Setter method | |||
| // @param int32_t rows_per_buffer | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| // Setter method | |||
| // @param int32_t size | |||
| // @return Builder setter method returns reference to the builder. | |||
| @@ -134,11 +126,10 @@ class ManifestOp : public MappableLeafOp { | |||
| // Constructor | |||
| // @param int32_t num_works - Num of workers reading images in parallel | |||
| // @param int32_t - rows_per_buffer Number of images (rows) in each buffer | |||
| // @param std::string - file list of Manifest | |||
| // @param int32_t queue_size - connector queue size | |||
| // @param std::unique_ptr<Sampler> sampler - sampler tells ManifestOp what to read | |||
| ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode, | |||
| ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode, | |||
| const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler, std::string usage); | |||
| // Destructor. | |||
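The same trimming applies to ManifestOp. A hedged sketch of the new call shape, with placeholder argument values and `schema` / `sampler_rt` assumed to exist:

```cpp
// Sketch only: illustrative arguments, not values taken from this change.
auto manifest_op = std::make_shared<ManifestOp>(
    /*num_works=*/4, /*file=*/"/path/to/manifest.json", /*queue_size=*/32, /*decode=*/false,
    /*class_index=*/std::map<std::string, int32_t>{}, std::move(schema), std::move(sampler_rt),
    /*usage=*/"train");
```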
| @@ -24,9 +24,8 @@ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler, | |||
| int32_t rows_per_buffer) | |||
| : ParallelOp(num_wkrs, queue_size, std::move(sampler)), rows_per_buffer_(rows_per_buffer) {} | |||
| MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler) | |||
| : ParallelOp(num_wkrs, queue_size, std::move(sampler)) {} | |||
| // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work | |||
| Status MappableLeafOp::operator()() { | |||
| @@ -47,16 +47,13 @@ namespace dataset { | |||
| template <typename T> | |||
| class Queue; | |||
| using ImageLabelPair = std::shared_ptr<std::pair<std::string, int32_t>>; | |||
| using FolderImagesPair = std::shared_ptr<std::pair<std::string, std::queue<ImageLabelPair>>>; | |||
| class MappableLeafOp : public ParallelOp, public RandomAccessOp { | |||
| public: | |||
| /// Constructor | |||
| /// \param int32_t num_wkrs - Num of workers reading images in parallel | |||
| /// \param int32_t queue_size - connector queue size | |||
| /// \param std::unique_ptr<Sampler> sampler - sampler tells the source what to read | |||
| MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler, int32_t rows_per_buffer); | |||
| MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler); | |||
| /// Destructor. | |||
| ~MappableLeafOp() = default; | |||
| @@ -94,10 +91,6 @@ class MappableLeafOp : public ParallelOp, public RandomAccessOp { | |||
| /// Reset function to be called after every epoch to reset the source op after | |||
| /// \return Status The status code returned | |||
| Status Reset() override; | |||
| int32_t rows_per_buffer_; | |||
| int64_t row_cnt_; | |||
| int64_t buf_cnt_; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
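For ops deriving from MappableLeafOp, the base-class initialization now takes three arguments instead of four. A hypothetical derived op (not part of this PR, with the virtual methods a real op must override elided) showing the shape of the new init list:

```cpp
// Hypothetical example: forwarding to the simplified three-argument base constructor.
class MyLeafOp : public MappableLeafOp {
 public:
  MyLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler)
      : MappableLeafOp(num_wkrs, queue_size, std::move(sampler)) {}  // rows_per_buffer no longer passed
  // The worker/row-loading overrides a concrete op still needs are omitted from this sketch.
};
```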
| @@ -50,7 +50,6 @@ MindRecordOp::Builder::Builder() : build_dataset_file_({}) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| build_num_mind_record_workers_ = kDefaultMindRecordWorkers; | |||
| build_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| build_op_connector_queue_size_ = cfg->op_connector_size(); | |||
| builder_num_workers_ = 0; | |||
| build_load_dataset_ = false; | |||
| @@ -114,7 +113,7 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, std::vector<std::str | |||
| int32_t op_connector_queue_size, const std::vector<std::string> &columns_to_load, | |||
| const std::vector<std::shared_ptr<ShardOperator>> &operators, int64_t num_padded, | |||
| const mindrecord::json &sample_json, const std::map<std::string, std::string> &sample_bytes) | |||
| : MappableLeafOp(num_mind_record_workers, op_connector_queue_size, std::make_shared<SequentialSamplerRT>(0, 0), 1), | |||
| : MappableLeafOp(num_mind_record_workers, op_connector_queue_size, std::make_shared<SequentialSamplerRT>(0, 0)), | |||
| dataset_file_(dataset_file), | |||
| load_dataset_(load_dataset), | |||
| columns_to_load_(columns_to_load), | |||
| @@ -35,7 +35,6 @@ const int32_t kMnistImageCols = 28; | |||
| MnistOp::Builder::Builder() : builder_sampler_(nullptr), builder_usage_("") { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -52,8 +51,8 @@ Status MnistOp::Builder::Build(std::shared_ptr<MnistOp> *ptr) { | |||
| TensorShape scalar = TensorShape::CreateScalar(); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); | |||
| *ptr = std::make_shared<MnistOp>(builder_usage_, builder_num_workers_, builder_rows_per_buffer_, builder_dir_, | |||
| builder_op_connector_size_, std::move(builder_schema_), std::move(builder_sampler_)); | |||
| *ptr = std::make_shared<MnistOp>(builder_usage_, builder_num_workers_, builder_dir_, builder_op_connector_size_, | |||
| std::move(builder_schema_), std::move(builder_sampler_)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -73,9 +72,9 @@ Status MnistOp::Builder::SanityCheck() { | |||
| return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| MnistOp::MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path, | |||
| int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), | |||
| MnistOp::MnistOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler)), | |||
| usage_(usage), | |||
| folder_path_(folder_path), | |||
| image_path_({}), | |||
| @@ -52,14 +52,6 @@ class MnistOp : public MappableLeafOp { | |||
| // Destructor. | |||
| ~Builder() = default; | |||
| // Setter method | |||
| // @param int32_t rows_per_buffer | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| // Setter method | |||
| // @param int32_t op_connector_size | |||
| // @return Builder setter method returns reference to the builder. | |||
| @@ -121,13 +113,12 @@ class MnistOp : public MappableLeafOp { | |||
| // Constructor | |||
| // @param const std::string &usage - Usage of this dataset, can be 'train', 'test' or 'all' | |||
| // @param int32_t num_workers - number of workers reading images in parallel | |||
| // @param int32_t rows_per_buffer - number of images (rows) in each buffer | |||
| // @param std::string folder_path - dir directory of mnist | |||
| // @param int32_t queue_size - connector queue size | |||
| // @param std::unique_ptr<DataSchema> data_schema - the schema of the mnist dataset | |||
| // @param std::unique_ptr<Sampler> sampler - sampler tells MnistOp what to read | |||
| MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path, | |||
| int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler); | |||
| MnistOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler); | |||
| // Destructor. | |||
| ~MnistOp() = default; | |||
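MnistOp follows the same pattern. A minimal sketch of the trimmed constructor call, with placeholder values and `schema` / `sampler_rt` assumed to exist:

```cpp
// Sketch only: placeholder arguments for illustration.
auto mnist_op = std::make_shared<MnistOp>(/*usage=*/"train", /*num_workers=*/4,
                                          /*folder_path=*/"/path/to/mnist", /*queue_size=*/32,
                                          std::move(schema), std::move(sampler_rt));
```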
| @@ -36,13 +36,12 @@ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| NonMappableLeafOp::NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, | |||
| int64_t total_num_rows, int32_t op_connector_size, bool shuffle_files, | |||
| int32_t num_devices, int32_t device_id) | |||
| NonMappableLeafOp::NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows, | |||
| int32_t op_connector_size, bool shuffle_files, int32_t num_devices, | |||
| int32_t device_id) | |||
| : ParallelOp(num_workers, op_connector_size), | |||
| device_id_(device_id), | |||
| num_devices_(num_devices), | |||
| rows_per_buffer_(rows_per_buffer), | |||
| filename_index_(std::make_unique<StringIndex>()), | |||
| load_io_block_queue_(true), | |||
| load_jagged_connector_(true), | |||
| @@ -49,14 +49,13 @@ class NonMappableLeafOp : public ParallelOp { | |||
| // @note The builder class should be used to call this constructor. | |||
| // @param num_workers - number of worker threads reading data from tf_file files. | |||
| // @param worker_connector_size - size of each internal queue. | |||
| // @param rows_per_buffer - number of rows that a full buffer will contain. | |||
| // @param total_num_rows - Number of rows to read | |||
| // @param dataset_files_list - list of filepaths for the dataset files. | |||
| // @param op_connector_size - size of each queue in the connector that the child operator pulls from. | |||
| // @param columns_to_load - the names of the columns to load data from. | |||
| // @param shuffle_files - whether or not to shuffle the files before reading data. | |||
| // @param equal_rows_per_shard - whether or not to get equal rows for each process. | |||
| NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows, | |||
| NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows, | |||
| int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id); | |||
| // Default destructor | |||
| @@ -77,9 +76,6 @@ class NonMappableLeafOp : public ParallelOp { | |||
| // @return Status - the error code returned. | |||
| Status Reset() override; | |||
| // Getter method | |||
| int64_t rows_per_buffer() const { return rows_per_buffer_; } | |||
| // Op name getter | |||
| // @return Name of the current Op | |||
| std::string Name() const override { return "NonMappableLeafOp"; } | |||
| @@ -157,7 +153,6 @@ class NonMappableLeafOp : public ParallelOp { | |||
| bool finished_reading_dataset_; | |||
| int64_t total_rows_; | |||
| int64_t rows_per_buffer_; | |||
| WaitPost io_block_queue_wait_post_; | |||
| bool load_io_block_queue_; | |||
| std::mutex load_io_block_queue_mutex_; | |||
| @@ -37,7 +37,6 @@ RandomDataOp::Builder::Builder() | |||
| // Some arguments to the RandomDataOp have a default argument that is taken from the config. | |||
| // The user may override these defaults by using the builder set methods. | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -97,16 +97,6 @@ class RandomDataOp : public ParallelOp { | |||
| return *this; | |||
| } | |||
| /** | |||
| * Builder set method | |||
| * @param rows_per_buffer - The number of rows in each DataBuffer | |||
| * @return Builder - The modified builder by reference | |||
| */ | |||
| Builder &SetRowsPerBuffer(int64_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| /** | |||
| * Builder set method | |||
| * @param total_rows - The total number of rows in the dataset | |||
| @@ -36,7 +36,6 @@ TextFileOp::Builder::Builder() | |||
| std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); | |||
| builder_num_workers_ = config_manager->num_parallel_workers(); | |||
| builder_op_connector_size_ = config_manager->op_connector_size(); | |||
| builder_rows_per_buffer_ = config_manager->rows_per_buffer(); | |||
| builder_worker_connector_size_ = config_manager->worker_connector_size(); | |||
| } | |||
| @@ -65,21 +64,21 @@ Status TextFileOp::Builder::Build(std::shared_ptr<TextFileOp> *op) { | |||
| RETURN_IF_NOT_OK( | |||
| builder_schema_->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| std::shared_ptr<TextFileOp> text_file_op = std::make_shared<TextFileOp>( | |||
| builder_num_workers_, builder_rows_per_buffer_, builder_total_rows_, builder_worker_connector_size_, | |||
| std::move(builder_schema_), builder_text_files_list_, builder_op_connector_size_, builder_shuffle_files_, | |||
| builder_num_devices_, builder_device_id_); | |||
| std::shared_ptr<TextFileOp> text_file_op = | |||
| std::make_shared<TextFileOp>(builder_num_workers_, builder_total_rows_, builder_worker_connector_size_, | |||
| std::move(builder_schema_), builder_text_files_list_, builder_op_connector_size_, | |||
| builder_shuffle_files_, builder_num_devices_, builder_device_id_); | |||
| RETURN_IF_NOT_OK(text_file_op->Init()); | |||
| *op = std::move(text_file_op); | |||
| return Status::OK(); | |||
| } | |||
| TextFileOp::TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size, | |||
| TextFileOp::TextFileOp(int32_t num_workers, int64_t total_rows, int32_t worker_connector_size, | |||
| std::unique_ptr<DataSchema> schema, std::vector<std::string> text_files_list, | |||
| int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id) | |||
| : NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, total_rows, op_connector_size, | |||
| shuffle_files, num_devices, device_id), | |||
| : NonMappableLeafOp(num_workers, worker_connector_size, total_rows, op_connector_size, shuffle_files, num_devices, | |||
| device_id), | |||
| text_files_list_(std::move(text_files_list)), | |||
| data_schema_(std::move(schema)) {} | |||
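On the non-mappable side, TextFileOp likewise drops the buffer argument and keeps the rest of its parameter list unchanged. A hedged sketch of a direct construction (placeholder values, `schema` assumed to exist); the op still needs Init() after construction, as in the builder code above:

```cpp
// Sketch only: placeholder arguments for illustration.
auto text_file_op = std::make_shared<TextFileOp>(
    /*num_workers=*/4, /*total_rows=*/0, /*worker_connector_size=*/16, std::move(schema),
    std::vector<std::string>{"/path/a.txt", "/path/b.txt"}, /*op_connector_size=*/32,
    /*shuffle_files=*/false, /*num_devices=*/1, /*device_id=*/0);
Status rc = text_file_op->Init();
```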
| @@ -94,9 +93,8 @@ void TextFileOp::Print(std::ostream &out, bool show_all) const { | |||
| // Call the super class for displaying any common detailed info | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal stuff | |||
| out << "\nRows per buffer: " << rows_per_buffer_ << "\nRow count: " << total_rows_ << "\nDevice id: " << device_id_ | |||
| << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") | |||
| << "\nText files list:\n"; | |||
| out << "\nRow count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ | |||
| << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nText files list:\n"; | |||
| for (int i = 0; i < text_files_list_.size(); ++i) { | |||
| out << " " << text_files_list_[i]; | |||
| } | |||
| @@ -129,7 +129,6 @@ class TextFileOp : public NonMappableLeafOp { | |||
| // Constructor of TextFileOp | |||
| // @note The builder class should be used to call this constructor. | |||
| // @param num_workers - number of worker threads reading data from tf_file files. | |||
| // @param rows_per_buffer - number of rows that a full buffer will contain. | |||
| // @param total_num_rows - number of rows to read | |||
| // @param dataset_files_list - list of filepaths for the dataset files. | |||
| // @param data_schema - the data schema object. | |||
| @@ -137,9 +136,9 @@ class TextFileOp : public NonMappableLeafOp { | |||
| // @param columns_to_load - the names of the columns to load data from. | |||
| // @param shuffle_files - whether or not to shuffle the files before reading data. | |||
| // @param equal_rows_per_shard - whether or not to get equal rows for each process. | |||
| TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size, | |||
| std::unique_ptr<DataSchema>, std::vector<std::string> text_files_list, int32_t op_connector_size, | |||
| bool shuffle_files, int32_t num_devices, int32_t device_id); | |||
| TextFileOp(int32_t num_workers, int64_t total_rows, int32_t worker_connector_size, std::unique_ptr<DataSchema>, | |||
| std::vector<std::string> text_files_list, int32_t op_connector_size, bool shuffle_files, | |||
| int32_t num_devices, int32_t device_id); | |||
| // Default destructor | |||
| ~TextFileOp() = default; | |||
| @@ -47,7 +47,6 @@ TFReaderOp::Builder::Builder() | |||
| builder_num_workers_ = config_manager->num_parallel_workers(); | |||
| builder_worker_connector_size_ = config_manager->worker_connector_size(); | |||
| builder_op_connector_size_ = config_manager->op_connector_size(); | |||
| builder_rows_per_buffer_ = config_manager->rows_per_buffer(); | |||
| builder_shuffle_files_ = false; | |||
| builder_data_schema_ = std::make_unique<DataSchema>(); | |||
| } | |||
| @@ -114,22 +113,21 @@ Status TFReaderOp::Builder::Build(std::shared_ptr<TFReaderOp> *out_tf_reader_op) | |||
| } | |||
| std::shared_ptr<TFReaderOp> new_tf_reader_op = std::make_shared<TFReaderOp>( | |||
| builder_num_workers_, builder_worker_connector_size_, builder_rows_per_buffer_, builder_total_rows_, | |||
| builder_dataset_files_list_, std::move(builder_data_schema_), builder_op_connector_size_, builder_columns_to_load_, | |||
| builder_shuffle_files_, builder_num_devices_, builder_device_id_, builder_equal_rows_per_shard_); | |||
| builder_num_workers_, builder_worker_connector_size_, builder_total_rows_, builder_dataset_files_list_, | |||
| std::move(builder_data_schema_), builder_op_connector_size_, builder_columns_to_load_, builder_shuffle_files_, | |||
| builder_num_devices_, builder_device_id_, builder_equal_rows_per_shard_); | |||
| RETURN_IF_NOT_OK(new_tf_reader_op->Init()); | |||
| *out_tf_reader_op = std::move(new_tf_reader_op); | |||
| return Status::OK(); | |||
| } | |||
| TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, | |||
| int64_t total_num_rows, std::vector<std::string> dataset_files_list, | |||
| std::unique_ptr<DataSchema> data_schema, int32_t op_connector_size, | |||
| std::vector<std::string> columns_to_load, bool shuffle_files, int32_t num_devices, | |||
| int32_t device_id, bool equal_rows_per_shard) | |||
| : NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, total_num_rows, op_connector_size, | |||
| shuffle_files, num_devices, device_id), | |||
| TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows, | |||
| std::vector<std::string> dataset_files_list, std::unique_ptr<DataSchema> data_schema, | |||
| int32_t op_connector_size, std::vector<std::string> columns_to_load, bool shuffle_files, | |||
| int32_t num_devices, int32_t device_id, bool equal_rows_per_shard) | |||
| : NonMappableLeafOp(num_workers, worker_connector_size, total_num_rows, op_connector_size, shuffle_files, | |||
| num_devices, device_id), | |||
| dataset_files_list_(std::move(dataset_files_list)), | |||
| columns_to_load_(std::move(columns_to_load)), | |||
| data_schema_(std::move(data_schema)), | |||
| @@ -146,8 +144,8 @@ void TFReaderOp::Print(std::ostream &out, bool show_all) const { | |||
| // Call the super class for displaying any common detailed info | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal stuff | |||
| out << "\nRows per buffer: " << rows_per_buffer_ << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_ | |||
| << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") | |||
| out << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ | |||
| << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") | |||
| << "\nDataset files list: Size: " << dataset_files_list_.size() << "\n"; | |||
| for (int i = 0; i < dataset_files_list_.size(); ++i) { | |||
| out << " " << dataset_files_list_[i]; | |||
| @@ -173,7 +173,6 @@ class TFReaderOp : public NonMappableLeafOp { | |||
| // @note The builder class should be used to call this constructor. | |||
| // @param num_workers - number of worker threads reading data from tf_file files. | |||
| // @param worker_connector_size - size of each internal queue. | |||
| // @param rows_per_buffer - number of rows that a full buffer will contain. | |||
| // @param total_num_rows - Number of rows to read | |||
| // @param dataset_files_list - list of filepaths for the dataset files. | |||
| // @param data_schema - the data schema object. | |||
| @@ -181,7 +180,7 @@ class TFReaderOp : public NonMappableLeafOp { | |||
| // @param columns_to_load - the names of the columns to load data from. | |||
| // @param shuffle_files - whether or not to shuffle the files before reading data. | |||
| // @param equal_rows_per_shard - whether or not to get equal rows for each process. | |||
| TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows, | |||
| TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows, | |||
| std::vector<std::string> dataset_files_list, std::unique_ptr<DataSchema> data_schema, | |||
| int32_t op_connector_size, std::vector<std::string> columns_to_load, bool shuffle_files, | |||
| int32_t num_devices, int32_t device_id, bool equal_rows_per_shard); | |||
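TFReaderOp keeps the same eleven remaining parameters in the same order, minus rows_per_buffer. A minimal sketch of the new call, with placeholder values and `data_schema` assumed to exist:

```cpp
// Sketch only: placeholder arguments for illustration.
auto tf_reader_op = std::make_shared<TFReaderOp>(
    /*num_workers=*/8, /*worker_connector_size=*/16, /*total_num_rows=*/0,
    std::vector<std::string>{"/path/part-0001.tfrecord"}, std::move(data_schema),
    /*op_connector_size=*/32, /*columns_to_load=*/std::vector<std::string>{},
    /*shuffle_files=*/false, /*num_devices=*/1, /*device_id=*/0, /*equal_rows_per_shard=*/false);
Status rc = tf_reader_op->Init();
```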
| @@ -47,7 +47,6 @@ const char kImageSetsExtension[] = ".txt"; | |||
| VOCOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| builder_task_type_ = TaskType::Segmentation; | |||
| } | |||
| @@ -78,8 +77,8 @@ Status VOCOp::Builder::Build(std::shared_ptr<VOCOp> *ptr) { | |||
| ColDescriptor(std::string(kColumnTruncate), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| } | |||
| *ptr = std::make_shared<VOCOp>(builder_task_type_, builder_usage_, builder_dir_, builder_labels_to_read_, | |||
| builder_num_workers_, builder_rows_per_buffer_, builder_op_connector_size_, | |||
| builder_decode_, std::move(builder_schema_), std::move(builder_sampler_)); | |||
| builder_num_workers_, builder_op_connector_size_, builder_decode_, | |||
| std::move(builder_schema_), std::move(builder_sampler_)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -96,10 +95,9 @@ Status VOCOp::Builder::SanityCheck() { | |||
| } | |||
| VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path, | |||
| const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer, | |||
| int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema, | |||
| std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), | |||
| const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler) | |||
| : MappableLeafOp(num_workers, queue_size, std::move(sampler)), | |||
| decode_(decode), | |||
| task_type_(task_type), | |||
| usage_(task_mode), | |||
| @@ -112,14 +112,6 @@ class VOCOp : public MappableLeafOp { | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param int32_t rows_per_buffer | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param std::shared_ptr<Sampler> sampler | |||
| // @return Builder setter method returns reference to the builder. | |||
| @@ -164,14 +156,13 @@ class VOCOp : public MappableLeafOp { | |||
| // @param std::string folder_path - dir directory of VOC | |||
| // @param std::map<std::string, int32_t> class_index - input class-to-index of annotation | |||
| // @param int32_t num_workers - number of workers reading images in parallel | |||
| // @param int32_t rows_per_buffer - number of images (rows) in each buffer | |||
| // @param int32_t queue_size - connector queue size | |||
| // @param bool decode - whether to decode images | |||
| // @param std::unique_ptr<DataSchema> data_schema - the schema of the VOC dataset | |||
| // @param std::shared_ptr<Sampler> sampler - sampler tells VOCOp what to read | |||
| VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path, | |||
| const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer, | |||
| int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler); | |||
| const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler); | |||
| // Destructor | |||
| ~VOCOp() = default; | |||
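VOCOp gets the same treatment: the constructor keeps every argument except rows_per_buffer. A hedged sketch of the new call, where `task_type`, `schema` and `sampler_rt` are assumed to have been prepared by the caller and the other values are placeholders:

```cpp
// Sketch only: task_type, schema and sampler_rt assumed to exist; other values illustrative.
auto voc_op = std::make_shared<VOCOp>(task_type, /*task_mode=*/"train", "/path/to/VOC2012",
                                      /*class_index=*/std::map<std::string, int32_t>{},
                                      /*num_workers=*/4, /*queue_size=*/32, /*decode=*/true,
                                      std::move(schema), std::move(sampler_rt));
```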
| @@ -255,11 +246,9 @@ class VOCOp : public MappableLeafOp { | |||
| bool decode_; | |||
| int64_t row_cnt_; | |||
| int64_t buf_cnt_; | |||
| std::string folder_path_; | |||
| TaskType task_type_; | |||
| std::string usage_; | |||
| int32_t rows_per_buffer_; | |||
| std::unique_ptr<DataSchema> data_schema_; | |||
| std::vector<std::string> image_ids_; | |||
| @@ -33,7 +33,6 @@ ZipOp::Builder::Builder() { | |||
| // using the various builder set methods. | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| } | |||
| @@ -41,18 +40,13 @@ Status ZipOp::Builder::SanityCheck() const { return Status::OK(); } | |||
| Status ZipOp::Builder::Build(std::shared_ptr<ZipOp> *ptr) { | |||
| RETURN_IF_NOT_OK(SanityCheck()); | |||
| *ptr = std::make_shared<ZipOp>(builder_rows_per_buffer_, builder_op_connector_size_); | |||
| *ptr = std::make_shared<ZipOp>(builder_op_connector_size_); | |||
| return Status::OK(); | |||
| } | |||
| // Construct ZipOp here, local variables initialized in operator due to tree construction restrictions | |||
| ZipOp::ZipOp(int32_t rows_per_buffer, int32_t op_connector_size) | |||
| : PipelineOp(op_connector_size), | |||
| children_num_(0), | |||
| rows_per_buffer_(rows_per_buffer), | |||
| buffer_id_(0), | |||
| draining_(false), | |||
| eof_(false) {} | |||
| ZipOp::ZipOp(int32_t op_connector_size) | |||
| : PipelineOp(op_connector_size), children_num_(0), draining_(false), eof_(false) {} | |||
| // destructor | |||
| ZipOp::~ZipOp() {} | |||
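With buffer bookkeeping removed, ZipOp is left with a single-argument explicit constructor. A one-line sketch (size value illustrative):

```cpp
// Sketch only: ZipOp now takes just the connector size.
auto zip_op = std::make_shared<ZipOp>(/*op_connector_size=*/32);
```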
| @@ -76,9 +76,8 @@ class ZipOp : public PipelineOp { | |||
| }; | |||
| // Constructor for ZipOp | |||
| // @param rows_per_buffer - number of rows in output buffer | |||
| // @param op_connector_size - connector size | |||
| ZipOp(int32_t rows_per_buffer, int32_t op_connector_size); | |||
| explicit ZipOp(int32_t op_connector_size); | |||
| // Destructor | |||
| ~ZipOp(); | |||
| @@ -136,8 +135,6 @@ class ZipOp : public PipelineOp { | |||
| Status ComputeColMap() override; | |||
| int32_t children_num_; | |||
| int32_t rows_per_buffer_; | |||
| int32_t buffer_id_; | |||
| bool draining_; | |||
| bool eof_; | |||
| std::vector<std::unique_ptr<ChildIterator>> child_iterators_; | |||
| @@ -58,13 +58,13 @@ Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_ro | |||
| // Helper function to inject a shuffle operator over top of current operator being built | |||
| Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, | |||
| int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr<DatasetOp> *shuffle_op) { | |||
| int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op) { | |||
| std::shared_ptr<ShuffleOp> new_shuffle_op = nullptr; | |||
| int64_t shuffle_size = 0; | |||
| RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size)); | |||
| MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size; | |||
| // Add the shuffle op | |||
| *shuffle_op = std::make_shared<ShuffleOp>(shuffle_size, GetSeed(), connector_que_size, true, rows_per_buffer); | |||
| *shuffle_op = std::make_shared<ShuffleOp>(shuffle_size, GetSeed(), connector_que_size, true); | |||
| return Status::OK(); | |||
| } | |||
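Callers of the AddShuffleOp helper drop the rows_per_buffer argument as well. A sketch of the trimmed call as it would appear inside a Status-returning IR-node Build(), using the same local names (`sorted_dataset_files`, `num_shards_`, `num_rows`, `connector_que_size_`, `node_ops`) that the node code below uses:

```cpp
// Sketch only: shown in the context of a Status-returning Build() method.
std::shared_ptr<DatasetOp> shuffle_op = nullptr;
RETURN_IF_NOT_OK(
  AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op));
node_ops->push_back(shuffle_op);
```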
| @@ -231,7 +231,6 @@ DatasetNode::DatasetNode() | |||
| // Fetch some default value from config manager | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| num_workers_ = cfg->num_parallel_workers(); | |||
| rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| connector_que_size_ = cfg->op_connector_size(); | |||
| worker_connector_size_ = cfg->worker_connector_size(); | |||
| } | |||
| @@ -92,7 +92,7 @@ constexpr char kTFRecordNode[] = "TFRecordDataset"; | |||
| constexpr char kVOCNode[] = "VOCDataset"; | |||
| Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, | |||
| int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr<DatasetOp> *shuffle_op); | |||
| int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op); | |||
| // Helper function to validate dataset files parameter | |||
| Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector<std::string> &dataset_files); | |||
| @@ -323,7 +323,6 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> { | |||
| std::shared_ptr<DatasetCache> cache_; | |||
| int64_t dataset_size_; | |||
| int32_t num_workers_; | |||
| int32_t rows_per_buffer_; | |||
| int32_t connector_que_size_; | |||
| int32_t worker_connector_size_; | |||
| int32_t total_repeats_; // Number of times required to run this operator | |||
| @@ -44,8 +44,7 @@ void ShuffleNode::Print(std::ostream &out) const { | |||
| // Function to build the ShuffleOp | |||
| Status ShuffleNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { | |||
| auto op = std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_, | |||
| rows_per_buffer_); | |||
| auto op = std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_); | |||
| op->set_total_repeats(GetTotalRepeats()); | |||
| op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(op); | |||
| @@ -74,8 +74,8 @@ Status AlbumNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| auto album_op = std::make_shared<AlbumOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, decode_, | |||
| extensions, std::move(schema), std::move(sampler_rt)); | |||
| auto album_op = std::make_shared<AlbumOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, extensions, | |||
| std::move(schema), std::move(sampler_rt)); | |||
| album_op->set_total_repeats(GetTotalRepeats()); | |||
| album_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(album_op); | |||
| @@ -69,8 +69,8 @@ Status CelebANode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| auto celeba_op = std::make_shared<CelebAOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, | |||
| decode_, usage_, extensions_, std::move(schema), std::move(sampler_rt)); | |||
| auto celeba_op = std::make_shared<CelebAOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, usage_, | |||
| extensions_, std::move(schema), std::move(sampler_rt)); | |||
| celeba_op->set_total_repeats(GetTotalRepeats()); | |||
| celeba_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(celeba_op); | |||
| @@ -66,9 +66,8 @@ Status Cifar100Node::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| auto cifar_op = | |||
| std::make_shared<CifarOp>(CifarOp::CifarType::kCifar100, usage_, num_workers_, rows_per_buffer_, dataset_dir_, | |||
| connector_que_size_, std::move(schema), std::move(sampler_rt)); | |||
| auto cifar_op = std::make_shared<CifarOp>(CifarOp::CifarType::kCifar100, usage_, num_workers_, dataset_dir_, | |||
| connector_que_size_, std::move(schema), std::move(sampler_rt)); | |||
| cifar_op->set_total_repeats(GetTotalRepeats()); | |||
| cifar_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(cifar_op); | |||
| @@ -64,9 +64,8 @@ Status Cifar10Node::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_op | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| auto cifar_op = | |||
| std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, usage_, num_workers_, rows_per_buffer_, dataset_dir_, | |||
| connector_que_size_, std::move(schema), std::move(sampler_rt)); | |||
| auto cifar_op = std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, usage_, num_workers_, dataset_dir_, | |||
| connector_que_size_, std::move(schema), std::move(sampler_rt)); | |||
| cifar_op->set_total_repeats(GetTotalRepeats()); | |||
| cifar_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(cifar_op); | |||
| @@ -177,8 +177,8 @@ Status CLUENode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) | |||
| std::sort(sorted_dataset_files.begin(), sorted_dataset_files.end()); | |||
| std::shared_ptr<ClueOp> clue_op = | |||
| std::make_shared<ClueOp>(num_workers_, rows_per_buffer_, num_samples_, worker_connector_size_, ck_map, | |||
| sorted_dataset_files, connector_que_size_, shuffle_files, num_shards_, shard_id_); | |||
| std::make_shared<ClueOp>(num_workers_, num_samples_, worker_connector_size_, ck_map, sorted_dataset_files, | |||
| connector_que_size_, shuffle_files, num_shards_, shard_id_); | |||
| RETURN_IF_NOT_OK(clue_op->Init()); | |||
| @@ -191,8 +191,8 @@ Status CLUENode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) | |||
| RETURN_IF_NOT_OK(ClueOp::CountAllFileRows(sorted_dataset_files, &num_rows)); | |||
| // Add the shuffle op after this op | |||
| RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, | |||
| rows_per_buffer_, &shuffle_op)); | |||
| RETURN_IF_NOT_OK( | |||
| AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op)); | |||
| shuffle_op->set_total_repeats(GetTotalRepeats()); | |||
| shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(shuffle_op); | |||
| @@ -123,8 +123,8 @@ Status CocoNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| std::shared_ptr<CocoOp> op = | |||
| std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, rows_per_buffer_, | |||
| connector_que_size_, decode_, std::move(schema), std::move(sampler_rt)); | |||
| std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, connector_que_size_, decode_, | |||
| std::move(schema), std::move(sampler_rt)); | |||
| op->set_total_repeats(GetTotalRepeats()); | |||
| op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(op); | |||
| @@ -114,8 +114,8 @@ Status CSVNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { | |||
| } | |||
| std::shared_ptr<CsvOp> csv_op = std::make_shared<CsvOp>( | |||
| sorted_dataset_files, field_delim_, column_default_list, column_names_, num_workers_, rows_per_buffer_, | |||
| num_samples_, worker_connector_size_, connector_que_size_, shuffle_files, num_shards_, shard_id_); | |||
| sorted_dataset_files, field_delim_, column_default_list, column_names_, num_workers_, num_samples_, | |||
| worker_connector_size_, connector_que_size_, shuffle_files, num_shards_, shard_id_); | |||
| RETURN_IF_NOT_OK(csv_op->Init()); | |||
| @@ -128,8 +128,8 @@ Status CSVNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { | |||
| RETURN_IF_NOT_OK(CsvOp::CountAllFileRows(sorted_dataset_files, column_names_.empty(), &num_rows)); | |||
| // Add the shuffle op after this op | |||
| RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, | |||
| rows_per_buffer_, &shuffle_op)); | |||
| RETURN_IF_NOT_OK( | |||
| AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op)); | |||
| shuffle_op->set_total_repeats(GetTotalRepeats()); | |||
| shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(shuffle_op); | |||
| @@ -72,9 +72,8 @@ Status ImageFolderNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const nod | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| auto op = | |||
| std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, recursive_, | |||
| decode_, exts_, class_indexing_, std::move(schema), std::move(sampler_rt)); | |||
| auto op = std::make_shared<ImageFolderOp>(num_workers_, dataset_dir_, connector_que_size_, recursive_, decode_, exts_, | |||
| class_indexing_, std::move(schema), std::move(sampler_rt)); | |||
| op->set_total_repeats(GetTotalRepeats()); | |||
| op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(op); | |||
| @@ -94,8 +94,8 @@ Status ManifestNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| manifest_op = std::make_shared<ManifestOp>(num_workers_, rows_per_buffer_, dataset_file_, connector_que_size_, | |||
| decode_, class_index_, std::move(schema), std::move(sampler_rt), usage_); | |||
| manifest_op = std::make_shared<ManifestOp>(num_workers_, dataset_file_, connector_que_size_, decode_, class_index_, | |||
| std::move(schema), std::move(sampler_rt), usage_); | |||
| manifest_op->set_total_repeats(GetTotalRepeats()); | |||
| manifest_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(manifest_op); | |||
| @@ -60,8 +60,8 @@ Status MnistNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| auto op = std::make_shared<MnistOp>(usage_, num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, | |||
| std::move(schema), std::move(sampler_rt)); | |||
| auto op = std::make_shared<MnistOp>(usage_, num_workers_, dataset_dir_, connector_que_size_, std::move(schema), | |||
| std::move(sampler_rt)); | |||
| op->set_total_repeats(GetTotalRepeats()); | |||
| op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(op); | |||
| @@ -82,9 +82,9 @@ Status TextFileNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o | |||
| RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| // Create and initialize TextFileOp | |||
| std::shared_ptr<TextFileOp> text_file_op = std::make_shared<TextFileOp>( | |||
| num_workers_, rows_per_buffer_, num_samples_, worker_connector_size_, std::move(schema), sorted_dataset_files, | |||
| connector_que_size_, shuffle_files, num_shards_, shard_id_); | |||
| std::shared_ptr<TextFileOp> text_file_op = | |||
| std::make_shared<TextFileOp>(num_workers_, num_samples_, worker_connector_size_, std::move(schema), | |||
| sorted_dataset_files, connector_que_size_, shuffle_files, num_shards_, shard_id_); | |||
| RETURN_IF_NOT_OK(text_file_op->Init()); | |||
| if (cache_ == nullptr && shuffle_ == ShuffleMode::kGlobal && !IsDescendantOfCache()) { | |||
| @@ -96,8 +96,8 @@ Status TextFileNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o | |||
| RETURN_IF_NOT_OK(TextFileOp::CountAllFileRows(sorted_dataset_files, &num_rows)); | |||
| // Add the shuffle op after this op | |||
| RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, | |||
| rows_per_buffer_, &shuffle_op)); | |||
| RETURN_IF_NOT_OK( | |||
| AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op)); | |||
| shuffle_op->set_total_repeats(GetTotalRepeats()); | |||
| shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(shuffle_op); | |||
| @@ -124,8 +124,8 @@ Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o | |||
| // Create and initialize TFReaderOp | |||
| std::shared_ptr<TFReaderOp> tf_reader_op = std::make_shared<TFReaderOp>( | |||
| num_workers_, worker_connector_size_, rows_per_buffer_, num_samples_, sorted_dir_files, std::move(data_schema), | |||
| connector_que_size_, columns_list_, shuffle_files, num_shards_, shard_id_, shard_equal_rows_); | |||
| num_workers_, worker_connector_size_, num_samples_, sorted_dir_files, std::move(data_schema), connector_que_size_, | |||
| columns_list_, shuffle_files, num_shards_, shard_id_, shard_equal_rows_); | |||
| RETURN_IF_NOT_OK(tf_reader_op->Init()); | |||
| @@ -139,8 +139,7 @@ Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o | |||
| RETURN_IF_NOT_OK(TFReaderOp::CountTotalRows(&num_rows, sorted_dir_files)); | |||
| // Add the shuffle op after this op | |||
| RETURN_IF_NOT_OK(AddShuffleOp(sorted_dir_files.size(), num_shards_, num_rows, 0, connector_que_size_, | |||
| rows_per_buffer_, &shuffle_op)); | |||
| RETURN_IF_NOT_OK(AddShuffleOp(sorted_dir_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op)); | |||
| shuffle_op->set_total_repeats(GetTotalRepeats()); | |||
| shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(shuffle_op); | |||
| @@ -112,8 +112,8 @@ Status VOCNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| std::shared_ptr<VOCOp> voc_op; | |||
| voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, rows_per_buffer_, | |||
| connector_que_size_, decode_, std::move(schema), std::move(sampler_rt)); | |||
| voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, connector_que_size_, | |||
| decode_, std::move(schema), std::move(sampler_rt)); | |||
| voc_op->set_total_repeats(GetTotalRepeats()); | |||
| voc_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(voc_op); | |||
| @@ -43,11 +43,9 @@ void SyncWaitNode::Print(std::ostream &out) const { | |||
| // Function to build the BarrierOp | |||
| Status SyncWaitNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { | |||
| // Right now barrier should only take num_rows_per_buffer = 1 | |||
| // The reason for this is because having it otherwise can lead to blocking issues | |||
| // See barrier_op.h for more details | |||
| const int32_t rows_per_buffer = 1; | |||
| auto op = std::make_shared<BarrierOp>(rows_per_buffer, connector_que_size_, condition_name_, callback_); | |||
| auto op = std::make_shared<BarrierOp>(connector_que_size_, condition_name_, callback_); | |||
| op->set_total_repeats(GetTotalRepeats()); | |||
| op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(op); | |||
| @@ -58,7 +58,7 @@ Status ZipNode::ValidateParams() { | |||
| } | |||
| Status ZipNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { | |||
| auto op = std::make_shared<ZipOp>(rows_per_buffer_, connector_que_size_); | |||
| auto op = std::make_shared<ZipOp>(connector_que_size_); | |||
| op->set_total_repeats(GetTotalRepeats()); | |||
| op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(op); | |||
| @@ -31,7 +31,7 @@ using mindspore::MsLogLevel::ERROR; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false); | |||
| std::shared_ptr<RepeatOp> Repeat(int repeat_cnt); | |||
| @@ -42,10 +42,10 @@ std::shared_ptr<AlbumOp> Album(int64_t num_works, int64_t rows, int64_t conns, s | |||
| std::shared_ptr<AlbumOp> so; | |||
| AlbumOp::Builder builder; | |||
| Status rc = builder.SetNumWorkers(num_works) | |||
| .SetAlbumDir(path) | |||
| .SetRowsPerBuffer(rows) | |||
| .SetOpConnectorSize(conns) | |||
| .SetExtensions({".json"}) | |||
| .SetAlbumDir(path) | |||
| .SetOpConnectorSize(conns) | |||
| .SetExtensions({".json"}) | |||
| .SetSampler(std::move(sampler)) | |||
| .SetDecode(decode) | |||
| .Build(&so); | |||
| @@ -59,12 +59,12 @@ std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_works, int64_t rows, int64_t co | |||
| std::shared_ptr<AlbumOp> so; | |||
| AlbumOp::Builder builder; | |||
| Status rc = builder.SetNumWorkers(num_works) | |||
| .SetSchemaFile(schema_file) | |||
| .SetColumnsToLoad(column_names) | |||
| .SetAlbumDir(path) | |||
| .SetRowsPerBuffer(rows) | |||
| .SetOpConnectorSize(conns) | |||
| .SetExtensions({".json"}) | |||
| .SetSchemaFile(schema_file) | |||
| .SetColumnsToLoad(column_names) | |||
| .SetAlbumDir(path) | |||
| .SetOpConnectorSize(conns) | |||
| .SetExtensions({".json"}) | |||
| .SetSampler(std::move(sampler)) | |||
| .SetDecode(decode) | |||
| .Build(&so); | |||
| @@ -180,8 +180,8 @@ TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) { | |||
| EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {})); | |||
| EXPECT_OK(tensor_map["id"]->GetItemAt<int64_t>(&id, {})); | |||
| MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape" | |||
| << tensor_map["label"] << "priority: " << priority << " embedding : " | |||
| << tensor_map["_embedding"]->shape() << " id: " << id << "\n"; | |||
| << tensor_map["label"] << "priority: " << priority | |||
| << " embedding : " << tensor_map["_embedding"]->shape() << " id: " << id << "\n"; | |||
| i++; | |||
| di.GetNextAsMap(&tensor_map); | |||
| } | |||
| @@ -34,7 +34,7 @@ class MindDataTestBatchOp : public UT::DatasetOpTesting { | |||
| protected: | |||
| }; | |||
| std::shared_ptr<de::BatchOp> Batch(int32_t batch_size = 1, bool drop = false, int rows_per_buf = 2) { | |||
| std::shared_ptr<de::BatchOp> Batch(int32_t batch_size = 1, bool drop = false) { | |||
| Status rc; | |||
| std::shared_ptr<de::BatchOp> op; | |||
| rc = de::BatchOp::Builder(batch_size).SetDrop(drop).Build(&op); | |||
| @@ -50,10 +50,10 @@ std::shared_ptr<de::RepeatOp> Repeat(int repeat_cnt = 1) { | |||
| return op; | |||
| } | |||
| std::shared_ptr<de::TFReaderOp> TFReader(std::string schema, int rows_per_buf = 2, int num_works = 8) { | |||
| std::shared_ptr<de::TFReaderOp> TFReader(std::string schema, int num_works = 8) { | |||
| std::shared_ptr<de::TFReaderOp> so; | |||
| de::TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({schema}).SetRowsPerBuffer(rows_per_buf).SetNumWorkers(num_works); | |||
| builder.SetDatasetFilesList({schema}).SetNumWorkers(num_works); | |||
| Status rc = builder.Build(&so); | |||
| return so; | |||
| } | |||
| @@ -111,7 +111,7 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) { | |||
| bool success = false; | |||
| auto op1 = TFReader(schema_file); | |||
| auto op2 = Repeat(2); | |||
| auto op3 = Batch(7, true, 99); | |||
| auto op3 = Batch(7, true); | |||
| op1->set_total_repeats(2); | |||
| op1->set_num_repeats_per_epoch(2); | |||
| auto tree = Build({op1, op2, op3}); | |||
| @@ -161,7 +161,7 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) { | |||
| bool success = false; | |||
| auto op1 = TFReader(schema_file); | |||
| auto op2 = Repeat(2); | |||
| auto op3 = Batch(7, false, 99); | |||
| auto op3 = Batch(7, false); | |||
| op1->set_total_repeats(2); | |||
| op1->set_num_repeats_per_epoch(2); | |||
| auto tree = Build({op1, op2, op3}); | |||
| @@ -217,7 +217,7 @@ TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) { | |||
| std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; | |||
| bool success = false; | |||
| auto op1 = TFReader(schema_file); | |||
| auto op2 = Batch(7, false, 99); | |||
| auto op2 = Batch(7, false); | |||
| auto op3 = Repeat(2); | |||
| op1->set_total_repeats(2); | |||
| op1->set_num_repeats_per_epoch(2); | |||
| @@ -270,7 +270,7 @@ TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) { | |||
| std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; | |||
| bool success = false; | |||
| auto op1 = TFReader(schema_file); | |||
| auto op2 = Batch(5, true, 99); | |||
| auto op2 = Batch(5, true); | |||
| auto op3 = Repeat(2); | |||
| op1->set_total_repeats(2); | |||
| op1->set_num_repeats_per_epoch(2); | |||
| @@ -253,7 +253,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) { | |||
| // RandomDataOp | |||
| std::shared_ptr<RandomDataOp> myRandomDataOp; | |||
| rc = RandomDataOp::Builder() | |||
| .SetRowsPerBuffer(4) | |||
| .SetNumWorkers(4) | |||
| .SetDataSchema(std::move(testSchema)) | |||
| .SetTotalRows(50) // 50 samples for now | |||
| @@ -277,7 +277,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) { | |||
| rc = CacheOp::Builder() | |||
| .SetNumWorkers(5) | |||
| .SetClient(myClient) | |||
| .SetRowsPerBuffer(1) | |||
| .SetSampler(std::move(seq_sampler)) | |||
| .Build(&myCacheOp); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -379,7 +379,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) { | |||
| // RandomDataOp | |||
| std::shared_ptr<RandomDataOp> myRandomDataOp; | |||
| rc = RandomDataOp::Builder() | |||
| .SetRowsPerBuffer(2) | |||
| .SetNumWorkers(4) | |||
| .SetDataSchema(std::move(testSchema)) | |||
| .SetTotalRows(10) | |||
| @@ -401,7 +401,6 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) { | |||
| rc = CacheOp::Builder() | |||
| .SetNumWorkers(4) | |||
| .SetClient(myClient) | |||
| .SetRowsPerBuffer(3) | |||
| .SetSampler(std::move(seq_sampler)) | |||
| .Build(&myCacheOp); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -484,7 +483,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestImageFolderCacheMerge) { | |||
| ImageFolderOp::Builder builder; | |||
| builder.SetOpConnectorSize(3) | |||
| .SetNumWorkers(3) | |||
| .SetRowsPerBuffer(2) | |||
| .SetExtensions({".jpg", ".JPEG"}) | |||
| .SetRecursive(true) | |||
| .SetImageFolderDir(datasets_root_path_ + "/testPK/data"); | |||
| @@ -26,41 +26,45 @@ | |||
| #include "securec.h" | |||
| using namespace mindspore::dataset; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| std::shared_ptr<RepeatOp> Repeat(int repeat_cnt); | |||
| std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); | |||
| std::shared_ptr<CelebAOp> Celeba(int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, | |||
| const std::string &dir, std::shared_ptr<SamplerRT> sampler = nullptr, | |||
| bool decode = false, const std::string &dataset_type = "all") { | |||
| std::shared_ptr<CelebAOp> Celeba(int32_t num_workers, int32_t queue_size, const std::string &dir, | |||
| std::shared_ptr<SamplerRT> sampler = nullptr, bool decode = false, | |||
| const std::string &dataset_type = "all") { | |||
| std::shared_ptr<CelebAOp> so; | |||
| CelebAOp::Builder builder; | |||
| Status rc = builder.SetNumWorkers(num_workers) | |||
| .SetCelebADir(dir) | |||
| .SetRowsPerBuffer(rows_per_buffer) | |||
| .SetOpConnectorSize(queue_size) | |||
| .SetSampler(std::move(sampler)) | |||
| .SetDecode(decode) | |||
| .SetUsage(dataset_type).Build(&so); | |||
| .SetUsage(dataset_type) | |||
| .Build(&so); | |||
| return so; | |||
| } | |||
| class MindDataTestCelebaDataset : public UT::DatasetOpTesting { | |||
| protected: | |||
| protected: | |||
| }; | |||
| TEST_F(MindDataTestCelebaDataset, TestSequentialCeleba) { | |||
| std::string dir = datasets_root_path_ + "/testCelebAData/"; | |||
| uint32_t expect_labels[4][40] = {{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}, | |||
| {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, | |||
| {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, | |||
| {0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}}; | |||
| uint32_t expect_labels[4][40] = {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}, | |||
| {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, | |||
| {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, | |||
| {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}}; | |||
| uint32_t count = 0; | |||
| auto tree = Build({Celeba(16, 2, 32, dir)}); | |||
| auto tree = Build({Celeba(16, 2, dir)}); | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| @@ -86,16 +90,24 @@ TEST_F(MindDataTestCelebaDataset, TestSequentialCeleba) { | |||
| TEST_F(MindDataTestCelebaDataset, TestCelebaRepeat) { | |||
| std::string dir = datasets_root_path_ + "/testCelebAData/"; | |||
| uint32_t expect_labels[8][40] = {{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}, | |||
| {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, | |||
| {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, | |||
| {0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}, | |||
| {0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}, | |||
| {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, | |||
| {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, | |||
| {0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}}; | |||
| uint32_t expect_labels[8][40] = {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}, | |||
| {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, | |||
| {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, | |||
| {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}, | |||
| {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}, | |||
| {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, | |||
| {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, | |||
| {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, | |||
| 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}}; | |||
| uint32_t count = 0; | |||
| auto op1 = Celeba(16, 2, 32, dir); | |||
| auto op1 = Celeba(16, 2, dir); | |||
| auto op2 = Repeat(2); | |||
| auto tree = Build({op1, op2}); | |||
| op1->set_total_repeats(2); | |||
| @@ -131,7 +143,7 @@ TEST_F(MindDataTestCelebaDataset, TestSubsetRandomSamplerCeleba) { | |||
| 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}}; | |||
| std::string dir = datasets_root_path_ + "/testCelebAData/"; | |||
| uint32_t count = 0; | |||
| auto tree = Build({Celeba(16, 2, 32, dir, std::move(sampler))}); | |||
| auto tree = Build({Celeba(16, 2, dir, std::move(sampler))}); | |||
| tree->Prepare(); | |||
| Status rc = tree->Launch(); | |||
| if (rc.IsError()) { | |||
| @@ -47,7 +47,7 @@ std::shared_ptr<CifarOp> Cifarop(uint64_t num_works, uint64_t rows, uint64_t con | |||
| CifarOp::Builder builder; | |||
| Status rc = builder.SetNumWorkers(num_works) | |||
| .SetCifarDir(path) | |||
| .SetRowsPerBuffer(rows) | |||
| .SetOpConnectorSize(conns) | |||
| .SetSampler(std::move(sampler)) | |||
| .SetCifarType(cifar10) | |||
| @@ -45,20 +45,17 @@ TEST_F(MindDataTestClientConfig, TestClientConfig1) { | |||
| std::shared_ptr<ConfigManager> my_conf = GlobalContext::config_manager(); | |||
| ASSERT_EQ(my_conf->num_parallel_workers(), kCfgParallelWorkers); | |||
| ASSERT_EQ(my_conf->rows_per_buffer(), kCfgRowsPerBuffer); | |||
| ASSERT_EQ(my_conf->worker_connector_size(), kCfgWorkerConnectorSize); | |||
| ASSERT_EQ(my_conf->op_connector_size(), kCfgOpConnectorSize); | |||
| ASSERT_EQ(my_conf->seed(), kCfgDefaultSeed); | |||
| my_conf->set_num_parallel_workers(2); | |||
| my_conf->set_rows_per_buffer(1); | |||
| my_conf->set_worker_connector_size(3); | |||
| my_conf->set_op_connector_size(4); | |||
| my_conf->set_seed(5); | |||
| ASSERT_EQ(my_conf->num_parallel_workers(), 2); | |||
| ASSERT_EQ(my_conf->rows_per_buffer(), 1); | |||
| ASSERT_EQ(my_conf->worker_connector_size(), 3); | |||
| ASSERT_EQ(my_conf->op_connector_size(), 4); | |||
| ASSERT_EQ(my_conf->seed(), 5); | |||
| @@ -67,7 +64,6 @@ TEST_F(MindDataTestClientConfig, TestClientConfig1) { | |||
| ASSERT_TRUE(my_conf->LoadFile(file)); | |||
| ASSERT_EQ(my_conf->num_parallel_workers(), kCfgParallelWorkers); | |||
| ASSERT_EQ(my_conf->rows_per_buffer(), kCfgRowsPerBuffer); | |||
| ASSERT_EQ(my_conf->worker_connector_size(), kCfgWorkerConnectorSize); | |||
| ASSERT_EQ(my_conf->op_connector_size(), kCfgOpConnectorSize); | |||
| ASSERT_EQ(my_conf->seed(), kCfgDefaultSeed); | |||
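As a quick orientation, a sketch (restating only the getters and setters exercised in this hunk; not new API) of the ConfigManager surface the client-config test still covers now that rows_per_buffer is removed:

    // Remaining round-trip checked by TestClientConfig1 after the removal.
    std::shared_ptr<ConfigManager> my_conf = GlobalContext::config_manager();
    my_conf->set_num_parallel_workers(2);
    my_conf->set_worker_connector_size(3);
    my_conf->set_op_connector_size(4);
    my_conf->set_seed(5);
    ASSERT_EQ(my_conf->num_parallel_workers(), 2);
    ASSERT_EQ(my_conf->worker_connector_size(), 3);
    ASSERT_EQ(my_conf->op_connector_size(), 4);
    ASSERT_EQ(my_conf->seed(), 5);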
| @@ -50,7 +50,7 @@ TEST_F(MindDataTestCLUEOp, TestCLUEBasic) { | |||
| std::shared_ptr<ClueOp> op; | |||
| ClueOp::Builder builder; | |||
| builder.SetClueFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(16) | |||
| .SetOpConnectorSize(2) | |||
| .SetColsKeyMap(key_map); | |||
| @@ -43,7 +43,7 @@ using mindspore::MsLogLevel::ERROR; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false); | |||
| std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); | |||
| @@ -51,7 +51,7 @@ TEST_F(MindDataTestConcatOp, TestConcatProject) { | |||
| // TFReaderOp1 | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op1; | |||
| TFReaderOp::Builder builder1; | |||
| builder1.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); | |||
| builder1.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); | |||
| std::unique_ptr<DataSchema> schema1 = std::make_unique<DataSchema>(); | |||
| schema1->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {}); | |||
| builder1.SetDataSchema(std::move(schema1)); | |||
| @@ -63,7 +63,7 @@ TEST_F(MindDataTestConcatOp, TestConcatProject) { | |||
| // TFReaderOp2 | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op2; | |||
| TFReaderOp::Builder builder2; | |||
| builder2.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); | |||
| builder2.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); | |||
| std::unique_ptr<DataSchema> schema2 = std::make_unique<DataSchema>(); | |||
| schema2->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {}); | |||
| builder2.SetDataSchema(std::move(schema2)); | |||
| @@ -52,7 +52,7 @@ TEST_F(MindDataTestCSVOp, TestCSVBasic) { | |||
| std::shared_ptr<CsvOp> op; | |||
| CsvOp::Builder builder; | |||
| builder.SetCsvFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(16) | |||
| .SetShuffleFiles(false) | |||
| .SetOpConnectorSize(2) | |||
| .SetFieldDelim(',') | |||
| @@ -44,19 +44,15 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) { | |||
| uint32_t shuffle_size = 32; | |||
| uint32_t connector_size = 8; | |||
| std::shared_ptr<ShuffleOp> leaf_op1 = | |||
| std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32); | |||
| std::shared_ptr<ShuffleOp> leaf_op1 = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false); | |||
| ASSERT_NE(leaf_op1, nullptr); | |||
| my_tree->AssociateNode(leaf_op1); | |||
| shuffle_size = 16; | |||
| std::shared_ptr<ShuffleOp> leaf_op2 = | |||
| std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32); | |||
| std::shared_ptr<ShuffleOp> leaf_op2 = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false); | |||
| ASSERT_NE(leaf_op2, nullptr); | |||
| my_tree->AssociateNode(leaf_op2); | |||
| shuffle_size = 8; | |||
| std::shared_ptr<ShuffleOp> parent_op = | |||
| std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32); | |||
| std::shared_ptr<ShuffleOp> parent_op = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false); | |||
| ASSERT_NE(parent_op, nullptr); | |||
| my_tree->AssociateNode(parent_op); | |||
| @@ -68,8 +64,7 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) { | |||
| parent_op->AddChild(std::move(leaf_op1)); | |||
| parent_op->AddChild(std::move(leaf_op2)); | |||
| shuffle_size = 4; | |||
| std::shared_ptr<DatasetOp> root_op = | |||
| std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32); | |||
| std::shared_ptr<DatasetOp> root_op = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false); | |||
| my_tree->AssignRoot(root_op); | |||
| root_op->AddChild(parent_op); | |||
| ASSERT_NE(root_op, nullptr); | |||
| @@ -105,10 +100,10 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree2) { | |||
| std::string dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data"; | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(2) | |||
| .SetWorkerConnectorSize(2) | |||
| .SetNumWorkers(2) | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetWorkerConnectorSize(2) | |||
| .SetNumWorkers(2) | |||
| .Build(&my_tfreader_op); | |||
| my_tree->AssociateNode(my_tfreader_op); | |||
| @@ -40,7 +40,7 @@ using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false); | |||
| std::shared_ptr<RepeatOp> Repeat(int repeat_cnt); | |||
| @@ -53,7 +53,7 @@ std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int6 | |||
| ImageFolderOp::Builder builder; | |||
| Status rc = builder.SetNumWorkers(num_works) | |||
| .SetImageFolderDir(path) | |||
| .SetRowsPerBuffer(rows) | |||
| .SetOpConnectorSize(conns) | |||
| .SetExtensions({".jpg", ".JPEG"}) | |||
| .SetSampler(std::move(sampler)) | |||
| @@ -156,7 +156,7 @@ TEST_F(MindDataTestCallback, TestBasicCallback) { | |||
| ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape); | |||
| ASSERT_OK(schema->AddColumn(col)); | |||
| std::shared_ptr<RandomDataOp> leaf; | |||
| rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(44).Build(&leaf); | |||
| rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(44).Build(&leaf); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // config mapOp | |||
| std::shared_ptr<MapOp> map_op; | |||
| @@ -208,7 +208,7 @@ TEST_F(MindDataTestCallback, TestMultiEpochCallback) { | |||
| ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape); | |||
| ASSERT_OK(schema->AddColumn(col)); | |||
| std::shared_ptr<RandomDataOp> leaf; | |||
| rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf); | |||
| rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // config mapOp | |||
| std::shared_ptr<MapOp> map_op; | |||
| @@ -273,7 +273,7 @@ TEST_F(MindDataTestCallback, TestSelectedCallback) { | |||
| ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape); | |||
| ASSERT_OK(schema->AddColumn(col)); | |||
| std::shared_ptr<RandomDataOp> leaf; | |||
| rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf); | |||
| rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // config mapOp | |||
| std::shared_ptr<MapOp> map_op; | |||
| @@ -46,9 +46,14 @@ std::shared_ptr<ManifestOp> Manifest(int32_t num_works, int32_t rows, int32_t co | |||
| std::map<std::string, int32_t> map = {}, bool decode = false) { | |||
| std::shared_ptr<ManifestOp> so; | |||
| ManifestOp::Builder builder; | |||
| Status rc = builder.SetNumWorkers(num_works).SetManifestFile(file).SetRowsPerBuffer( | |||
| rows).SetOpConnectorSize(conns).SetSampler(std::move(sampler)).SetClassIndex(map).SetDecode(decode) | |||
| .SetUsage(usage).Build(&so); | |||
| Status rc = builder.SetNumWorkers(num_works) | |||
| .SetManifestFile(file) | |||
| .SetOpConnectorSize(conns) | |||
| .SetSampler(std::move(sampler)) | |||
| .SetClassIndex(map) | |||
| .SetDecode(decode) | |||
| .SetUsage(usage) | |||
| .Build(&so); | |||
| return so; | |||
| } | |||
| @@ -110,7 +110,7 @@ class MindDataTestMapOp : public UT::DatasetOpTesting { | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path_}) | |||
| .SetColumnsToLoad({"image", "label", "A", "B"}) | |||
| .SetRowsPerBuffer(2) | |||
| .SetWorkerConnectorSize(2) | |||
| .SetNumWorkers(2); | |||
| @@ -516,7 +516,7 @@ TEST_F(MindDataTestMapOp, TFReader_Decode_Repeat_Resize) { | |||
| TFReaderOp::Builder sobuilder; | |||
| sobuilder.SetDatasetFilesList({dataset_path_}) | |||
| .SetColumnsToLoad({"image", "label"}) | |||
| .SetRowsPerBuffer(2) | |||
| .SetWorkerConnectorSize(2) | |||
| .SetNumWorkers(2); | |||
| rc = sobuilder.Build(&my_tfreader_op); | |||
| @@ -29,12 +29,11 @@ | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::MsLogLevel::INFO; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::INFO; | |||
| class MindDataTestMindRecordOp : public UT::DatasetOpTesting { | |||
| }; | |||
| class MindDataTestMindRecordOp : public UT::DatasetOpTesting {}; | |||
| TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) { | |||
| // single MindRecord op and nothing else | |||
| @@ -63,10 +62,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) { | |||
| std::shared_ptr<MindRecordOp> my_mindrecord_op; | |||
| MindRecordOp::Builder builder; | |||
| builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) | |||
| .SetLoadDataset(true) | |||
| .SetRowsPerBuffer(3) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list); | |||
| .SetLoadDataset(true) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list); | |||
| rc = builder.Build(&my_mindrecord_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -134,11 +132,10 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordSample) { | |||
| std::shared_ptr<MindRecordOp> my_mindrecord_op; | |||
| MindRecordOp::Builder builder; | |||
| builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) | |||
| .SetLoadDataset(true) | |||
| .SetRowsPerBuffer(3) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list) | |||
| .SetOperators(operators); | |||
| .SetLoadDataset(true) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list) | |||
| .SetOperators(operators); | |||
| rc = builder.Build(&my_mindrecord_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -206,11 +203,10 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordShuffle) { | |||
| std::shared_ptr<MindRecordOp> my_mindrecord_op; | |||
| MindRecordOp::Builder builder; | |||
| builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) | |||
| .SetLoadDataset(true) | |||
| .SetRowsPerBuffer(3) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list) | |||
| .SetOperators(operators); | |||
| .SetLoadDataset(true) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list) | |||
| .SetOperators(operators); | |||
| rc = builder.Build(&my_mindrecord_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -281,11 +277,10 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordCategory) { | |||
| std::shared_ptr<MindRecordOp> my_mindrecord_op; | |||
| MindRecordOp::Builder builder; | |||
| builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) | |||
| .SetLoadDataset(true) | |||
| .SetRowsPerBuffer(3) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list) | |||
| .SetOperators(operators); | |||
| .SetLoadDataset(true) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list) | |||
| .SetOperators(operators); | |||
| rc = builder.Build(&my_mindrecord_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -350,10 +345,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) { | |||
| std::shared_ptr<MindRecordOp> my_mindrecord_op; | |||
| MindRecordOp::Builder builder; | |||
| builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) | |||
| .SetLoadDataset(true) | |||
| .SetRowsPerBuffer(3) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list); | |||
| .SetLoadDataset(true) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list); | |||
| rc = builder.Build(&my_mindrecord_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -364,8 +358,7 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) { | |||
| uint32_t num_repeats = 2; | |||
| std::shared_ptr<RepeatOp> my_repeat_op; | |||
| rc = RepeatOp::Builder(num_repeats) | |||
| .Build(&my_repeat_op); | |||
| rc = RepeatOp::Builder(num_repeats).Build(&my_repeat_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_repeat_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| @@ -375,7 +368,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) { | |||
| rc = my_repeat_op->AddChild(my_mindrecord_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| // Set children/root layout. | |||
| rc = my_tree->AssignRoot(my_repeat_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| @@ -407,7 +399,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) { | |||
| } | |||
| } | |||
| TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { | |||
| // single MindRecord op and nothing else | |||
| // | |||
| @@ -435,10 +426,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { | |||
| std::shared_ptr<MindRecordOp> my_mindrecord_op; | |||
| MindRecordOp::Builder builder; | |||
| builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) | |||
| .SetLoadDataset(true) | |||
| .SetRowsPerBuffer(3) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list); | |||
| .SetLoadDataset(true) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list); | |||
| rc = builder.Build(&my_mindrecord_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -449,8 +439,7 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { | |||
| uint32_t num_repeats = 2; | |||
| std::shared_ptr<RepeatOp> my_repeat_op; | |||
| rc = RepeatOp::Builder(num_repeats) | |||
| .Build(&my_repeat_op); | |||
| rc = RepeatOp::Builder(num_repeats).Build(&my_repeat_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_repeat_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| @@ -518,10 +507,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordInvalidColumnList) { | |||
| std::shared_ptr<MindRecordOp> my_mindrecord_op; | |||
| MindRecordOp::Builder builder; | |||
| builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) | |||
| .SetLoadDataset(true) | |||
| .SetRowsPerBuffer(3) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list); | |||
| .SetLoadDataset(true) | |||
| .SetNumMindRecordWorkers(4) | |||
| .SetColumnsToLoad(column_list); | |||
| rc = builder.Build(&my_mindrecord_op); | |||
| ASSERT_TRUE(rc.IsError()); | |||
| ASSERT_TRUE(rc.ToString().find_first_of("illegal column list") != std::string::npos); | |||
| @@ -42,7 +42,7 @@ using mindspore::MsLogLevel::ERROR; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false); | |||
| std::shared_ptr<RepeatOp> Repeat(int repeat_cnt); | |||
| @@ -57,7 +57,7 @@ std::shared_ptr<MnistOp> CreateMnist(int64_t num_wrks, int64_t rows, int64_t con | |||
| MnistOp::Builder builder; | |||
| Status rc = builder.SetNumWorkers(num_wrks) | |||
| .SetDir(path) | |||
| .SetRowsPerBuffer(rows) | |||
| .SetOpConnectorSize(conns) | |||
| .SetSampler(std::move(sampler)) | |||
| .Build(&so); | |||
| @@ -41,7 +41,7 @@ TEST_F(MindDataTestProjectOp, TestProjectProject) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); | |||
| builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| builder.SetDataSchema(std::move(schema)); | |||
| @@ -76,11 +76,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) { | |||
| std::shared_ptr<RandomDataOp> myRandomDataOp; | |||
| RandomDataOp::Builder builder; | |||
| rc = builder.SetRowsPerBuffer(2) | |||
| .SetNumWorkers(1) | |||
| .SetDataSchema(std::move(testSchema)) | |||
| .SetTotalRows(25) | |||
| .Build(&myRandomDataOp); | |||
| rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(25).Build(&myRandomDataOp); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = myTree->AssociateNode(myRandomDataOp); | |||
| @@ -134,9 +130,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) { | |||
| std::shared_ptr<RandomDataOp> myRandomDataOp; | |||
| RandomDataOp::Builder builder; | |||
| rc = builder.SetRowsPerBuffer(2) | |||
| .SetNumWorkers(1) | |||
| .Build(&myRandomDataOp); | |||
| rc = builder.SetNumWorkers(1).Build(&myRandomDataOp); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = myTree->AssociateNode(myRandomDataOp); | |||
| @@ -171,11 +165,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) { | |||
| std::shared_ptr<RandomDataOp> myRandomDataOp; | |||
| RandomDataOp::Builder builder; | |||
| rc = builder.SetRowsPerBuffer(2) | |||
| .SetNumWorkers(1) | |||
| .SetDataSchema(std::move(testSchema)) | |||
| .SetTotalRows(10) | |||
| .Build(&myRandomDataOp); | |||
| rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = myTree->AssociateNode(myRandomDataOp); | |||
| @@ -235,11 +225,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) { | |||
| std::shared_ptr<RandomDataOp> myRandomDataOp; | |||
| RandomDataOp::Builder builder; | |||
| rc = builder.SetRowsPerBuffer(2) | |||
| .SetNumWorkers(1) | |||
| .SetDataSchema(std::move(testSchema)) | |||
| .SetTotalRows(10) | |||
| .Build(&myRandomDataOp); | |||
| rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = myTree->AssociateNode(myRandomDataOp); | |||
| @@ -315,11 +301,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) { | |||
| std::shared_ptr<RandomDataOp> myRandomDataOp; | |||
| RandomDataOp::Builder builder; | |||
| rc = builder.SetRowsPerBuffer(2) | |||
| .SetNumWorkers(4) | |||
| .SetDataSchema(std::move(testSchema)) | |||
| .SetTotalRows(10) | |||
| .Build(&myRandomDataOp); | |||
| rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = myTree->AssociateNode(myRandomDataOp); | |||
| @@ -395,11 +377,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) { | |||
| std::shared_ptr<RandomDataOp> myRandomDataOp; | |||
| RandomDataOp::Builder builder; | |||
| rc = builder.SetRowsPerBuffer(2) | |||
| .SetNumWorkers(4) | |||
| .SetDataSchema(std::move(testSchema)) | |||
| .SetTotalRows(10) | |||
| .Build(&myRandomDataOp); | |||
| rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = myTree->AssociateNode(myRandomDataOp); | |||
| @@ -407,9 +385,9 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) { | |||
| std::shared_ptr<ShuffleOp> myShuffleOp; | |||
| rc = ShuffleOp::Builder() | |||
| .SetRowsPerBuffer(2) | |||
| .SetShuffleSize(4) | |||
| .Build(&myShuffleOp); | |||
| .SetShuffleSize(4) | |||
| .Build(&myShuffleOp); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = myTree->AssociateNode(myShuffleOp); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| @@ -53,10 +53,10 @@ TEST_F(MindDataTestRenameOp, TestRenameOpDefault) { | |||
| std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| rc = TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(2) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(1) | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| @@ -46,7 +46,7 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromDatasetFuntions) { | |||
| std::shared_ptr<TextFileOp> file_op; | |||
| TextFileOp::Builder builder_file; | |||
| builder_file.SetTextFilesList({dataset_path}).SetRowsPerBuffer(1).SetNumWorkers(1).SetOpConnectorSize(2); | |||
| builder_file.SetTextFilesList({dataset_path}).SetNumWorkers(1).SetOpConnectorSize(2); | |||
| Status rc = builder_file.Build(&file_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -119,7 +119,7 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceTokenizerFuntions) { | |||
| std::shared_ptr<TextFileOp> file_op; | |||
| TextFileOp::Builder builder_file; | |||
| builder_file.SetTextFilesList({dataset_path}).SetRowsPerBuffer(1).SetNumWorkers(1).SetOpConnectorSize(2); | |||
| builder_file.SetTextFilesList({dataset_path}).SetNumWorkers(1).SetOpConnectorSize(2); | |||
| Status rc = builder_file.Build(&file_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -56,16 +56,16 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic1) { | |||
| dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data"; | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| rc = TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(2) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(1) | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| std::shared_ptr<ShuffleOp> my_shuffle_op; | |||
| rc = ShuffleOp::Builder().SetRowsPerBuffer(2).SetShuffleSize(4).Build(&my_shuffle_op); | |||
| rc = ShuffleOp::Builder().SetShuffleSize(4).Build(&my_shuffle_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_shuffle_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| @@ -130,7 +130,6 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| rc = TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(3) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(2) | |||
| .Build(&my_tfreader_op); | |||
| @@ -138,7 +137,7 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) { | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| std::shared_ptr<ShuffleOp> my_shuffle_op; | |||
| rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).SetRowsPerBuffer(3).Build(&my_shuffle_op); | |||
| rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).Build(&my_shuffle_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_shuffle_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| @@ -201,14 +200,13 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic3) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| rc = TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(3) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(2) | |||
| .Build(&my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| my_tree->AssociateNode(my_tfreader_op); | |||
| std::shared_ptr<ShuffleOp> my_shuffle_op; | |||
| rc = ShuffleOp::Builder().SetShuffleSize(100).SetRowsPerBuffer(3).Build(&my_shuffle_op); | |||
| rc = ShuffleOp::Builder().SetShuffleSize(100).Build(&my_shuffle_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_shuffle_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| @@ -275,7 +273,6 @@ TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| rc = TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(3) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(2) | |||
| .Build(&my_tfreader_op); | |||
| @@ -286,7 +283,6 @@ TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) { | |||
| rc = ShuffleOp::Builder() | |||
| .SetShuffleSize(4) | |||
| .SetShuffleSeed(100) | |||
| .SetRowsPerBuffer(3) | |||
| .SetReshuffleEachEpoch(true) | |||
| .Build(&my_shuffle_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
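The resulting shuffle construction, spelled out as one sketch (illustrative; it only restates the builder calls that remain in the hunk above, within the surrounding test body):

    // ShuffleOp::Builder chain after the rows-per-buffer setter is dropped.
    std::shared_ptr<ShuffleOp> my_shuffle_op;
    rc = ShuffleOp::Builder()
           .SetShuffleSize(4)
           .SetShuffleSeed(100)
           .SetReshuffleEachEpoch(true)
           .Build(&my_shuffle_op);
    EXPECT_TRUE(rc.IsOk());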
| @@ -35,7 +35,7 @@ TEST_F(MindDataTestSkipOp, TestSkipOpFuntions) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); | |||
| builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| builder.SetDataSchema(std::move(schema)); | |||
| @@ -42,7 +42,7 @@ TEST_F(MindDataTestTakeOp, TestTakeProject) { | |||
| // TFReaderOp | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); | |||
| builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| builder.SetDataSchema(std::move(schema)); | |||
| @@ -45,7 +45,7 @@ TEST_F(MindDataTestTextFileOp, TestTextFileBasic) { | |||
| std::shared_ptr<TextFileOp> op; | |||
| TextFileOp::Builder builder; | |||
| builder.SetTextFilesList({dataset_path}).SetRowsPerBuffer(16).SetOpConnectorSize(2); | |||
| builder.SetTextFilesList({dataset_path}).SetOpConnectorSize(2); | |||
| Status rc = builder.Build(&op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -94,7 +94,7 @@ TEST_F(MindDataTestTextFileOp, TestTextFileFileNotExist) { | |||
| std::shared_ptr<TextFileOp> op; | |||
| TextFileOp::Builder builder; | |||
| builder.SetTextFilesList({dataset_path}).SetRowsPerBuffer(16).SetOpConnectorSize(2); | |||
| builder.SetTextFilesList({dataset_path}).SetOpConnectorSize(2); | |||
| Status rc = builder.Build(&op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -44,7 +44,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16); | |||
| builder.SetDatasetFilesList({dataset_path}); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| builder.SetDataSchema(std::move(schema)); | |||
| @@ -148,7 +148,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSmallRowsPerBuffer) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(1); | |||
| builder.SetDatasetFilesList({dataset_path}); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| builder.SetDataSchema(std::move(schema)); | |||
| @@ -200,7 +200,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeQueueSize) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(1).SetRowsPerBuffer(16); | |||
| builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(1); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| builder.SetDataSchema(std::move(schema)); | |||
| @@ -252,10 +252,11 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(16) | |||
| .SetNumWorkers(1); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| builder | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetNumWorkers(1); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| builder.SetDataSchema(std::move(schema)); | |||
| Status rc = builder.Build(&my_tfreader_op); | |||
| @@ -307,7 +308,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) { | |||
| // TFReaderOp | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); | |||
| builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); | |||
| builder.SetDataSchema(std::move(schema)); | |||
| @@ -378,7 +379,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path + "/test.data"}) | |||
| .SetRowsPerBuffer(16) | |||
| .SetNumWorkers(16) | |||
| .SetDataSchema(std::move(data_schema)); | |||
| Status rc = builder.Build(&my_tfreader_op); | |||
| @@ -605,7 +606,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasicNoSchema) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16); | |||
| builder.SetDatasetFilesList({dataset_path}); | |||
| Status rc = builder.Build(&my_tfreader_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| @@ -697,7 +698,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderInvalidFiles) { | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| TFReaderOp::Builder builder; | |||
| builder.SetDatasetFilesList({invalid_file, valid_file, schema_file}).SetRowsPerBuffer(16); | |||
| builder.SetDatasetFilesList({invalid_file, valid_file, schema_file}); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(schema_file, {}); | |||
| @@ -706,7 +707,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderInvalidFiles) { | |||
| Status rc = builder.Build(&my_tfreader_op); | |||
| ASSERT_TRUE(!rc.IsOk()); | |||
| builder.SetDatasetFilesList({invalid_file, valid_file, schema_file, nonexistent_file}).SetRowsPerBuffer(16); | |||
| builder.SetDatasetFilesList({invalid_file, valid_file, schema_file, nonexistent_file}); | |||
| schema = std::make_unique<DataSchema>(); | |||
| schema->LoadSchemaFile(schema_file, {}); | |||
| @@ -39,11 +39,11 @@ | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false); | |||
| std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); | |||
| @@ -61,8 +61,7 @@ TEST_F(MindDataTestVOCOp, TestVOCDetection) { | |||
| std::string task_mode("train"); | |||
| std::shared_ptr<VOCOp> my_voc_op; | |||
| VOCOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode) | |||
| .Build(&my_voc_op); | |||
| Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_voc_op); | |||
| @@ -87,7 +86,7 @@ TEST_F(MindDataTestVOCOp, TestVOCDetection) { | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| //Display the tensor by calling the printer on it | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| @@ -112,8 +111,7 @@ TEST_F(MindDataTestVOCOp, TestVOCSegmentation) { | |||
| std::string task_mode("train"); | |||
| std::shared_ptr<VOCOp> my_voc_op; | |||
| VOCOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode) | |||
| .Build(&my_voc_op); | |||
| Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_voc_op); | |||
| @@ -138,7 +136,7 @@ TEST_F(MindDataTestVOCOp, TestVOCSegmentation) { | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| //Display the tensor by calling the printer on it | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| @@ -168,9 +166,7 @@ TEST_F(MindDataTestVOCOp, TestVOCClassIndex) { | |||
| std::shared_ptr<VOCOp> my_voc_op; | |||
| VOCOp::Builder builder; | |||
| Status rc = | |||
| builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode) | |||
| .SetClassIndex(class_index) | |||
| .Build(&my_voc_op); | |||
| builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).SetClassIndex(class_index).Build(&my_voc_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_voc_op); | |||
| @@ -195,7 +191,7 @@ TEST_F(MindDataTestVOCOp, TestVOCClassIndex) { | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| //Display the tensor by calling the printer on it | |||
| // Display the tensor by calling the printer on it | |||
| for (int i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| @@ -36,22 +36,21 @@ | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::MsLogLevel::INFO; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::MsLogLevel::INFO; | |||
| class MindDataTestZipOp : public UT::DatasetOpTesting { | |||
| }; | |||
| class MindDataTestZipOp : public UT::DatasetOpTesting {}; | |||
| TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) { | |||
| /* Tree: | |||
| * | |||
| * | |||
| * OpId(2) ZipOp | |||
| * / \ | |||
| * OpId(0) TFReaderOp OpId(1) TFReaderOp | |||
| * Start with an empty execution tree | |||
| */ | |||
| Status rc; | |||
| MS_LOG(INFO) << "UT test TestZipBasic."; | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| @@ -61,21 +60,19 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) { | |||
| std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| rc = TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(2) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op); | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op2; | |||
| rc = TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path2}) | |||
| .SetRowsPerBuffer(2) | |||
| .SetWorkerConnectorSize(1) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op2); | |||
| .SetDatasetFilesList({dataset_path2}) | |||
| .SetWorkerConnectorSize(1) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op2); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op2); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| @@ -123,20 +120,19 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) { | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 3); // Should be 3 rows fetched | |||
| ASSERT_EQ(row_count, 3); // Should be 3 rows fetched | |||
| } | |||
| TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) { | |||
| /* Tree: | |||
| * OpId(3) Repeat(3) | |||
| * | |||
| * OpId(2) ZipOp | |||
| * / \ | |||
| * OpId(0) TFReaderOp OpId(1) TFReaderOp | |||
| * | |||
| * Start with an empty execution tree | |||
| */ | |||
| Status rc; | |||
| MS_LOG(INFO) << "UT test TestZipRepeat."; | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| @@ -146,21 +142,21 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) { | |||
| std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op; | |||
| rc = TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetRowsPerBuffer(2) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op); | |||
| .SetDatasetFilesList({dataset_path}) | |||
| .SetWorkerConnectorSize(16) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| std::shared_ptr<TFReaderOp> my_tfreader_op2; | |||
| rc = TFReaderOp::Builder() | |||
| .SetDatasetFilesList({dataset_path2}) | |||
| .SetRowsPerBuffer(2) | |||
| .SetWorkerConnectorSize(1) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op2); | |||
| .SetDatasetFilesList({dataset_path2}) | |||
| .SetWorkerConnectorSize(1) | |||
| .SetNumWorkers(1) | |||
| .Build(&my_tfreader_op2); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_tfreader_op2); | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| @@ -221,5 +217,5 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) { | |||
| EXPECT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 9); // Should be 9 rows fetched | |||
| ASSERT_EQ(row_count, 9); // Should be 9 rows fetched | |||
| } | |||
| @@ -43,21 +43,18 @@ def test_basic(): | |||
| ds.config.load('../data/dataset/declient.cfg') | |||
| # assert ds.config.get_rows_per_buffer() == 32 | |||
| assert ds.config.get_num_parallel_workers() == 8 | |||
| # assert ds.config.get_worker_connector_size() == 16 | |||
| assert ds.config.get_prefetch_size() == 16 | |||
| assert ds.config.get_seed() == 5489 | |||
| assert ds.config.get_monitor_sampling_interval() == 15 | |||
| # ds.config.set_rows_per_buffer(1) | |||
| ds.config.set_num_parallel_workers(2) | |||
| # ds.config.set_worker_connector_size(3) | |||
| ds.config.set_prefetch_size(4) | |||
| ds.config.set_seed(5) | |||
| ds.config.set_monitor_sampling_interval(45) | |||
| # assert ds.config.get_rows_per_buffer() == 1 | |||
| assert ds.config.get_num_parallel_workers() == 2 | |||
| # assert ds.config.get_worker_connector_size() == 3 | |||
| assert ds.config.get_prefetch_size() == 4 | |||