| @@ -90,7 +90,7 @@ Status BarrierOp::blockCond() { | |||
| { | |||
| py::gil_scoped_acquire gil_acquire; | |||
| if (Py_IsInitialized() == 0) { | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); | |||
| } | |||
| // we have condition name, however the flexibility is in python today | |||
| try { | |||
| @@ -99,7 +99,8 @@ Status BarrierOp::blockCond() { | |||
| // Process the return value | |||
| if (!py::isinstance<py::bool_>(ret_py_obj)) { | |||
| return Status(StatusCode::kMDPyFuncException, | |||
| "Invalid parameter, condition wait function should return true/false."); | |||
| "Invalid parameter, condition wait function should return boolean, but got " + | |||
| std::string(ret_py_obj.get_type().str())); | |||
| } | |||
| } catch (const py::error_already_set &e) { | |||
| return Status(StatusCode::kMDPyFuncException, e.what()); | |||
| @@ -134,7 +134,7 @@ Status BatchOp::operator()() { | |||
| if ((num_workers_ > 1 || batch_map_func_) && GetMemoryUsage() > MAX_MEMORY_USAGE_THRESHOLD) { | |||
| MS_LOG(WARNING) << "Memory consumption is more than " << (GetMemoryUsage() * 100) << "%, " | |||
| << "which may cause oom error. Please reduce num_parallel_workers size / " | |||
| << "optimize per_batch_map function / other python data preprocess function to " | |||
| << "optimize 'per_batch_map' function / other python data preprocess function to " | |||
| << "reduce memory usage."; | |||
| } | |||
| #endif | |||
| @@ -203,8 +203,9 @@ Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, TensorRow *d | |||
| first_shape.Print(shape1); | |||
| old_tensor->shape().Print(shape2); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, batch operation expect same shape for each data row, but got inconsistent shape in column " + | |||
| std::to_string(i) + " expected shape for this column is:" + shape1.str() + ", got shape:" + shape2.str()); | |||
| "Inconsistent batch shapes, batch operation expect same shape for each data row, " | |||
| "but got inconsistent shape in column " + | |||
| std::to_string(i) + ", expected shape for this column is:" + shape1.str() + ", got shape:" + shape2.str()); | |||
| } | |||
| } | |||
| } else { // handle string column differently | |||
| @@ -300,7 +301,7 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_rows == out_cols[i].size(), | |||
| "Invalid data, column: " + out_col_names_[i] + | |||
| " expects: " + std::to_string(num_rows) + | |||
| " rows returned from per_batch_map, got: " + std::to_string(out_cols[i].size())); | |||
| " rows returned from 'per_batch_map', got: " + std::to_string(out_cols[i].size())); | |||
| for (auto &t_row : *out_q_table) { | |||
| t_row[col_id] = out_cols[i][row_id++]; | |||
| } | |||
| @@ -339,14 +340,16 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { | |||
| *batch_size = size.cast<int32_t>(); | |||
| if (*batch_size <= 0) { | |||
| return Status(StatusCode::kMDPyFuncException, | |||
| "Invalid parameter, batch_size function should return an integer greater than 0, but got: " + | |||
| "Invalid batch_size function, 'batch_size' function should return an integer greater than 0, " | |||
| "but got: " + | |||
| std::to_string(*batch_size)); | |||
| } | |||
| } catch (const py::error_already_set &e) { | |||
| return Status(StatusCode::kMDPyFuncException, e.what()); | |||
| } catch (const py::cast_error &e) { | |||
| return Status(StatusCode::kMDPyFuncException, | |||
| "Invalid parameter, batch_size function should return an integer greater than 0."); | |||
| return Status( | |||
| StatusCode::kMDPyFuncException, | |||
| "Invalid batch_size function, the return value of batch_size function cast failed: " + std::string(e.what())); | |||
| } | |||
| } | |||
| return Status(StatusCode::kSuccess, "batch_size function call succeeded."); | |||
| @@ -379,11 +382,13 @@ Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBat | |||
| // Parse batch map return value | |||
| py::tuple ret_tuple = py::cast<py::tuple>(ret_py_obj); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(py::isinstance<py::tuple>(ret_tuple), | |||
| "per_batch_map function should return a tuple."); | |||
| "Invalid per_batch_map, 'per_batch_map' function should return a tuple, but got " + | |||
| std::string(ret_py_obj.get_type().str())); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret_tuple.size() == out_col_names_.size(), | |||
| "Incorrect number of columns returned in per_batch_map function. Expects: " + | |||
| "Invalid per_batch_map, the number of columns returned in 'per_batch_map' function " | |||
| "should be " + | |||
| std::to_string(out_col_names_.size()) + | |||
| " got: " + std::to_string(ret_tuple.size())); | |||
| ", but got: " + std::to_string(ret_tuple.size())); | |||
| for (size_t i = 0; i < ret_tuple.size(); i++) { | |||
| TensorRow output_batch; | |||
| // If user returns a type that is neither a list nor an array, issue a error msg. | |||
| @@ -405,7 +410,8 @@ Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBat | |||
| return Status(StatusCode::kMDPyFuncException, e.what()); | |||
| } catch (const py::cast_error &e) { | |||
| return Status(StatusCode::kMDPyFuncException, | |||
| "Invalid parameter, per_batch_map function of batch should return a tuple of list of numpy array."); | |||
| "Invalid per_batch_map, the return value of 'per_batch_map' function cast to py::tuple failed: " + | |||
| std::string(e.what())); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| @@ -432,7 +438,7 @@ Status BatchOp::PadColumns(std::unique_ptr<TensorQTable> *table, const PadInfo & | |||
| if (pad_shapes[col_id].empty()) pad_shapes[col_id] = max_shapes[col_id]; // fill pad shape with -1 | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| pad_shapes[col_id].size() == max_shapes[col_id].size(), | |||
| "Invalid data, rank of pad_shape must be equal to rank of specified column. pad_shapes rank:" + | |||
| "Invalid pad_info, rank of pad_shape must be equal to rank of specified column. pad_shapes rank:" + | |||
| std::to_string(pad_shapes[col_id].size()) + ", column rank: " + std::to_string(max_shapes[col_id].size())); | |||
| } | |||
| @@ -482,12 +488,14 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info, | |||
| for (const auto &p : pad_info) { | |||
| auto location = column_name_id_map.find(p.first); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(location != column_name_id_map.end(), | |||
| "Invalid parameter, column name: " + p.first + " does not exist."); | |||
| "Invalid pad_info, column name: " + p.first + " does not exist."); | |||
| auto col_id = static_cast<dsize_t>(location->second); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| col_id < pad_vals->size() && col_id < pad_shapes->size(), | |||
| "Invalid parameter, column id must be less than the size of pad_val and pad_shape, but got: " + | |||
| std::to_string(col_id)); | |||
| "Invalid pad_info, column name should be match with the size of pad value and pad shape, but got " | |||
| "column name: " + | |||
| p.first + ", the size of pad value: " + std::to_string(pad_vals->size()) + | |||
| " and the size of pad shape: " + std::to_string(pad_shapes->size()) + "."); | |||
| pad_cols->insert(col_id); | |||
| (*pad_vals)[col_id] = p.second.second; // set pad values | |||
| (*pad_shapes)[col_id] = p.second.first.AsVector(); // empty vector if shape is unknown | |||
| @@ -498,8 +506,9 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info, | |||
| Status BatchOp::ComputeColMap() { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(child_.size() == 1, | |||
| "Invalid data, batch operator can't be used as a single operator, " | |||
| "should be preceded by an operator that reads data, for example, ImageFolderDataset."); | |||
| "Invalid batch, batch operator can't be used as a single operator, " | |||
| "should be preceded by an operator that reads data, for example, " | |||
| "ds1 = ds.ImageFolderDataset().batch()."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!(child_[0]->column_name_id_map().empty()), | |||
| "Invalid data, the column of the previous operator of the batch cannot be empty."); | |||
| @@ -514,7 +523,7 @@ Status BatchOp::ComputeColMap() { | |||
| // check all input columns exist | |||
| for (const auto &col : in_col_names_) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(child_map_.find(col) != child_map_.end(), | |||
| "Invalid parameter, col:" + col + " doesn't exist in dataset."); | |||
| "Invalid input_columns, '" + col + "' of 'input_columns' doesn't exist."); | |||
| } | |||
| // following logic deals with per_batch_map | |||
| @@ -551,8 +560,21 @@ Status BatchOp::ComputeColMap() { | |||
| } | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(column_name_id_map_.size() == (child_map_no_in_col.size() + out_col_names_.size()), | |||
| "Key error in column_name_id_map_. output_columns in batch is not set correctly!"); | |||
| if (column_name_id_map_.size() != (child_map_no_in_col.size() + out_col_names_.size())) { | |||
| const std::string prefix_str = std::string("["); | |||
| auto column_no_in_col = std::accumulate( | |||
| child_map_no_in_col.begin(), child_map_no_in_col.end(), prefix_str, | |||
| [](const std::string &str, const std::pair<std::string, int32_t> &p) { return str + p.first + ","; }); | |||
| column_no_in_col += "]"; | |||
| auto column_out = | |||
| std::accumulate(out_col_names_.begin(), out_col_names_.end(), prefix_str, | |||
| [](const std::string &str, const std::string &out_col) { return str + out_col + ","; }); | |||
| column_out += "]"; | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid output_columns, columns that are not involved in 'per_batch_map' should not be " | |||
| "in output_columns, but got columns that are not in input_columns: " + | |||
| column_no_in_col + ", output_columns: " + column_out + "."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -108,7 +108,7 @@ Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, T | |||
| for (size_t i = 0; i < number_of_arguments; i++) { | |||
| auto map_item = column_name_id_map_.find(length_dependent_columns_[i]); | |||
| if (map_item == column_name_id_map_.end()) { | |||
| RETURN_STATUS_UNEXPECTED("BucketBatchByLength: Couldn't find the specified column(" + | |||
| RETURN_STATUS_UNEXPECTED("Invalid column, BucketBatchByLength couldn't find the specified column(" + | |||
| length_dependent_columns_[i] + ") in the dataset."); | |||
| } | |||
| int32_t column_index = map_item->second; | |||
| @@ -118,7 +118,8 @@ Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, T | |||
| RETURN_IF_NOT_OK(output.at(0)->GetItemAt(out_element_length, {0})); | |||
| if (*out_element_length < 0) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid parameter, element_length_function must return an integer greater than or equal to 0, but got" + | |||
| "Invalid element_length_function, element_length_function must return an integer greater than or equal to 0, " | |||
| "but got " + | |||
| std::to_string(*out_element_length)); | |||
| } | |||
| } else { | |||
| @@ -139,7 +140,8 @@ Status BucketBatchByLengthOp::PadAndBatchBucket(int32_t bucket_index, int32_t ba | |||
| if (pad_shape[i] == TensorShape::kDimUnknown) { | |||
| if (bucket_index + 1 >= bucket_boundaries_.size()) { | |||
| std::string error_message = | |||
| "Invalid data, requested to pad to bucket boundary, element falls in last bucket."; | |||
| "Invalid data, requested to pad to bucket boundary failed, bucket index should be less than " + | |||
| std::to_string(bucket_boundaries_.size()) + ", but got " + std::to_string(bucket_index); | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, error_message); | |||
| } | |||
| @@ -41,7 +41,8 @@ BuildSentencePieceVocabOp::BuildSentencePieceVocabOp(std::shared_ptr<SentencePie | |||
| Status BuildSentencePieceVocabOp::operator()() { | |||
| if (tree_ == nullptr) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "[Internal ERROR] Pipeline init failed, Execution tree not set."); | |||
| } | |||
| RETURN_IF_NOT_OK(sentence_queue_->Register(tree_->AllTasks())); | |||
| RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask( | |||
| @@ -58,7 +59,7 @@ Status BuildSentencePieceVocabOp::operator()() { | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| } | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "no operator should be after from_dataset (repeat detected)"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "'build_sentencepiece_vocab' does not support 'repeat'."); | |||
| eoe_warning = true; | |||
| } | |||
| // add empty tensorRow for quit | |||
| @@ -71,13 +72,13 @@ Status BuildSentencePieceVocabOp::SentenceThread() { | |||
| TaskManager::FindMe()->Post(); | |||
| if (col_names_.empty() == true) { | |||
| auto itr = column_name_id_map_.find("text"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), | |||
| "Invalid data, 'text' column does not exist in dataset."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid data, 'text' column does not exist."); | |||
| col_id_ = itr->second; | |||
| } else { | |||
| auto itr = column_name_id_map_.find(col_names_[0]); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), | |||
| "Invalid parameter, column name: " + col_names_[0] + " does not exist in dataset."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid column, column name: " + col_names_[0] + | |||
| " does not exist, check the existing " | |||
| "columns with dataset API 'get_col_names'."); | |||
| col_id_ = itr->second; | |||
| } | |||
| std::unique_ptr<DatasetSentenceIterator> sentence_iter = std::make_unique<DatasetSentenceIterator>(this); | |||
| @@ -89,7 +90,7 @@ Status BuildSentencePieceVocabOp::SentenceThread() { | |||
| } else { | |||
| if (vocab_ == nullptr) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "Invalid parameter, SentencePiece vocab not set."); | |||
| "[Internal ERROR] SentencePiece vocab should not be null."); | |||
| } | |||
| vocab_->set_model_proto(model_proto); | |||
| } | |||
| @@ -131,7 +132,7 @@ bool BuildSentencePieceVocabOp::Done() { return read_done_; } | |||
| void BuildSentencePieceVocabOp::Next(std::string *sentence) { | |||
| if (sentence == nullptr) { | |||
| MS_LOG(ERROR) << "BuildSentencePieceVocab get nullptr element, please check data."; | |||
| MS_LOG(ERROR) << "[Internal ERROR] BuildSentencePieceVocab get nullptr element, please check data."; | |||
| return; | |||
| } | |||
| TensorRow new_row; | |||
| @@ -151,8 +152,8 @@ void BuildSentencePieceVocabOp::Next(std::string *sentence) { | |||
| if (new_row[col_id_]->type().IsNumeric() || new_row[col_id_]->Rank() > 1) { | |||
| ret_status_ = | |||
| Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "Invalid data, build_sentence_piece_vocab only works on string data with rank equal to 1, got type: " + | |||
| new_row[col_id_]->type().ToString() + "and rank: " + std::to_string(new_row[col_id_]->Rank())); | |||
| "Invalid data, build_sentence_piece_vocab only supports string data with rank equal to 1, but got type: " + | |||
| new_row[col_id_]->type().ToString() + ", rank: " + std::to_string(new_row[col_id_]->Rank())); | |||
| read_done_ = true; | |||
| return; | |||
| } | |||
| @@ -69,7 +69,9 @@ class BuildSentencePieceVocabOp : public PipelineOp { | |||
| Status operator()() override; | |||
| Status Reset() override { RETURN_STATUS_UNEXPECTED("Reset shouldn't be called in BuildSentencePieceVocabOp"); } | |||
| Status Reset() override { | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Reset shouldn't be called in BuildSentencePieceVocabOp."); | |||
| } | |||
| std::string Name() const override { return kBuildSentencePieceVocabOp; } | |||
| @@ -54,7 +54,8 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) { | |||
| while (!new_row.empty()) { | |||
| for (int32_t col : col_ids_) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!new_row[col]->type().IsNumeric(), | |||
| "Invalid data, build_vocab only works on string data, but got numeric data type: " + | |||
| "Invalid datatype, 'build_vocab' only supports string type of input, but got " | |||
| "numeric type: " + | |||
| new_row[col]->type().ToString()); | |||
| for (auto itr = new_row[col]->begin<std::string_view>(); itr != new_row[col]->end<std::string_view>(); ++itr) { | |||
| (*wrkr_map)[std::string(*itr)] += 1; | |||
| @@ -79,7 +80,8 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) { | |||
| Status BuildVocabOp::operator()() { | |||
| // launch the collector thread | |||
| if (tree_ == nullptr) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "[Internal ERROR] Pipeline init failed, Execution tree not set."); | |||
| } | |||
| RETURN_IF_NOT_OK(distributor_queue_->Register(tree_->AllTasks())); | |||
| RETURN_IF_NOT_OK(collector_queue_->Register(tree_->AllTasks())); | |||
| @@ -96,8 +98,9 @@ Status BuildVocabOp::operator()() { | |||
| col_ids_.reserve(col_names_.size()); | |||
| for (std::string col : col_names_) { | |||
| auto itr = column_name_id_map_.find(col); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), | |||
| "Invalid parameter, column name: " + col + " does not exist in dataset."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid column name, column name: " + col + | |||
| " does not exist, check the existing columns " | |||
| "with dataset API 'get_col_names'."); | |||
| col_ids_.push_back(itr->second); | |||
| } | |||
| } else { | |||
| @@ -113,7 +116,8 @@ Status BuildVocabOp::operator()() { | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| } | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "no operator should be after from_dataset (repeat detected)"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, | |||
| "Invalid repeat operator, BuildVocab does not support 'repeat' operator."); | |||
| eoe_warning = true; | |||
| } | |||
| @@ -137,7 +141,8 @@ Status BuildVocabOp::CollectorThread() { | |||
| ++num_quited_worker; | |||
| } | |||
| } // all frequencies are obtained | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(), "Invalid data, there are no words in the dataset."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(), | |||
| "Invalid data, BuildVocab load data failed: no words found in the dataset, please check the input data."); | |||
| std::vector<std::string> words; | |||
| // make sure enough is reserved, this will become a partially sorted list eventually | |||
| words.reserve(wrkr_map->size()); | |||
| @@ -158,7 +163,7 @@ Status BuildVocabOp::CollectorThread() { | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(err_msg.empty(), | |||
| "Invalid data, these special words are already in the dataset: " + err_msg + "."); | |||
| "Invalid special words, these special words are already in the vocab: " + err_msg + "."); | |||
| int64_t num_words = std::min(static_cast<int64_t>(words.size()), top_k_); | |||
| if (num_words == 0) { | |||
| @@ -66,7 +66,7 @@ class BuildVocabOp : public ParallelOp<TensorRow, TensorRow> { | |||
| Status operator()() override; | |||
| Status Reset() override { RETURN_STATUS_UNEXPECTED("Reset shouldn't be called in BuildVocabOp"); } | |||
| Status Reset() override { RETURN_STATUS_UNEXPECTED("[Internal ERROR] Reset shouldn't be called in BuildVocabOp"); } | |||
| private: | |||
| const int32_t interval_; | |||
| @@ -191,7 +191,7 @@ Status CacheBase::FetchFromCache(int32_t worker_id) { | |||
| if (AllowCacheMiss()) { | |||
| ++num_cache_miss_; | |||
| } else { | |||
| std::string errMsg = "Row id " + std::to_string(row_id) + " not found."; | |||
| std::string errMsg = "[Internal ERROR] Row id " + std::to_string(row_id) + " not found."; | |||
| RETURN_STATUS_UNEXPECTED(errMsg); | |||
| } | |||
| } | |||
| @@ -225,7 +225,8 @@ Status CacheBase::UpdateColumnMapFromCache() { | |||
| Status CacheBase::GetPrefetchRow(row_id_type row_id, TensorRow *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(row_id >= 0, "Expect positive row id, but got:" + std::to_string(row_id)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(row_id >= 0, | |||
| "[Internal ERROR] Expect positive row id, but got:" + std::to_string(row_id)); | |||
| RETURN_IF_NOT_OK(prefetch_.PopFront(row_id, out)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -278,7 +279,7 @@ Status CacheBase::Prefetcher(int32_t worker_id) { | |||
| cache_miss.clear(); | |||
| std::unique_ptr<IOBlock> blk; | |||
| RETURN_IF_NOT_OK(prefetch_queues_[worker_id]->PopFront(&blk)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!blk->eof(), "Expect eoe or a regular io block."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!blk->eof(), "[Internal ERROR] Expect eoe or a regular io block."); | |||
| if (!blk->eoe()) { | |||
| RETURN_IF_NOT_OK(blk->GetKeys(&prefetch_keys)); | |||
| Status rc; | |||
| @@ -29,7 +29,7 @@ namespace dataset { | |||
| Status CacheLookupOp::operator()() { | |||
| if (!sampler_) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "Invalid parameter, CacheLookupOp requires a sampler before it can be executed, but got nullptr."); | |||
| "Invalid sampler, Cache requires a sampler before it can be executed, but got nullptr."); | |||
| } | |||
| RETURN_IF_NOT_OK(RegisterResources()); | |||
| @@ -147,7 +147,8 @@ Status CacheMergeOp::CacheMissWorkerEntry(int32_t workerId) { | |||
| } else { | |||
| row_id_type row_id = new_row.getId(); | |||
| if (row_id < 0) { | |||
| std::string errMsg = "Expect positive row id, but got: " + std::to_string(row_id); | |||
| std::string errMsg = | |||
| "[Internal ERROR] row id should be greater than or equal to 0, but got: " + std::to_string(row_id); | |||
| RETURN_STATUS_UNEXPECTED(errMsg); | |||
| } | |||
| if (cache_missing_rows_) { | |||
| @@ -213,7 +214,8 @@ Status CacheMergeOp::PrepareOperator() { // Run any common code from super clas | |||
| // specific logic | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| child_.size() == kNumChildren, | |||
| "Incorrect number of children of CacheMergeOp, required num is 2, but got:" + std::to_string(child_.size())); | |||
| "[Internal ERROR] Incorrect number of children of CacheMergeOp, required num is 2, but got:" + | |||
| std::to_string(child_.size())); | |||
| RETURN_IF_NOT_OK(DatasetOp::PrepareOperator()); | |||
| // Get the computed check sum from all ops in the cache miss class | |||
| uint32_t cache_crc = DatasetOp::GenerateCRC(child_[kCacheMissChildIdx]); | |||
| @@ -231,7 +233,7 @@ Status CacheMergeOp::PrepareOperator() { // Run any common code from super clas | |||
| } | |||
| Status CacheMergeOp::ComputeColMap() { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "Invalid data, cache miss stream is empty."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "[Internal ERROR] cache miss stream is empty."); | |||
| if (column_name_id_map().empty()) { | |||
| column_name_id_map_ = child_[kCacheMissChildIdx]->column_name_id_map(); | |||
| } | |||
| @@ -270,7 +272,7 @@ Status CacheMergeOp::GetRq(row_id_type row_id, CacheMergeOp::TensorRowCacheReque | |||
| RETURN_IF_NOT_OK(mem.allocate(1)); | |||
| *out = mem.GetMutablePointer(); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, map insert fail."); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] map insert fail."); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| @@ -43,7 +43,7 @@ Status CacheOp::operator()() { | |||
| RETURN_UNEXPECTED_IF_NULL(tree_); | |||
| if (!sampler_) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "Invalid parameter, CacheOp requires a sampler before it can be executed, but got nullptr."); | |||
| "Invalid sampler, CacheOp requires a sampler before it can be executed, but got nullptr."); | |||
| } | |||
| RETURN_IF_NOT_OK(RegisterResources()); | |||
| @@ -145,9 +145,9 @@ Status CacheOp::WaitForCachingAllRows() { | |||
| BuildPhaseDone = true; | |||
| break; | |||
| case CacheServiceState::kOutOfMemory: | |||
| return Status(StatusCode::kMDOutOfMemory, "Cache server is running out of memory"); | |||
| return Status(StatusCode::kMDOutOfMemory, "Cache server is running out of memory, check memory usage."); | |||
| case CacheServiceState::kNoSpace: | |||
| return Status(StatusCode::kMDNoSpace, "Cache server is running of out spill storage"); | |||
| return Status(StatusCode::kMDNoSpace, "Cache server is running out of spill storage, check memory usage."); | |||
| case CacheServiceState::kNone: | |||
| case CacheServiceState::kError: | |||
| default: | |||
| @@ -74,9 +74,17 @@ Status ConcatOp::Verify(int32_t id, const TensorRow &new_row) { | |||
| // Compare the data type and data rank with these in child[0] | |||
| int32_t index = 0; | |||
| for (auto item : new_row) { | |||
| if ((item->type() != data_type_[index]) || item->Rank() != data_rank_[index++]) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, data type or data rank is not the same with previous dataset."); | |||
| if (item->type() != data_type_[index]) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid datatype, the data type of two datasets to be concatenated should be the same, but got " + | |||
| item->type().ToString() + " and " + data_type_[index].ToString() + "."); | |||
| } | |||
| if (item->Rank() != data_rank_[index]) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid shape, the data rank of two datasets to be concatenated should be the same, but got " + | |||
| std::to_string(item->Rank()) + " and " + std::to_string(data_rank_[index]) + "."); | |||
| } | |||
| index++; | |||
| } | |||
| } | |||
| verified_ = true; | |||
| @@ -89,12 +97,13 @@ Status ConcatOp::ComputeColMap() { | |||
| // Obtain columns_name_id_map from child_[0] | |||
| column_name_id_map_ = child_[0]->column_name_id_map(); | |||
| if (column_name_id_map_.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!"); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Child column name map cannot be empty!"); | |||
| } | |||
| // Verify all children have the same column name map | |||
| for (size_t i = 0; i < child_.size(); ++i) { | |||
| if (child_[i]->column_name_id_map() != column_name_id_map_) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, column name or column order is not the same with previous dataset."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid columns, 'column name' or 'column order' of concat datasets should be the same."); | |||
| } | |||
| } | |||
| } else { | |||
| @@ -118,7 +127,7 @@ Status ConcatOp::GetNumClasses(int64_t *num_classes) { | |||
| *num_classes = max_num_classes; | |||
| return Status::OK(); | |||
| } | |||
| Status ConcatOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); } | |||
| Status ConcatOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ConcatOp is an inlined operator."); } | |||
| bool ConcatOp::IgnoreSample() { | |||
| bool is_not_mappable_or_second_ne_zero = true; | |||
| @@ -184,10 +193,10 @@ Status ConcatOp::GetNextRow(TensorRow *row) { | |||
| return Status::OK(); | |||
| } | |||
| if (row->eof()) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(cur_child_ == 0, "Received an unexpected EOF."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(cur_child_ == 0, "[Internal ERROR] Received an unexpected EOF."); | |||
| for (int32_t i = cur_child_ + 1; i < child_.size(); i++) { | |||
| RETURN_IF_NOT_OK(child_[i]->GetNextRow(row)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(row->eof(), "Row must be an EOF."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(row->eof(), "[Internal ERROR] Row must be an EOF."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -63,7 +63,7 @@ Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) { | |||
| } | |||
| if (operator_id_ == kInvalidOperatorId) { | |||
| std::string err_msg( | |||
| "Cannot add child node. Tree node connections can only " | |||
| "[Internal ERROR] Cannot add child node. Tree node connections can only " | |||
| "be made if the node belongs to a tree."); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -71,7 +71,7 @@ Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) { | |||
| // disallow relationships with other trees | |||
| if (tree_ != child->tree_) { | |||
| std::string err_msg( | |||
| "Cannot add child node. Tree node connections can only be made if both nodes belong to the same tree."); | |||
| "Invalid operator structure, the relationship of operators should be one by one, but got too many branches."); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| child_.push_back(child); | |||
| @@ -82,7 +82,7 @@ Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) { | |||
| Status DatasetOp::RemoveChild(std::shared_ptr<DatasetOp> child) { | |||
| if (operator_id_ == kInvalidOperatorId) { | |||
| std::string err_msg( | |||
| "Cannot remove child node. Tree node connections can only " | |||
| "[Internal ERROR] Cannot remove child node. Tree node connections can only " | |||
| "be made if the node belongs to a tree."); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -90,7 +90,7 @@ Status DatasetOp::RemoveChild(std::shared_ptr<DatasetOp> child) { | |||
| // disallow relationships with other trees | |||
| if (tree_ != child->tree_) { | |||
| std::string err_msg( | |||
| "Cannot remove child node. Tree node connections can only be made if both nodes belong to the same tree."); | |||
| "Invalid operator structure, the relationship of operators should be one by one, but got too many branches."); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -132,11 +132,15 @@ void DatasetOp::RemoveParent(const DatasetOp *parent) { | |||
| // Removes this node from the tree and connects it's parent/child together | |||
| Status DatasetOp::Remove() { | |||
| if (parent_.size() > 1) { | |||
| std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one."); | |||
| std::string err_msg( | |||
| "Invalid operator structure, the relationship between operators should be one-to-one, but encountered more than " | |||
| "one parent, namely: " + | |||
| std::to_string(parent_.size())); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| if (child_.size() > 1) { | |||
| std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one."); | |||
| std::string err_msg( | |||
| "Invalid operator structure, the relationship of operators should be one by one, but got too many branches."); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -151,7 +155,8 @@ Status DatasetOp::Remove() { | |||
| // If we have a parent, then assign child's parent to point to our parent. | |||
| if (!parent_.empty()) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(parent_[0]->Children().size() == 1, | |||
| "Removing a node whose parent has more than 1 child is not supported."); | |||
| "Invalid operator structure, the relationship of operators should be one by one, " | |||
| "but got too many branches."); | |||
| child_[0]->parent_[0] = parent_[0]; | |||
| } else { | |||
| // We don't have a parent, so we are the root node being removed. | |||
| @@ -293,7 +298,8 @@ Status DatasetOp::GetClassIndexing(std::vector<std::pair<std::string, std::vecto | |||
| return child_[child_.size() - 1]->GetClassIndexing(output_class_indexing); | |||
| } else { | |||
| *output_class_indexing = {}; | |||
| RETURN_STATUS_UNEXPECTED("Trying to get class index from leaf node, missing override."); | |||
| RETURN_STATUS_UNEXPECTED("Unsupported scenario, GetClassIndexing failed for " + Name() + | |||
| " doesn't support GetClassIndexing yet."); | |||
| } | |||
| } | |||
| @@ -343,12 +349,14 @@ std::string DatasetOp::ColumnNameMapAsString() const { | |||
| // Operations changing the column map must overwrite this function. | |||
| Status DatasetOp::ComputeColMap() { | |||
| if (child_.size() > 1) { | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR], no support for the relationship between operators is not one-to-one."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid operator structure, the relationship of operators should be one by one, but got too many branches."); | |||
| } | |||
| if (column_name_id_map_.empty()) { | |||
| column_name_id_map_ = child_[0]->column_name_id_map(); | |||
| if (column_name_id_map_.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!"); | |||
| RETURN_STATUS_UNEXPECTED("Invalid column list, the column list of " + child_[0]->Name() + | |||
| " should have one column at least, but got empty."); | |||
| } | |||
| MS_LOG(DEBUG) << "Setting column map:\n" << DatasetOp::ColumnNameMapAsString(); | |||
| } else { | |||
| @@ -119,8 +119,8 @@ Status DeviceQueueOp::FilterMetadata(TensorRow *row) { | |||
| Status DeviceQueueOp::CheckExceptions(const TensorRow &row) const { | |||
| // this method checks if the row meets the conditions to be sent to TDT | |||
| for (const auto &item : row) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Invalid data, cannot send string tensor to device."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Invalid data, cannot send tensor with no data to device."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Invalid datatype, cannot send string data to device."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Invalid data, the data send to device is null."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -152,7 +152,8 @@ Status DeviceQueueOp::operator()() { | |||
| } | |||
| } | |||
| if (tdtInstancePtr->acl_handle_ == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Create channel for sending data failed, please check DEVICE ID setting."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "[Internal ERROR] Create channel for sending data failed, please check DEVICE ID setting."); | |||
| } | |||
| RETURN_IF_NOT_OK(SendDataToAscend()); | |||
| #endif | |||
| @@ -343,7 +344,8 @@ Status DeviceQueueOp::SendRowToTdt(TensorRow curr_row, bool is_profiling_enable, | |||
| #ifdef ENABLE_TDTQUE | |||
| Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) { | |||
| if (!create_data_info_queue_) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "DataInfo queue is not created."); | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "[Internal ERROR] DataInfo queue is not created."); | |||
| } | |||
| // This place has a race condition with operator(), so the first one | |||
| // arrive here will do the initialize work. | |||
| @@ -359,7 +361,7 @@ Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) { | |||
| } | |||
| #else | |||
| Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "GetDataInfo is not supported yet."); | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "'GetDataInfo' only supported on Ascend."); | |||
| } | |||
| #endif | |||
| @@ -446,7 +448,7 @@ Status DeviceQueueOp::PushDataToGPU() { | |||
| if (!ps::PsDataPrefetch::GetInstance().PrefetchData(channel_name_, items[0].data_ptr_, items[0].data_len_, | |||
| items[0].data_type_)) { | |||
| return Status(StatusCode::kMDTimeOut, __LINE__, __FILE__, | |||
| "Failed to prefetch data in current PS mode(cache data when sending)."); | |||
| "[Internal ERROR] Failed to prefetch data in current PS mode(cache data when sending)."); | |||
| } | |||
| RETURN_IF_NOT_OK(RetryPushData(handle, items)); | |||
| #ifndef ENABLE_SECURITY | |||
| @@ -623,18 +625,19 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items, | |||
| for (auto &sub_item : *items) { | |||
| auto rc = pool_[worker_id]->Allocate(sub_item.data_len_, &sub_item.data_ptr_); | |||
| if (rc.IsError() || sub_item.data_ptr_ == nullptr) { | |||
| return Status(StatusCode::kMDOutOfMemory, __LINE__, __FILE__, "Memory malloc failed."); | |||
| return Status(StatusCode::kMDOutOfMemory, __LINE__, __FILE__, "Memory malloc failed, check memory usage."); | |||
| } | |||
| if (curr_row[i] == nullptr) { | |||
| MS_LOG(ERROR) << "The pointer curr_row[" << i << "] is null"; | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "TensorRow 'curr_row' contains nullptr."); | |||
| MS_LOG(ERROR) << "[Internal ERROR] The pointer curr_row[" << i << "] is null"; | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "[Internal ERROR] TensorRow 'curr_row' contains nullptr."); | |||
| } | |||
| sub_item.data_type_ = curr_row[i]->type().ToString(); | |||
| const unsigned char *column_data = curr_row[i]->GetBuffer(); | |||
| if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data, | |||
| static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) { | |||
| MS_LOG(ERROR) << "memcpy_s failed!"; | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "memcpy failed when using memcpy_s do copy."); | |||
| MS_LOG(ERROR) << "[Internal ERROR] memcpy_s failed."; | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "[Internal ERROR] memcpy_s failed."); | |||
| } | |||
| } | |||
| @@ -43,7 +43,7 @@ void EpochCtrlOp::Print(std::ostream &out, bool show_all) const { | |||
| Status EpochCtrlOp::GetNextRow(TensorRow *row) { | |||
| RETURN_UNEXPECTED_IF_NULL(row); | |||
| if (child_.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("EpochCtrlOp can't be the leaf node(first operator) of pipeline."); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] EpochCtrlOp can't be the leaf node(first operator) of pipeline."); | |||
| } | |||
| // `retry_if_eoe` is false because EpochCtrlOp does not eat EOE. | |||
| @@ -143,7 +143,7 @@ Status FilterOp::WorkerCompute(const TensorRow &in_row, bool *out_predicate) { | |||
| Status FilterOp::CheckInput(const TensorRow &input) const { | |||
| for (auto &item : input) { | |||
| if (item == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, input tensor is null."); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] input tensor is null."); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| @@ -192,7 +192,7 @@ Status MapOp::WorkerEntry(int32_t worker_id) { | |||
| } | |||
| RETURN_IF_NOT_OK(worker_out_queues_[worker_id]->EmplaceBack(std::move(in_row))); | |||
| } else { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in_row.size() != 0, "MapOp got an empty TensorRow."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in_row.size() != 0, "[Internal ERROR] MapOp got an empty TensorRow."); | |||
| TensorRow out_row; | |||
| // Perform the compute function of TensorOp(s) and store the result in new_tensor_table. | |||
| RETURN_IF_NOT_OK(WorkerCompute(in_row, &out_row, job_list)); | |||
| @@ -244,7 +244,11 @@ Status MapOp::WorkerCompute(const TensorRow &in_row, TensorRow *out_row, | |||
| // Sanity check a row in result_table | |||
| if (!result_table.empty() && out_columns_.size() != result_table[0].size()) { | |||
| RETURN_STATUS_UNEXPECTED("Result of a tensorOp doesn't match output column names"); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid columns, the number of columns returned in 'map' operations should match " | |||
| "the number of 'output_columns', but got the number of columns returned in 'map' operations: " + | |||
| std::to_string(result_table[0].size()) + | |||
| ", the number of 'output_columns': " + std::to_string(out_columns_.size()) + "."); | |||
| } | |||
| // Merging the data processed by job (result_table) with the data that are not used. | |||
| @@ -299,7 +303,8 @@ Status MapOp::InitPrivateVariable(std::unordered_map<std::string, int32_t> *col_ | |||
| if (in_columns_.empty()) { | |||
| auto itr = | |||
| std::find_if(col_name_id_map->begin(), col_name_id_map->end(), [](const auto &it) { return it.second == 0; }); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(itr != col_name_id_map->end(), "Column name id map doesn't have id 0"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(itr != col_name_id_map->end(), | |||
| "[Internal ERROR] Column name id map doesn't have id 0"); | |||
| MS_LOG(INFO) << "Input columns empty for map op, will apply to the first column in the current table."; | |||
| in_columns_.push_back(itr->first); | |||
| @@ -74,7 +74,7 @@ TensorRow ProjectOp::Project(const TensorRow &row) { | |||
| // However, the ProjectOp is defined as a inlined operator, so it is invalid to launch the | |||
| // functor since this op runs inlined inside another operator. The function is overloaded to | |||
| // ensure that it is not called by mistake (it will generate an error). | |||
| Status ProjectOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. ProjectOp is an inlined operator."); } | |||
| Status ProjectOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ProjectOp is an inlined operator."); } | |||
| Status ProjectOp::EoeReceived(int32_t worker_id) { | |||
| state_ = OpState::kDeOpIdle; | |||
| @@ -92,7 +92,7 @@ Status ProjectOp::ComputeColMap() { | |||
| for (size_t i = 0; i < columns_to_project_.size(); i++) { | |||
| std::string ¤t_column = columns_to_project_[i]; | |||
| if (child_column_name_mapping.find(current_column) == child_column_name_mapping.end()) { | |||
| std::string err_msg = "Invalid parameter, column name: " + current_column + " does not exist in dataset."; | |||
| std::string err_msg = "Invalid column, column name: " + current_column + " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| // Setup the new column name mapping for ourself (base class field) | |||
| @@ -41,7 +41,7 @@ Status RenameOp::GetNextRow(TensorRow *row) { | |||
| return Status::OK(); | |||
| } | |||
| Status RenameOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. RenameOp is an inlined operator."); } | |||
| Status RenameOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] RenameOp is an inlined operator."); } | |||
| // Rename core functionality to compute the new column name id map. | |||
| // We need to overwrite the super class ComputeColMap here because we're making a modification of the | |||
| @@ -71,7 +71,7 @@ Status RenameOp::ComputeColMap() { | |||
| MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << "."; | |||
| if (new_col_name.find(out_columns_[index]) != new_col_name.end()) { | |||
| std::string err_msg( | |||
| "Invalid parameter, rename operation does not support rename one column name into another already exist " | |||
| "Invalid column, rename operation does not support rename one column name into another already exist " | |||
| "column name, existing column name is: " + | |||
| out_columns_[index] + "."); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| @@ -82,7 +82,7 @@ Status RenameOp::ComputeColMap() { | |||
| // not found | |||
| if (new_col_name.find(name) != new_col_name.end()) { | |||
| std::string err_msg( | |||
| "Invalid parameter, rename operation does not support rename one column name into another already exist " | |||
| "Invalid column, rename operation does not support rename one column name into another already exist " | |||
| "column name, existing column name is: " + | |||
| name + "."); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| @@ -95,7 +95,7 @@ Status RenameOp::ComputeColMap() { | |||
| // only checks number of renamed columns have been found, this input check doesn't check everything | |||
| if (found != in_columns_.size()) { | |||
| MS_LOG(DEBUG) << "Rename operator column names found: " << found << " out of " << in_columns_.size() << "."; | |||
| std::string err_msg = "Invalid parameter, column to be renamed does not exist in dataset."; | |||
| std::string err_msg = "Invalid column, column to be renamed does not exist."; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -60,7 +60,7 @@ void RepeatOp::Print(std::ostream &out, bool show_all) const { | |||
| Status RepeatOp::GetNextRow(TensorRow *row) { | |||
| RETURN_UNEXPECTED_IF_NULL(row); | |||
| if (child_.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Pipeline init failed, RepeatOp can't be the first op in pipeline."); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Pipeline init failed, RepeatOp can't be the first op in pipeline."); | |||
| } | |||
| RETURN_IF_NOT_OK(child_[0]->GetNextRow(row)); | |||
| @@ -108,7 +108,7 @@ Status RepeatOp::EoeReceived(int32_t worker_id) { | |||
| // However, the RepeatOp is defined as a inlined operator, so it is invalid to launch the | |||
| // functor since this op runs inlined inside another operator. The function is overloaded to | |||
| // ensure that it is not called by mistake (it will generate an error). | |||
| Status RepeatOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. RepeatOp is an inlined operator."); } | |||
| Status RepeatOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] RepeatOp is an inlined operator."); } | |||
| // Base-class override for handling cases when an eof is received. | |||
| Status RepeatOp::EofReceived(int32_t worker_id) { | |||
| @@ -205,7 +205,8 @@ Status ShuffleOp::InitShuffleBuffer() { | |||
| // rows. | |||
| if (shuffle_buffer_state_ != kShuffleStateInit) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "Invalid shuffle buffer state, shuffle buffer should be init first or reset after each epoch."); | |||
| "[Internal ERROR] Invalid shuffle buffer state, shuffle buffer should be init first or reset " | |||
| "after each epoch."); | |||
| } | |||
| // Before we drop into the fetching loop, call the fetch once for the first time | |||
| @@ -220,7 +221,7 @@ Status ShuffleOp::InitShuffleBuffer() { | |||
| } | |||
| if (new_row.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, unable to fetch a single row for shuffle buffer."); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Unable to fetch a single row for shuffle buffer."); | |||
| } | |||
| // Now fill the rest of the shuffle buffer until we are unable to get the next row or we reached | |||
| @@ -43,7 +43,7 @@ void SkipOp::Print(std::ostream &out, bool show_all) const { | |||
| } | |||
| } | |||
| Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); } | |||
| Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] SkipOp is an inlined operator."); } | |||
| Status SkipOp::GetNextRow(TensorRow *row) { | |||
| RETURN_UNEXPECTED_IF_NULL(row); | |||
| @@ -64,7 +64,7 @@ Status AlbumOp::PrepareData() { | |||
| dirname_offset_ = folder_path_.length(); | |||
| std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder); | |||
| if (!folder.Exists() || dirItr == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_ + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid folder, " + folder_path_ + " does not exist or permission denied."); | |||
| } | |||
| MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << "."; | |||
| @@ -94,7 +94,7 @@ Status AlbumOp::PrepareData() { | |||
| // This function does not return status because we want to just skip bad input, not crash | |||
| bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { | |||
| if (valid == nullptr) { | |||
| MS_LOG(ERROR) << "Album parameter can't be nullptr."; | |||
| MS_LOG(ERROR) << "[Internal ERROR] Album parameter can't be nullptr."; | |||
| return false; | |||
| } | |||
| std::ifstream file_handle; | |||
| @@ -214,8 +214,8 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " + | |||
| data_schema_->Column(col_num).Type().ToString()); | |||
| RETURN_STATUS_UNEXPECTED("Invalid column type, column type of " + data_schema_->Column(col_num).Name() + | |||
| " should be int32 or int64, but got " + data_schema_->Column(col_num).Type().ToString()); | |||
| } | |||
| row->push_back(std::move(label)); | |||
| return Status::OK(); | |||
| @@ -243,7 +243,8 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " + | |||
| RETURN_STATUS_UNEXPECTED("Invalid column type, column type of " + data_schema_->Column(col_num).Name() + | |||
| " should be float32 nor float64, but got " + | |||
| data_schema_->Column(col_num).Type().ToString()); | |||
| } | |||
| row->push_back(std::move(float_array)); | |||
| @@ -323,7 +324,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { | |||
| std::ifstream file_handle(folder_path_ + file); | |||
| if (!file_handle.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file); | |||
| RETURN_STATUS_UNEXPECTED("Invalid json file, " + folder_path_ + file + " does not exist or permission denied."); | |||
| } | |||
| std::string line; | |||
| while (getline(file_handle, line)) { | |||
| @@ -342,7 +343,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { | |||
| } | |||
| } catch (const std::exception &err) { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, " + folder_path_ + file + " load failed: " + std::string(err.what())); | |||
| } | |||
| } | |||
| file_handle.close(); | |||
| @@ -60,16 +60,16 @@ Status CelebAOp::ParseAttrFile() { | |||
| auto realpath = FileUtils::GetRealPath((folder_path / "list_attr_celeba.txt").ToString().data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << (folder_path / "list_attr_celeba.txt").ToString(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + | |||
| (folder_path / "list_attr_celeba.txt").ToString()); | |||
| MS_LOG(ERROR) << "Invalid file path, " << (folder_path / "list_attr_celeba.txt").ToString() << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + (folder_path / "list_attr_celeba.txt").ToString() + | |||
| " does not exist."); | |||
| } | |||
| std::ifstream attr_file(realpath.value()); | |||
| if (!attr_file.is_open()) { | |||
| std::string attr_file_name = (folder_path / "list_attr_celeba.txt").ToString(); | |||
| return Status(StatusCode::kMDFileNotExist, __LINE__, __FILE__, | |||
| "Invalid file, failed to open Celeba attr file: " + attr_file_name); | |||
| "Invalid attr file, failed to open: " + attr_file_name + ", permission denied."); | |||
| } | |||
| attr_file_ = (folder_path / "list_attr_celeba.txt").ToString(); | |||
| @@ -89,12 +89,11 @@ Status CelebAOp::ParseAttrFile() { | |||
| try { | |||
| num_rows_in_attr_file_ = static_cast<int64_t>(std::stoul(rows_num)); // First line is rows number in attr file | |||
| } catch (std::invalid_argument &e) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, failed to convert rows_num from attr_file to unsigned long, invalid value: " + rows_num + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid rows_num, failed to convert rows_num: " + rows_num + " to unsigned long in " + | |||
| attr_file_ + "."); | |||
| } catch (std::out_of_range &e) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, failed to convert rows_num from attr_file to unsigned long, value out of range: " + rows_num + | |||
| "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid rows_num, rows_num in " + attr_file_ + " is out of range, rows_num is " + | |||
| rows_num + "."); | |||
| } | |||
| (void)getline(attr_file, attr_name); // Second line is attribute name,ignore it | |||
| @@ -125,8 +124,8 @@ bool CelebAOp::CheckDatasetTypeValid() { | |||
| Path folder_path(folder_path_); | |||
| partition_file_.open((folder_path / "list_eval_partition.txt").ToString()); | |||
| if (!partition_file_.is_open()) { | |||
| MS_LOG(ERROR) << "Invalid file, fail to open CelebA partition file, path=" | |||
| << (folder_path / "list_eval_partition.txt").ToString(); | |||
| MS_LOG(ERROR) << "Invalid eval partition file, failed to open eval partition file: " | |||
| << (folder_path / "list_eval_partition.txt").ToString() << " does not exist or permission denied."; | |||
| return false; | |||
| } | |||
| } | |||
| @@ -140,10 +139,12 @@ bool CelebAOp::CheckDatasetTypeValid() { | |||
| try { | |||
| type = std::stoi(vec[1]); | |||
| } catch (std::invalid_argument &e) { | |||
| MS_LOG(WARNING) << "Invalid data, failed to convert to int, invalid value: " << vec[1] << "."; | |||
| MS_LOG(WARNING) << "Invalid number, the second word in list_eval_partition.txt should be numeric, but got: " | |||
| << vec[1] << "."; | |||
| return false; | |||
| } catch (std::out_of_range &e) { | |||
| MS_LOG(WARNING) << "Invalid data, failed to convert to int, value out of range: " << vec[1] << "."; | |||
| MS_LOG(WARNING) << "Invalid number, the second word in list_eval_partition.txt is out of range, word is: " << vec[1] | |||
| << "."; | |||
| return false; | |||
| } | |||
| // train:0, valid=1, test=2 | |||
| @@ -185,12 +186,11 @@ Status CelebAOp::PrepareData() { | |||
| try { | |||
| value = std::stoi(split[label_index]); | |||
| } catch (std::invalid_argument &e) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, failed to convert item from attr_file to int, corresponding value: " + | |||
| split[label_index] + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid label index, the label index in " + file_path.ToString() + | |||
| " should be numeric, but got: " + split[label_index] + "."); | |||
| } catch (std::out_of_range &e) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, failed to convert item from attr_file to int as out of range, corresponding value: " + | |||
| split[label_index] + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid label index, the label index in " + file_path.ToString() + | |||
| " is out of range, index is " + split[label_index] + "."); | |||
| } | |||
| image_labels.second.push_back(value); | |||
| } | |||
| @@ -242,7 +242,8 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { | |||
| Status rc = Decode(image, &image); | |||
| if (rc.IsError()) { | |||
| image = nullptr; | |||
| std::string err_msg = "Invalid data, failed to decode image: " + image_path.ToString(); | |||
| std::string err_msg = | |||
| "Invalid image, " + image_path.ToString() + " decode failed, the image is broken or permission denied."; | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| } | |||
| @@ -113,7 +113,7 @@ Status CifarOp::ReadCifar10BlockData() { | |||
| // check the validity of the file path | |||
| Path file_path(file); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), | |||
| "Invalid file, failed to find cifar10 file: " + file); | |||
| "Invalid cifar10 file, " + file + " does not exist or is a directory."); | |||
| std::string file_name = file_path.Basename(); | |||
| if (usage_ == "train") { | |||
| @@ -125,12 +125,12 @@ Status CifarOp::ReadCifar10BlockData() { | |||
| } | |||
| std::ifstream in(file, std::ios::binary); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar10 file: " + file + | |||
| ", make sure file not damaged or permission denied."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| in.is_open(), "Invalid cifar10 file, failed to open " + file + ", the file is damaged or permission denied."); | |||
| for (uint32_t index = 0; index < num_cifar10_records / kCifarBlockImageNum; ++index) { | |||
| (void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar10 file: " + file + | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid cifar10 file, failed to read data from: " + file + | |||
| ", re-download dataset(make sure it is CIFAR-10 binary version)."); | |||
| (void)cifar_raw_data_block_->EmplaceBack(image_data); | |||
| // Add file path info | |||
| @@ -155,7 +155,7 @@ Status CifarOp::ReadCifar100BlockData() { | |||
| // check the validity of the file path | |||
| Path file_path(file); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), | |||
| "Invalid file, failed to find cifar100 file: " + file); | |||
| "Invalid cifar100 file, " + file + " does not exist or is a directory."); | |||
| std::string file_name = file_path.Basename(); | |||
| // if usage is train/test, get only these 2 files | |||
| @@ -167,16 +167,16 @@ Status CifarOp::ReadCifar100BlockData() { | |||
| } else if (file_name.find("train") != std::string::npos) { | |||
| num_cifar100_records = num_cifar100_train_records; | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, Cifar100 train/test file not found in: " + file_name); | |||
| RETURN_STATUS_UNEXPECTED("Invalid cifar100 file, Cifar100 train/test file is missing in: " + file_name); | |||
| } | |||
| std::ifstream in(file, std::ios::binary); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar100 file: " + file + | |||
| ", make sure file not damaged or permission denied."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| in.is_open(), "Invalid cifar100 file, failed to open " + file + ", the file is damaged or permission denied."); | |||
| for (uint32_t index = 0; index < num_cifar100_records / kCifarBlockImageNum; index++) { | |||
| (void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar100 file: " + file + | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid cifar100 file, failed to read data from: " + file + | |||
| ", re-download dataset(make sure it is CIFAR-100 binary version)."); | |||
| (void)cifar_raw_data_block_->EmplaceBack(image_data); | |||
| // Add file path info | |||
| @@ -200,10 +200,10 @@ Status CifarOp::GetCifarFiles() { | |||
| } | |||
| } | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open directory: " + dir_path.ToString() + | |||
| ", make sure file not damaged or permission denied."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid directory, " + dir_path.ToString() + " is not a directory or permission denied."); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(), "Invalid file, no .bin files found under " + folder_path_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(), | |||
| "Invalid cifar folder, cifar(.bin) files are missing under " + folder_path_); | |||
| std::sort(cifar_files_.begin(), cifar_files_.end()); | |||
| return Status::OK(); | |||
| } | |||
| @@ -306,9 +306,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, | |||
| constexpr int64_t num_cifar10_records = 10000; | |||
| for (auto &file : op->cifar_files_) { | |||
| Path file_path(file); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| file_path.Exists() && !file_path.IsDirectory(), | |||
| "Invalid file, failed to open cifar10 file: " + file + ", make sure file not damaged or permission denied."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), | |||
| "Invalid cifar10 file, " + file + " does not exist or is a directory."); | |||
| std::string file_name = file_path.Basename(); | |||
| if (op->usage_ == "train") { | |||
| @@ -321,8 +320,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, | |||
| std::ifstream in(file, std::ios::binary); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar10 file: " + file + | |||
| ", make sure file not damaged or permission denied."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| in.is_open(), "Invalid cifar10 file, failed to open " + file + ", the file is damaged or permission denied."); | |||
| *count = *count + num_cifar10_records; | |||
| } | |||
| return Status::OK(); | |||
| @@ -334,9 +333,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, | |||
| Path file_path(file); | |||
| std::string file_name = file_path.Basename(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| file_path.Exists() && !file_path.IsDirectory(), | |||
| "Invalid file, failed to find cifar100 file: " + file + ", make sure file not damaged or permission denied."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), | |||
| "Invalid cifar100 file, " + file + " does not exist or is a directory."); | |||
| if (op->usage_ == "train" && file_path.Basename().find("train") == std::string::npos) continue; | |||
| if (op->usage_ == "test" && file_path.Basename().find("test") == std::string::npos) continue; | |||
| @@ -347,8 +345,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, | |||
| num_cifar100_records += kCifar100RecordsPerTrainFile; | |||
| } | |||
| std::ifstream in(file, std::ios::binary); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar100 file: " + file + | |||
| ", make sure file not damaged or permission denied."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| in.is_open(), "Invalid cifar100 file, failed to open " + file + ", the file is damaged or permission denied."); | |||
| } | |||
| *count = num_cifar100_records; | |||
| return Status::OK(); | |||
| @@ -56,7 +56,8 @@ Status CityscapesOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| } else { | |||
| std::ifstream file_handle(data.second); | |||
| if (!file_handle.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + data.second); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + data.second + | |||
| ", the json is damaged or permission denied."); | |||
| } | |||
| std::string contents((std::istreambuf_iterator<char>(file_handle)), std::istreambuf_iterator<char>()); | |||
| nlohmann::json contents_js = nlohmann::json::parse(contents); | |||
| @@ -71,13 +72,15 @@ Status CityscapesOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| if (decode_ == true) { | |||
| Status rc = Decode(image, &image); | |||
| if (rc.IsError()) { | |||
| std::string err = "Invalid data, failed to decode image: " + data.first; | |||
| std::string err = | |||
| "Invalid image, failed to decode " + data.first + ", the image is damaged or permission denied."; | |||
| RETURN_STATUS_UNEXPECTED(err); | |||
| } | |||
| if (task_ != taskSuffix) { | |||
| Status rc_t = Decode(task, &task); | |||
| if (rc_t.IsError()) { | |||
| std::string err_t = "Invalid data, failed to decode image: " + data.second; | |||
| std::string err_t = | |||
| "Invalid image, failed to decode " + data.second + ", the image is damaged or permission denied."; | |||
| RETURN_STATUS_UNEXPECTED(err_t); | |||
| } | |||
| } | |||
| @@ -106,8 +109,8 @@ void CityscapesOp::Print(std::ostream &out, bool show_all) const { | |||
| Status CityscapesOp::PrepareData() { | |||
| auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data()); | |||
| if (!real_dataset_dir.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_; | |||
| RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_); | |||
| MS_LOG(ERROR) << "Invalid file path, Cityscapes Dataset dir: " << dataset_dir_ << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, Cityscapes Dataset dir: " + dataset_dir_ + " does not exist."); | |||
| } | |||
| Path dataset_dir(real_dataset_dir.value()); | |||
| @@ -143,15 +146,18 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con | |||
| Path images_dir_p(images_dir); | |||
| if (!images_dir_p.IsDirectory()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, " + images_dir_p.ToString() + " is an invalid directory path."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, Cityscapes Dataset image dir: " + images_dir_p.ToString() + | |||
| " is not a directory path."); | |||
| } | |||
| Path task_dir_p(task_dir); | |||
| if (!task_dir_p.IsDirectory()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, " + task_dir_p.ToString() + " is an invalid directory path."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, Cityscapes Dataset task dir: " + task_dir_p.ToString() + | |||
| " is not a directory path."); | |||
| } | |||
| std::shared_ptr<Path::DirIterator> d_it = Path::DirIterator::OpenDirectory(&images_dir_p); | |||
| if (d_it == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + images_dir_p.ToString()); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to open Cityscapes Dataset image directory: " + | |||
| images_dir_p.ToString()); | |||
| } | |||
| while (d_it->HasNext()) { | |||
| @@ -165,7 +171,8 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con | |||
| Path task_city_dir = task_dir_p / city_dir.Basename(); | |||
| std::shared_ptr<Path::DirIterator> img_city_it = Path::DirIterator::OpenDirectory(&img_city_dir); | |||
| if (img_city_it == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + img_city_dir.ToString()); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to open Cityscapes Dataset image city directory: " + | |||
| img_city_dir.ToString()); | |||
| } | |||
| while (img_city_it->HasNext()) { | |||
| @@ -179,13 +186,15 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con | |||
| Path task_file_path = task_city_dir / (img_file_name.substr(0, img_file_name.find("_leftImg8bit")) + "_" + | |||
| GetTaskSuffix(task_, real_quality_mode)); | |||
| if (!task_file_path.Exists()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, " + task_file_path.ToString() + " not found."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, Cityscapes Dataset task file: " + task_file_path.ToString() + | |||
| " does not exist."); | |||
| } | |||
| image_task_map_[image_file_path.ToString()] = task_file_path.ToString(); | |||
| } | |||
| } catch (const std::exception &err) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to load Cityscapes Dataset: " + dataset_dir_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to load Cityscapes Dataset from " + dataset_dir_ + ": " + | |||
| std::string(err.what())); | |||
| } | |||
| } | |||
| @@ -213,7 +222,9 @@ Status CityscapesOp::CountDatasetInfo() { | |||
| num_rows_ = static_cast<int64_t>(image_task_pairs_.size()); | |||
| if (num_rows_ == 0) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, no valid data matching the dataset API CityscapesDataset. Please check file path or dataset API."); | |||
| "Invalid data, no valid data matching the dataset API 'CityscapesDataset'. Please check dataset API or file " | |||
| "path: " + | |||
| dataset_dir_ + "."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -55,7 +55,7 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector<std::string> key_c | |||
| if (cursor.find(key_chain[i]) != cursor.end()) { | |||
| cursor = cursor[key_chain[i]]; | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, in given JSON file, failed to find key: " + key_chain[i]); | |||
| RETURN_STATUS_UNEXPECTED("Invalid json file, in given JSON file, failed to find key: " + key_chain[i]); | |||
| } | |||
| } | |||
| std::string final_str = key_chain.back(); | |||
| @@ -84,13 +84,13 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector<std::string> key_c | |||
| Status ClueOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) { | |||
| auto realpath = FileUtils::GetRealPath(file.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file); | |||
| std::string err_msg = "Invalid file path, " + file + " does not exist."; | |||
| LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); | |||
| } | |||
| std::ifstream handle(realpath.value()); | |||
| if (!handle.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + file); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file + ", the file is damaged or permission denied."); | |||
| } | |||
| int64_t rows_total = 0; | |||
| @@ -115,7 +115,7 @@ Status ClueOp::LoadFile(const std::string &file, int64_t start_offset, int64_t e | |||
| js = nlohmann::json::parse(line); | |||
| } catch (const std::exception &err) { | |||
| // Catch any exception and convert to Status return code | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse JSON file: " + file); | |||
| RETURN_STATUS_UNEXPECTED("Invalid json, failed to parse " + file + ", " + std::string(err.what())); | |||
| } | |||
| int cols_count = cols_to_keyword_.size(); | |||
| TensorRow t_row(cols_count, nullptr); | |||
| @@ -219,7 +219,7 @@ Status ClueOp::CalculateNumRowsPerShard() { | |||
| } | |||
| std::string file_list = ss.str(); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, CLUEDataset API can't read the data file (interface mismatch or no data found). " | |||
| "Invalid data, 'CLUEDataset' API can't read the data file (interface mismatch or no data found). " | |||
| "Check file path:" + | |||
| file_list); | |||
| } | |||
| @@ -232,13 +232,13 @@ Status ClueOp::CalculateNumRowsPerShard() { | |||
| int64_t CountTotalRowsPerFile(const std::string &file) { | |||
| auto realpath = FileUtils::GetRealPath(file.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed, path=" << file; | |||
| MS_LOG(ERROR) << "Invalid file, " << file << " does not exist."; | |||
| return 0; | |||
| } | |||
| std::ifstream handle(realpath.value()); | |||
| if (!handle.is_open()) { | |||
| MS_LOG(ERROR) << "Invalid file, failed to open file: " << file; | |||
| MS_LOG(ERROR) << "Invalid file, failed to open " << file << ": the file is damaged or permission denied."; | |||
| return 0; | |||
| } | |||
| @@ -78,8 +78,8 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> image, coordinate; | |||
| auto itr = coordinate_map_.find(image_id); | |||
| if (itr == coordinate_map_.end()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + | |||
| " in annotation node is not found in image node in JSON file."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id + | |||
| " is missing from image node in annotation file: " + annotation_path_); | |||
| } | |||
| std::string kImageFile = image_folder_path_ + std::string("/") + image_id; | |||
| @@ -115,7 +115,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| } else if (task_type_ == TaskType::Panoptic) { | |||
| RETURN_IF_NOT_OK(LoadMixTensorRow(row_id, image_id, image, coordinate, trow)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff or Panoptic."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic."); | |||
| } | |||
| return Status::OK(); | |||
| @@ -128,8 +128,8 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima | |||
| std::vector<uint32_t> iscrowd_row; | |||
| auto itr_item = simple_item_map_.find(image_id); | |||
| if (itr_item == simple_item_map_.end()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + | |||
| " in annotation node is not found in image node in JSON file."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id + | |||
| " is missing in the node of image from annotation file: " + annotation_path_ + "."); | |||
| } | |||
| std::vector<uint32_t> annotation = itr_item->second; | |||
| @@ -153,7 +153,7 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima | |||
| std::string img_id; | |||
| size_t pos = image_id.find("."); | |||
| if (pos == std::string::npos) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\""); | |||
| RETURN_STATUS_UNEXPECTED("Invalid image, 'image_id': " + image_id + " should be with suffix like \".jpg\""); | |||
| } | |||
| std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id)); | |||
| std::shared_ptr<Tensor> filename; | |||
| @@ -171,8 +171,8 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_ | |||
| std::vector<uint32_t> item_queue; | |||
| auto itr_item = simple_item_map_.find(image_id); | |||
| if (itr_item == simple_item_map_.end()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + | |||
| " in annotation node is not found in image node in JSON file."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid image_id, the attribute of 'image_id': " + image_id + | |||
| " is missing in the node of 'image' from annotation file: " + annotation_path_); | |||
| } | |||
| item_queue = itr_item->second; | |||
| @@ -186,7 +186,7 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_ | |||
| std::string img_id; | |||
| size_t pos = image_id.find("."); | |||
| if (pos == std::string::npos) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\""); | |||
| RETURN_STATUS_UNEXPECTED("Invalid image, 'image_id': " + image_id + " should be with suffix like \".jpg\""); | |||
| } | |||
| std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id)); | |||
| std::shared_ptr<Tensor> filename; | |||
| @@ -206,8 +206,8 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, | |||
| std::vector<uint32_t> area_row; | |||
| auto itr_item = simple_item_map_.find(image_id); | |||
| if (itr_item == simple_item_map_.end()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + | |||
| " in annotation node is not found in image node in JSON file."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid image_id, the attribute of 'image_id': " + image_id + | |||
| " is missing in the node of 'image' from annotation file: " + annotation_path_); | |||
| } | |||
| std::vector<uint32_t> annotation = itr_item->second; | |||
| @@ -237,7 +237,7 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, | |||
| std::string img_id; | |||
| size_t pos = image_id.find("."); | |||
| if (pos == std::string::npos) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\""); | |||
| RETURN_STATUS_UNEXPECTED("Invalid image, " + image_id + " should be with suffix like \".jpg\""); | |||
| } | |||
| std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id)); | |||
| std::shared_ptr<Tensor> filename; | |||
| @@ -252,7 +252,9 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, | |||
| template <typename T> | |||
| Status CocoOp::SearchNodeInJson(const nlohmann::json &input_tree, std::string node_name, T *output_node) { | |||
| auto node = input_tree.find(node_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid data, required node not found in JSON: " + node_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid annotation, the attribute of '" + node_name + | |||
| "' is missing in annotation file: " + annotation_path_ + | |||
| "."); | |||
| (*output_node) = *node; | |||
| return Status::OK(); | |||
| } | |||
| @@ -262,17 +264,19 @@ Status CocoOp::PrepareData() { | |||
| try { | |||
| auto realpath = FileUtils::GetRealPath(annotation_path_.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << annotation_path_; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + annotation_path_); | |||
| std::string err_msg = "Invalid file path, Coco Dataset annotation file: " + annotation_path_ + " does not exist."; | |||
| LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); | |||
| } | |||
| std::ifstream in(realpath.value()); | |||
| if (!in.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open annotation file: " + annotation_path_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation file, Coco Dataset annotation file: " + annotation_path_ + | |||
| " open failed, permission denied!"); | |||
| } | |||
| in >> js; | |||
| } catch (const std::exception &err) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open JSON file: " + annotation_path_ + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation file, Coco Dataset annotation file:" + annotation_path_ + | |||
| " load failed, error description: " + std::string(err.what())); | |||
| } | |||
| std::vector<std::string> image_que; | |||
| @@ -292,8 +296,8 @@ Status CocoOp::PrepareData() { | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonAnnoImageId), &image_id)); | |||
| auto itr_file = image_index_.find(image_id); | |||
| if (itr_file == image_index_.end()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + std::to_string(image_id) + | |||
| " in annotation node is not found in image node in JSON file."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + std::to_string(image_id) + | |||
| " is missing in the node of 'image' from annotation file: " + annotation_path_); | |||
| } | |||
| file_name = itr_file->second; | |||
| switch (task_type_) { | |||
| @@ -313,7 +317,7 @@ Status CocoOp::PrepareData() { | |||
| RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id)); | |||
| break; | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic."); | |||
| } | |||
| } | |||
| for (auto img : image_que) { | |||
| @@ -322,7 +326,7 @@ Status CocoOp::PrepareData() { | |||
| num_rows_ = image_ids_.size(); | |||
| if (num_rows_ == 0) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, CocoDataset API can't read the data file (interface mismatch or no data found). " | |||
| "Invalid data, 'CocoDataset' API can't read the data file (interface mismatch or no data found). " | |||
| "Check file in directory: " + | |||
| image_folder_path_ + "."); | |||
| } | |||
| @@ -331,7 +335,8 @@ Status CocoOp::PrepareData() { | |||
| Status CocoOp::ImageColumnLoad(const nlohmann::json &image_tree, std::vector<std::string> *image_vec) { | |||
| if (image_tree.size() == 0) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, no \"image\" node found in JSON file: " + annotation_path_ + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'image' node is missing in annotation file: " + annotation_path_ + | |||
| "."); | |||
| } | |||
| for (auto img : image_tree) { | |||
| std::string file_name; | |||
| @@ -354,8 +359,8 @@ Status CocoOp::DetectionColumnLoad(const nlohmann::json &annotation_tree, const | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoCategoryId), &category_id)); | |||
| auto search_category = category_set_.find(category_id); | |||
| if (search_category == category_set_.end()) | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, category_id can't find in categories where category_id: " + std::to_string(category_id) + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'category_id': " + std::to_string(category_id) + | |||
| " is missing in the node of 'categories' from annotation file: " + annotation_path_); | |||
| auto node_iscrowd = annotation_tree.find(kJsonAnnoIscrowd); | |||
| if (node_iscrowd != annotation_tree.end()) iscrowd = *node_iscrowd; | |||
| bbox.insert(bbox.end(), node_bbox.begin(), node_bbox.end()); | |||
| @@ -392,13 +397,13 @@ Status CocoOp::KeypointColumnLoad(const nlohmann::json &annotation_tree, const s | |||
| const int32_t &unique_id) { | |||
| auto itr_num_keypoint = annotation_tree.find(kJsonAnnoNumKeypoints); | |||
| if (itr_num_keypoint == annotation_tree.end()) | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, no num_keypoint found in annotation file where image_id: " + std::to_string(unique_id) + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'num_keypoint' node is missing in annotation file: " + | |||
| annotation_path_ + " where 'image_id': " + std::to_string(unique_id) + "."); | |||
| simple_item_map_[image_file].push_back(*itr_num_keypoint); | |||
| auto itr_keypoint = annotation_tree.find(kJsonAnnoKeypoints); | |||
| if (itr_keypoint == annotation_tree.end()) | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, no keypoint found in annotation file where image_id: " + std::to_string(unique_id) + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'keypoint' node is missing in annotation file: " + | |||
| annotation_path_ + " where 'image_id': " + std::to_string(unique_id) + "."); | |||
| coordinate_map_[image_file].push_back(*itr_keypoint); | |||
| return Status::OK(); | |||
| } | |||
| @@ -407,31 +412,34 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s | |||
| const int32_t &image_id) { | |||
| auto itr_segments = annotation_tree.find(kJsonAnnoSegmentsInfo); | |||
| if (itr_segments == annotation_tree.end()) | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, no segments_info found in annotation file where image_id: " + std::to_string(image_id) + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'segments_info' node is missing in annotation file: " + | |||
| annotation_path_ + " where 'image_id': " + std::to_string(image_id) + "."); | |||
| for (auto info : *itr_segments) { | |||
| std::vector<float> bbox; | |||
| uint32_t category_id = 0; | |||
| auto itr_bbox = info.find(kJsonAnnoBbox); | |||
| if (itr_bbox == info.end()) | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, no bbox found in segments_info(in annotation file) where image_id: " + | |||
| std::to_string(image_id) + "."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid annotation, the 'bbox' attribute is missing in the node of 'segments_info' where 'image_id': " + | |||
| std::to_string(image_id) + " from annotation file: " + annotation_path_ + "."); | |||
| bbox.insert(bbox.end(), itr_bbox->begin(), itr_bbox->end()); | |||
| coordinate_map_[image_file].push_back(bbox); | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(info, std::string(kJsonAnnoCategoryId), &category_id)); | |||
| auto search_category = category_set_.find(category_id); | |||
| if (search_category == category_set_.end()) | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, category_id can't find in categories where category_id: " + std::to_string(category_id) + "."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'category_id': " + std::to_string(category_id) + | |||
| " is missing in the node of 'categories' from " + annotation_path_ + "."); | |||
| auto itr_iscrowd = info.find(kJsonAnnoIscrowd); | |||
| if (itr_iscrowd == info.end()) | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, no iscrowd found in segments_info where image_id: " + std::to_string(image_id) + "."); | |||
| "Invalid annotation, the attribute of 'iscrowd' is missing in the node of 'segments_info' where 'image_id': " + | |||
| std::to_string(image_id) + " from annotation file: " + annotation_path_ + "."); | |||
| auto itr_area = info.find(kJsonAnnoArea); | |||
| if (itr_area == info.end()) | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, no area found in segments_info where image_id: " + std::to_string(image_id) + "."); | |||
| "Invalid annotation, the attribute of 'area' is missing in the node of 'segments_info' where 'image_id': " + | |||
| std::to_string(image_id) + " from annotation file: " + annotation_path_ + "."); | |||
| simple_item_map_[image_file].push_back(category_id); | |||
| simple_item_map_[image_file].push_back(*itr_iscrowd); | |||
| simple_item_map_[image_file].push_back(*itr_area); | |||
| @@ -441,7 +449,8 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s | |||
| Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) { | |||
| if (categories_tree.size() == 0) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, no categories found in annotation_path: " + annotation_path_); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid annotation, the 'categories' node is missing in annotation file: " + annotation_path_ + "."); | |||
| } | |||
| for (auto category : categories_tree) { | |||
| int32_t id = 0; | |||
| @@ -449,7 +458,9 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) { | |||
| std::vector<int32_t> label_info; | |||
| auto itr_id = category.find(kJsonId); | |||
| if (itr_id == category.end()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, no JSON id found in categories of " + annotation_path_); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid annotation, the attribute of 'id' is missing in the node of 'categories' from annotation file: " + | |||
| annotation_path_); | |||
| } | |||
| id = *itr_id; | |||
| label_info.push_back(id); | |||
| @@ -458,13 +469,16 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) { | |||
| auto itr_name = category.find(kJsonCategoriesName); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| itr_name != category.end(), | |||
| "Invalid data, no categories name found in categories where id: " + std::to_string(id)); | |||
| "Invalid annotation, the attribute of 'name' is missing in the node of 'categories' where 'id': " + | |||
| std::to_string(id)); | |||
| name = *itr_name; | |||
| if (task_type_ == TaskType::Panoptic) { | |||
| auto itr_isthing = category.find(kJsonCategoriesIsthing); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(itr_isthing != category.end(), | |||
| "Invalid data, nothing found in categories of " + annotation_path_); | |||
| "Invalid annotation, the attribute of 'isthing' is missing in the node of " | |||
| "'categories' from annotation file: " + | |||
| annotation_path_); | |||
| label_info.push_back(*itr_isthing); | |||
| } | |||
| label_index_.emplace_back(std::make_pair(name, label_info)); | |||
| @@ -477,7 +491,8 @@ Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &c | |||
| if (decode_ == true) { | |||
| Status rc = Decode(*tensor, tensor); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(rc.IsOk(), "Invalid data, failed to decode image: " + path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| rc.IsOk(), "Invalid image, failed to decode " + path + ": the image is broken or permission denied."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -505,8 +520,8 @@ Status CocoOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<i | |||
| RETURN_UNEXPECTED_IF_NULL(output_class_indexing); | |||
| if ((*output_class_indexing).empty()) { | |||
| if ((task_type_ != TaskType::Detection) && (task_type_ != TaskType::Panoptic)) { | |||
| MS_LOG(ERROR) << "Invalid parameter, GetClassIndex only valid in \"Detection\" and \"Panoptic\" task."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, GetClassIndex only valid in \"Detection\" and \"Panoptic\" task."); | |||
| MS_LOG(ERROR) << "Invalid task, only 'Detection' and 'Panoptic' task support GetClassIndex."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid task, only 'Detection' and 'Panoptic' task support GetClassIndex."); | |||
| } | |||
| RETURN_IF_NOT_OK(PrepareData()); | |||
| for (const auto &label : label_index_) { | |||
| @@ -111,7 +111,11 @@ int CsvOp::CsvParser::PutRecord(int c) { | |||
| std::string s = std::string(str_buf_.begin(), str_buf_.begin() + pos_); | |||
| std::shared_ptr<Tensor> t; | |||
| if (cur_col_ >= column_default_.size()) { | |||
| err_message_ = "Number of file columns does not match the default records"; | |||
| std::stringstream ss; | |||
| ss << "Invalid columns, the size of column_names should be less than the size of 'column_defaults', " | |||
| << "but got the size of column_names: " << cur_col_ | |||
| << ", the size of column_defaults : " << column_default_.size() << "."; | |||
| err_message_ = ss.str(); | |||
| return -1; | |||
| } | |||
| Status rc; | |||
| @@ -139,7 +143,11 @@ int CsvOp::CsvParser::PutRecord(int c) { | |||
| break; | |||
| } | |||
| if (cur_col_ >= cur_row_.size()) { | |||
| err_message_ = "Number of file columns does not match the tensor table"; | |||
| std::stringstream ss; | |||
| ss << "Invalid columns, the size of column_names should be greater than or equal to the size of columns of " | |||
| << "loading data, but got the size of column_names: " << cur_col_ | |||
| << ", the size of columns in original loaded dataset: " << column_default_.size() << "."; | |||
| err_message_ = ss.str(); | |||
| return -1; | |||
| } | |||
| cur_row_[cur_col_] = std::move(t); | |||
| @@ -166,7 +174,11 @@ int CsvOp::CsvParser::PutRow(int c) { | |||
| } | |||
| if (cur_col_ != column_default_.size()) { | |||
| err_message_ = "The number of columns does not match the definition."; | |||
| std::stringstream ss; | |||
| ss << "Invalid columns, the size of column_names should be less than the size of 'column_defaults', " | |||
| << "but got the size of column_names: " << cur_col_ | |||
| << ", the size of 'column_defaults': " << column_default_.size() << "."; | |||
| err_message_ = ss.str(); | |||
| return -1; | |||
| } | |||
| @@ -201,11 +213,11 @@ int CsvOp::CsvParser::EndFile(int c) { | |||
| int CsvOp::CsvParser::CatchException(int c) { | |||
| if (GetMessage(c) == Message::MS_QUOTE && cur_state_ == State::UNQUOTE) { | |||
| err_message_ = "Invalid quote in unquote field."; | |||
| err_message_ = "Invalid csv file, unexpected quote in unquote field from " + file_path_ + "."; | |||
| } else if (GetMessage(c) == Message::MS_END_OF_FILE && cur_state_ == State::QUOTE) { | |||
| err_message_ = "Reach the end of file in quote field."; | |||
| err_message_ = "Invalid csv file, reach the end of file in quote field, check " + file_path_ + "."; | |||
| } else if (GetMessage(c) == Message::MS_NORMAL && cur_state_ == State::SECOND_QUOTE) { | |||
| err_message_ = "Receive unquote char in quote field."; | |||
| err_message_ = "Invalid csv file, receive unquote char in quote field, check " + file_path_ + "."; | |||
| } | |||
| return -1; | |||
| } | |||
| @@ -459,14 +471,14 @@ Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t en | |||
| auto realpath = FileUtils::GetRealPath(file.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << file; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, " + DatasetName() + " file get real path failed, path=" + file); | |||
| MS_LOG(ERROR) << "Invalid file path, " << file << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + file + " does not exist."); | |||
| } | |||
| std::ifstream ifs; | |||
| ifs.open(realpath.value(), std::ifstream::in); | |||
| if (!ifs.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + " file: " + file); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file + ", the file is damaged or permission denied."); | |||
| } | |||
| if (column_name_list_.empty()) { | |||
| std::string tmp; | |||
| @@ -483,17 +495,18 @@ Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t en | |||
| if (err != 0) { | |||
| // if error code is -2, the returned error is interrupted | |||
| if (err == -2) return Status(kMDInterrupted); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse file: " + file + ": line " + | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse csv file: " + file + " at line " + | |||
| std::to_string(csv_parser.GetTotalRows() + 1) + | |||
| ". Error message: " + csv_parser.GetErrorMessage()); | |||
| } | |||
| } | |||
| } catch (std::invalid_argument &ia) { | |||
| std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ": line " + err_row + ", type does not match."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid csv, csv file: " + file + " parse failed at line " + err_row + | |||
| ", type does not match."); | |||
| } catch (std::out_of_range &oor) { | |||
| std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ": line " + err_row + ", value out of range."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid csv, " + file + " parse failed at line " + err_row + " : value out of range."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -594,13 +607,14 @@ int64_t CsvOp::CountTotalRows(const std::string &file) { | |||
| CsvParser csv_parser(0, jagged_rows_connector_.get(), field_delim_, column_default_list_, file); | |||
| Status rc = csv_parser.InitCsvParser(); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "[Internal ERROR], failed to initialize " + DatasetName(true) + " Parser. Error:" << rc; | |||
| MS_LOG(ERROR) << "[Internal ERROR], failed to initialize " + DatasetName(true) + " Parser. Error description:" | |||
| << rc; | |||
| return 0; | |||
| } | |||
| auto realpath = FileUtils::GetRealPath(file.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << file; | |||
| MS_LOG(ERROR) << "Invalid file path, csv file: " << file << " does not exist."; | |||
| return 0; | |||
| } | |||
| @@ -673,8 +687,8 @@ Status CsvOp::ComputeColMap() { | |||
| /* Process exception if ERROR in column name solving*/ | |||
| if (!rc.IsOk()) { | |||
| MS_LOG(ERROR) << "Invalid file, fail to analyse column name map, path=" + csv_file; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, fail to analyse column name map, path=" + csv_file); | |||
| MS_LOG(ERROR) << "Invalid file, failed to get column name list from csv file: " + csv_file; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to get column name list from csv file: " + csv_file); | |||
| } | |||
| } | |||
| } else { | |||
| @@ -689,9 +703,10 @@ Status CsvOp::ComputeColMap() { | |||
| if (column_default_list_.size() != column_name_id_map_.size()) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid parameter, the number of column names does not match the default column, size of default column_list: " + | |||
| "Invalid parameter, the size of column_names should be equal to the size of 'column_defaults', but got " | |||
| " size of 'column_defaults': " + | |||
| std::to_string(column_default_list_.size()) + | |||
| ", size of column_name: " + std::to_string(column_name_id_map_.size())); | |||
| ", size of column_names: " + std::to_string(column_name_id_map_.size())); | |||
| } | |||
| return Status::OK(); | |||
| @@ -703,7 +718,7 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) { | |||
| if (!check_flag_) { | |||
| auto realpath = FileUtils::GetRealPath(csv_file_name.data()); | |||
| if (!realpath.has_value()) { | |||
| std::string err_msg = "Invalid file, " + DatasetName() + " file get real path failed, path=" + csv_file_name; | |||
| std::string err_msg = "Invalid file path, csv file: " + csv_file_name + " does not exist."; | |||
| MS_LOG(ERROR) << err_msg; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -721,11 +736,9 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) { | |||
| if (column_name_id_map_.find(col_names[i]) == column_name_id_map_.end()) { | |||
| column_name_id_map_[col_names[i]] = i; | |||
| } else { | |||
| MS_LOG(ERROR) << "Invalid parameter, duplicate column names are not allowed: " + col_names[i] + | |||
| ", The corresponding data files: " + csv_file_name; | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " + col_names[i] + | |||
| ", The corresponding data files: " + csv_file_name); | |||
| MS_LOG(ERROR) << "Invalid parameter, duplicate column " << col_names[i] << " for csv file: " << csv_file_name; | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column " + col_names[i] + | |||
| " for csv file: " + csv_file_name); | |||
| } | |||
| } | |||
| check_flag_ = true; | |||
| @@ -736,11 +749,10 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) { | |||
| if (column_name_id_map_.find(column_name_list_[i]) == column_name_id_map_.end()) { | |||
| column_name_id_map_[column_name_list_[i]] = i; | |||
| } else { | |||
| MS_LOG(ERROR) << "Invalid parameter, duplicate column names are not allowed: " + column_name_list_[i] + | |||
| ", The corresponding data files: " + csv_file_name; | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " + | |||
| column_name_list_[i] + ", The corresponding data files: " + csv_file_name); | |||
| MS_LOG(ERROR) << "Invalid parameter, duplicate column " << column_name_list_[i] | |||
| << " for csv file: " << csv_file_name << "."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column " + column_name_list_[i] + | |||
| " for csv file: " + csv_file_name + "."); | |||
| } | |||
| } | |||
| check_flag_ = true; | |||
| @@ -764,7 +776,7 @@ bool CsvOp::ColumnNameValidate() { | |||
| for (auto &csv_file : csv_files_list_) { | |||
| auto realpath = FileUtils::GetRealPath(csv_file.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << csv_file; | |||
| MS_LOG(ERROR) << "Invalid file path, csv file: " << csv_file << " does not exist."; | |||
| return false; | |||
| } | |||
| @@ -781,9 +793,8 @@ bool CsvOp::ColumnNameValidate() { | |||
| match_file = csv_file; | |||
| } else { // Case the other files | |||
| if (col_names != record) { | |||
| MS_LOG(ERROR) | |||
| << "Invalid parameter, every corresponding column name must be identical, either element or permutation. " | |||
| << "Invalid files are: " + match_file + " and " + csv_file; | |||
| MS_LOG(ERROR) << "Invalid parameter, every column name should be equal the record from csv, but got column: " | |||
| << col_names << ", csv record: " << record << ". Check " + match_file + " and " + csv_file + "."; | |||
| return false; | |||
| } | |||
| } | |||
| @@ -76,13 +76,15 @@ Status DIV2KOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| if (decode_ == true) { | |||
| Status hr_rc = Decode(hr_image, &hr_image); | |||
| if (hr_rc.IsError()) { | |||
| std::string err = "Invalid data, failed to decode image: " + data.first; | |||
| std::string err = | |||
| "Invalid image, failed to decode " + data.first + ", the image is damaged or permission denied."; | |||
| RETURN_STATUS_UNEXPECTED(err); | |||
| } | |||
| Status lr_rc = Decode(lr_image, &lr_image); | |||
| if (lr_rc.IsError()) { | |||
| std::string err = "Invalid data, failed to decode image: " + data.second; | |||
| std::string err = | |||
| "Invalid image, failed to decode " + data.second + ", the image is damaged or permission denied."; | |||
| RETURN_STATUS_UNEXPECTED(err); | |||
| } | |||
| } | |||
| @@ -141,7 +143,7 @@ Status DIV2KOp::GetDIV2KLRDirRealName(const std::string &hr_dir_key, const std:: | |||
| out_str += ("\t" + item.first + ": " + item.second + ",\n"); | |||
| }); | |||
| out_str += "\n}"; | |||
| RETURN_STATUS_UNEXPECTED("Invalid param, " + lr_dir_key + " not found in DatasetPramMap: \n" + out_str); | |||
| RETURN_STATUS_UNEXPECTED("Invalid param, dir: " + lr_dir_key + " not found under div2k dataset dir, " + out_str); | |||
| } | |||
| if (downgrade_2017.find(downgrade_) != downgrade_2017.end() && scale_2017.find(scale_) != scale_2017.end()) { | |||
| @@ -158,8 +160,8 @@ Status DIV2KOp::GetDIV2KDataByUsage() { | |||
| auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data()); | |||
| if (!real_dataset_dir.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_; | |||
| RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_); | |||
| MS_LOG(ERROR) << "Invalid file path, div2k dataset dir: " << dataset_dir_ << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, div2k dataset dir: " + dataset_dir_ + " does not exist."); | |||
| } | |||
| Path dataset_dir(real_dataset_dir.value()); | |||
| @@ -167,14 +169,15 @@ Status DIV2KOp::GetDIV2KDataByUsage() { | |||
| Path lr_images_dir = dataset_dir / lr_dir_real_name_; | |||
| if (!hr_images_dir.IsDirectory()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, " + hr_images_dir.ToString() + " is an invalid directory path."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, div2k hr image dir: " + hr_images_dir.ToString() + " is not a directory."); | |||
| } | |||
| if (!lr_images_dir.IsDirectory()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, " + lr_images_dir.ToString() + " is an invalid directory path."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, div2k lr image dir: " + lr_images_dir.ToString() + " is not a directory."); | |||
| } | |||
| auto hr_it = Path::DirIterator::OpenDirectory(&hr_images_dir); | |||
| if (hr_it == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + hr_images_dir.ToString()); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to open div2k hr image dir: " + hr_images_dir.ToString() + | |||
| ", permission denied."); | |||
| } | |||
| std::string image_name; | |||
| @@ -202,12 +205,14 @@ Status DIV2KOp::GetDIV2KDataByUsage() { | |||
| Path lr_image_file_path(lr_image_file_path_); | |||
| if (!lr_image_file_path.Exists()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, " + lr_image_file_path.ToString() + " not found."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, div2k image file: " + lr_image_file_path.ToString() + | |||
| " does not exist."); | |||
| } | |||
| image_hr_lr_map_[hr_image_file_path.ToString()] = lr_image_file_path.ToString(); | |||
| } catch (const std::exception &err) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to load DIV2K Dataset: " + dataset_dir_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to load DIV2K Dataset from " + dataset_dir_ + ": " + | |||
| std::string(err.what())); | |||
| } | |||
| } | |||
| for (auto item : image_hr_lr_map_) { | |||
| @@ -220,7 +225,8 @@ Status DIV2KOp::CountDatasetInfo() { | |||
| num_rows_ = static_cast<int64_t>(image_hr_lr_pairs_.size()); | |||
| if (num_rows_ == 0) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, no valid data matching the dataset API DIV2KDataset. Please check file path or dataset API."); | |||
| "Invalid data, no valid data matching the dataset API 'DIV2KDataset'. Please check dataset API or file path: " + | |||
| dataset_dir_ + "."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -58,11 +58,12 @@ Status EMnistOp::WalkAllFiles() { | |||
| const std::string train_prefix = "-train"; | |||
| const std::string test_prefix = "-test"; | |||
| auto realpath = FileUtils::GetRealPath(folder_path_.data()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Get real path failed: " + folder_path_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Invalid file path, " + folder_path_ + " does not exist."); | |||
| Path dir(realpath.value()); | |||
| auto dir_it = Path::DirIterator::OpenDirectory(&dir); | |||
| if (dir_it == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + dir.ToString()); | |||
| RETURN_STATUS_UNEXPECTED("Invalid path, failed to open emnist dataset dir: " + dir.ToString() + | |||
| ", the directory is not a directory or permission denied."); | |||
| } | |||
| std::string prefix; | |||
| prefix = "emnist-" + name_; // used to match usage == "all". | |||
| @@ -88,7 +89,9 @@ Status EMnistOp::WalkAllFiles() { | |||
| std::sort(image_names_.begin(), image_names_.end()); | |||
| std::sort(label_names_.begin(), label_names_.end()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(), | |||
| "Invalid data, num of images is not equal to num of labels."); | |||
| "Invalid data, num of image files should be equal to num of label files under " + | |||
| realpath.value() + ", but got num of images: " + std::to_string(image_names_.size()) + | |||
| ", num of labels: " + std::to_string(label_names_.size()) + "."); | |||
| return Status::OK(); | |||
| } | |||
| @@ -118,12 +121,12 @@ Status EMnistOp::CountTotalRows(const std::string &dir, const std::string &name, | |||
| for (size_t i = 0; i < op->image_names_.size(); ++i) { | |||
| std::ifstream image_reader; | |||
| image_reader.open(op->image_names_[i], std::ios::binary); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), | |||
| "Invalid file, failed to open image file: " + op->image_names_[i]); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), "Invalid file, failed to open " + op->image_names_[i] + | |||
| ": the image file is damaged or permission denied."); | |||
| std::ifstream label_reader; | |||
| label_reader.open(op->label_names_[i], std::ios::binary); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), | |||
| "Invalid file, failed to open label file: " + op->label_names_[i]); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), "Invalid file, failed to open " + op->label_names_[i] + | |||
| ": the label file is damaged or permission denied."); | |||
| uint32_t num_images; | |||
| Status s = op->CheckImage(op->image_names_[i], &image_reader, &num_images); | |||
| image_reader.close(); | |||
| @@ -134,8 +137,10 @@ Status EMnistOp::CountTotalRows(const std::string &dir, const std::string &name, | |||
| label_reader.close(); | |||
| RETURN_IF_NOT_OK(s); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), | |||
| "Invalid data, num of images is not equal to num of labels."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| (num_images == num_labels), | |||
| "Invalid data, num of images should be equal to num of labels, but got num of images: " + | |||
| std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + "."); | |||
| *count = *count + num_images; | |||
| } | |||
| @@ -90,7 +90,8 @@ void FakeImageOp::Print(std::ostream &out, bool show_all) const { | |||
| Status FakeImageOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | |||
| if (cls_ids == nullptr || !cls_ids->empty() || label_list_.empty()) { | |||
| if (label_list_.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("No image found in dataset. Check if image was generated successfully."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "[Internal ERROR] No image found in dataset. Check if image was generated successfully."); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "[Internal ERROR] Map for storing image-index pair is nullptr or has been set in other place, " | |||
| @@ -126,7 +127,7 @@ Status FakeImageOp::PrepareData() { | |||
| label_list_.shrink_to_fit(); | |||
| num_rows_ = label_list_.size(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "Generate image failed, please check dataset API."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "Invalid data, generate fake data failed, please check dataset API."); | |||
| image_tensor_.clear(); | |||
| image_tensor_.resize(num_rows_); | |||
| return Status::OK(); | |||
| @@ -56,12 +56,12 @@ Status FashionMnistOp::CountTotalRows(const std::string &dir, const std::string | |||
| for (size_t i = 0; i < op->image_names_.size(); ++i) { | |||
| std::ifstream image_reader; | |||
| image_reader.open(op->image_names_[i], std::ios::binary); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), | |||
| "Invalid file, failed to open image file: " + op->image_names_[i]); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), "Invalid file, failed to open " + op->image_names_[i] + | |||
| ": the image file is damaged or permission denied."); | |||
| std::ifstream label_reader; | |||
| label_reader.open(op->label_names_[i], std::ios::binary); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), | |||
| "Invalid file, failed to open label file: " + op->label_names_[i]); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), "Invalid file, failed to open " + op->label_names_[i] + | |||
| ": the label file is damaged or permission denied."); | |||
| uint32_t num_images; | |||
| Status s = op->CheckImage(op->image_names_[i], &image_reader, &num_images); | |||
| image_reader.close(); | |||
| @@ -72,8 +72,10 @@ Status FashionMnistOp::CountTotalRows(const std::string &dir, const std::string | |||
| label_reader.close(); | |||
| RETURN_IF_NOT_OK(s); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), | |||
| "Invalid data, num of images is not equal to num of labels."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| (num_images == num_labels), | |||
| "Invalid data, num of images should be equal to num of labels, but got num of images: " + | |||
| std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + "."); | |||
| *count = *count + num_images; | |||
| } | |||
| @@ -48,7 +48,8 @@ Status FlickrOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| if (decode_ == true) { | |||
| Status rc = Decode(image, &image); | |||
| if (rc.IsError()) { | |||
| std::string err = "Invalid data, failed to decode image: " + data.first; | |||
| std::string err = | |||
| "Invalid image, failed to decode " + data.first + ": the image is damaged or permission denied!"; | |||
| RETURN_STATUS_UNEXPECTED(err); | |||
| } | |||
| } | |||
| @@ -76,13 +77,14 @@ void FlickrOp::Print(std::ostream &out, bool show_all) const { | |||
| Status FlickrOp::PrepareData() { | |||
| auto real_file_path = FileUtils::GetRealPath(file_path_.data()); | |||
| if (!real_file_path.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_path_; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_path_); | |||
| MS_LOG(ERROR) << "Invalid file path, " << file_path_ << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_path_ + " does not exist."); | |||
| } | |||
| std::ifstream file_handle(real_file_path.value()); | |||
| if (!file_handle.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid annotation file, failed to open " + file_path_ + | |||
| " : the file is damaged or permission denied."); | |||
| } | |||
| std::string line; | |||
| @@ -102,16 +104,16 @@ Status FlickrOp::PrepareData() { | |||
| image_name = line.substr(0, flag_idx - 2); // -2 because "#[0-4]\t" | |||
| if (image_name.empty()) { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image_name is not found in Flickr annotation file: " + file_path_ + | |||
| "; line: " + line); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, the attribute of image_name is missing in flickr dataset file: " + | |||
| file_path_ + ", line: " + line); | |||
| } | |||
| image_file_path = (dataset_dir / image_name).ToString(); | |||
| std::string annotation = line.substr(flag_idx + 1); | |||
| if (annotation.empty()) { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, annotation is not found in Flickr annotation file: " + file_path_ + | |||
| "; line: " + line); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, the attribute of annotation is missing in flickr dataset file: " + | |||
| file_path_ + ", line: " + line); | |||
| } | |||
| bool valid = false; | |||
| @@ -127,7 +129,8 @@ Status FlickrOp::PrepareData() { | |||
| image_annotation_map_[image_file_path].emplace_back(annotation); | |||
| } catch (const std::exception &err) { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse flickr dataset file: " + file_path_ + ": " + | |||
| std::string(err.what())); | |||
| } | |||
| } | |||
| @@ -146,8 +149,8 @@ Status FlickrOp::PrepareData() { | |||
| Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) { | |||
| auto real_file_name = FileUtils::GetRealPath(file_name.data()); | |||
| if (!real_file_name.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_name; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_name); | |||
| MS_LOG(ERROR) << "Invalid file path, flickr dataset file: " << file_name << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, flickr dataset file: " + file_name + " does not exist."); | |||
| } | |||
| std::ifstream file_handle; | |||
| @@ -155,14 +158,16 @@ Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) { | |||
| *valid = false; | |||
| file_handle.open(real_file_name.value(), std::ios::binary | std::ios::in); | |||
| if (!file_handle.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name); | |||
| RETURN_STATUS_UNEXPECTED("Invalid flickr file, failed to open " + file_name + | |||
| ": the file is damaged or permission denied."); | |||
| } | |||
| unsigned char file_type[read_num]; | |||
| (void)file_handle.read(reinterpret_cast<char *>(file_type), read_num); | |||
| if (file_handle.fail()) { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name); | |||
| RETURN_STATUS_UNEXPECTED("Invalid flickr file, failed to read " + file_name + | |||
| ": the file is damaged or the file content is incomplete."); | |||
| } | |||
| file_handle.close(); | |||
| if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { | |||
| @@ -64,7 +64,7 @@ Status GeneratorOp::CreateGeneratorObject() { | |||
| // Acquire Python GIL | |||
| py::gil_scoped_acquire gil_acquire; | |||
| if (Py_IsInitialized() == 0) { | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized."); | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized."); | |||
| } | |||
| try { | |||
| py::array sample_ids; | |||
| @@ -93,15 +93,17 @@ Status GeneratorOp::Init() { | |||
| Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row) { | |||
| if (!py::isinstance<py::tuple>(py_data)) { | |||
| return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__, | |||
| "Invalid data, Generator should return a tuple of NumPy arrays, currently returned is not a tuple."); | |||
| "Invalid python function, the 'source' of 'GeneratorDataset' should return a tuple of NumPy arrays, " | |||
| "but got " + | |||
| std::string(py_data.get_type().str())); | |||
| } | |||
| py::tuple py_row = py_data.cast<py::tuple>(); | |||
| // Check if returned number of columns matches with column names | |||
| if (py_row.size() != column_names_.size()) { | |||
| return Status( | |||
| StatusCode::kMDPyFuncException, __LINE__, __FILE__, | |||
| "Invalid data, Generator should return same number of NumPy arrays as specified in column_names, the size of" | |||
| " column_names is:" + | |||
| "Invalid python function, the 'source' of 'GeneratorDataset' should return same number of NumPy arrays as " | |||
| "specified in column_names, the size of column_names is:" + | |||
| std::to_string(column_names_.size()) + | |||
| " and number of returned NumPy array is:" + std::to_string(py_row.size())); | |||
| } | |||
| @@ -110,15 +112,18 @@ Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row) | |||
| py::object ret_py_ele = py_row[i]; | |||
| if (!py::isinstance<py::array>(ret_py_ele)) { | |||
| return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__, | |||
| "Invalid data, Generator should return a tuple of NumPy arrays. Ensure each item in tuple that " | |||
| "returned by source function of GeneratorDataset be NumPy array."); | |||
| "Invalid python function, 'GeneratorDataset' should return a tuple of NumPy arrays, but got " + | |||
| std::string(ret_py_ele.get_type().str())); | |||
| } | |||
| std::shared_ptr<Tensor> tensor; | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast<py::array>(), &tensor)); | |||
| if ((!column_types_.empty()) && (column_types_[i] != DataType::DE_UNKNOWN) && | |||
| (column_types_[i] != tensor->type())) { | |||
| return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__, | |||
| "Invalid data, type of returned data in GeneratorDataset is not same with specified column_types."); | |||
| "Invalid python function, type of returned data in 'GeneratorDataset' should be same with " | |||
| "specified column_types, but the type of returned data: " + | |||
| std::string(ret_py_ele.get_type().str()) + | |||
| ", specified column type: " + column_types_[i].ToString()); | |||
| } | |||
| tensor_row->push_back(tensor); | |||
| } | |||
| @@ -173,7 +178,7 @@ Status GeneratorOp::operator()() { | |||
| { | |||
| py::gil_scoped_acquire gil_acquire; | |||
| if (Py_IsInitialized() == 0) { | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); | |||
| } | |||
| try { | |||
| #ifndef ENABLE_SECURITY | |||
| @@ -93,7 +93,8 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| if (decode_ == true) { | |||
| Status rc = Decode(image, &image); | |||
| if (rc.IsError()) { | |||
| std::string err = "Invalid data, failed to decode image: " + folder_path_ + (pair_ptr->first); | |||
| std::string err = "Invalid image, " + folder_path_ + (pair_ptr->first) + | |||
| " decode failed, the image is broken or permission denied."; | |||
| RETURN_STATUS_UNEXPECTED(err); | |||
| } | |||
| } | |||
| @@ -121,7 +122,7 @@ void ImageFolderOp::Print(std::ostream &out, bool show_all) const { | |||
| Status ImageFolderOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | |||
| if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { | |||
| if (image_label_pairs_.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, " + DatasetName(true) + | |||
| RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + DatasetName(true) + | |||
| "Dataset API can't read the data file(interface mismatch or no data found). Check " + | |||
| DatasetName() + " file path: " + folder_path_); | |||
| } else { | |||
| @@ -156,7 +157,7 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { | |||
| Path folder(folder_path_ + folder_name); | |||
| std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder); | |||
| if (folder.Exists() == false || dirItr == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + folder_name); | |||
| RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + folder_name + " does not exist or permission denied."); | |||
| } | |||
| std::set<std::string> imgs; // use this for ordering | |||
| while (dirItr->HasNext()) { | |||
| @@ -193,8 +194,8 @@ Status ImageFolderOp::RecursiveWalkFolder(Path *dir) { | |||
| RETURN_IF_NOT_OK(folder_name_queue_->EmplaceBack(subdir.ToString().substr(dirname_offset_))); | |||
| } | |||
| if (recursive_ == true) { | |||
| MS_LOG(ERROR) << "RecursiveWalkFolder(&subdir) functionality is disabled permanently. No recursive walk of " | |||
| << "directory will be performed."; | |||
| MS_LOG(ERROR) << "[Internal ERROR] RecursiveWalkFolder(&subdir) functionality is disabled permanently. " | |||
| << "No recursive walk of directory will be performed."; | |||
| } | |||
| } | |||
| } | |||
| @@ -206,7 +207,7 @@ Status ImageFolderOp::StartAsyncWalk() { | |||
| TaskManager::FindMe()->Post(); | |||
| Path dir(folder_path_); | |||
| if (dir.Exists() == false || dir.IsDirectory() == false) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + folder_path_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + folder_path_ + " may not exist or the path is not a directory."); | |||
| } | |||
| dirname_offset_ = folder_path_.length(); | |||
| RETURN_IF_NOT_OK(RecursiveWalkFolder(&dir)); | |||
| @@ -242,10 +243,9 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se | |||
| std::string err_msg = ""; | |||
| int64_t row_cnt = 0; | |||
| err_msg += (dir.Exists() == false || dir.IsDirectory() == false) | |||
| ? "Invalid parameter, input path is invalid or not set, path: " + path | |||
| ? "Invalid dataset_dir, " + path + " does not exist or the path is not a directory. " | |||
| : ""; | |||
| err_msg += | |||
| (num_classes == nullptr && num_rows == nullptr) ? "Invalid parameter, num_class and num_rows are null.\n" : ""; | |||
| err_msg += (num_classes == nullptr && num_rows == nullptr) ? "[Internal ERROR] num_class and num_rows are null." : ""; | |||
| if (err_msg.empty() == false) { | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -266,7 +266,7 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se | |||
| } else { | |||
| for (const auto &p : class_index) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(folder_names.find(p.first) != folder_names.end(), | |||
| "Invalid parameter, folder: " + p.first + " doesn't exist in " + path + " ."); | |||
| "Invalid subdirectory, class: " + p.first + " doesn't exist in " + path + " ."); | |||
| } | |||
| (*num_classes) = class_index.size(); | |||
| } | |||
| @@ -277,7 +277,8 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se | |||
| Path subdir(folder_paths.front()); | |||
| dir_itr = Path::DirIterator::OpenDirectory(&subdir); | |||
| if (subdir.Exists() == false || dir_itr == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.ToString()); | |||
| RETURN_STATUS_UNEXPECTED("Invalid subdirectory, ImageFolder Dataset subdirectory: " + subdir.ToString() + | |||
| " does not exist or permission denied"); | |||
| } | |||
| while (dir_itr->HasNext()) { | |||
| if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) { | |||
| @@ -64,7 +64,7 @@ FilenameBlock::FilenameBlock(IOBlockFlags io_block_flags) | |||
| // Gets the filename from the block using the provided index container | |||
| Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj<std::string> &index) const { | |||
| if (out_filename == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Failed to get filename from FilenameBlock."); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to get filename from FilenameBlock."); | |||
| } | |||
| // a FilenameBlock only has one key. Call base class method to fetch that key | |||
| @@ -77,7 +77,7 @@ Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj< | |||
| auto &it = r.first; | |||
| *out_filename = it.value(); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Could not find filename from index."); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Could not find filename from index."); | |||
| } | |||
| return Status::OK(); | |||
| @@ -38,15 +38,15 @@ LJSpeechOp::LJSpeechOp(const std::string &file_dir, int32_t num_workers, int32_t | |||
| Status LJSpeechOp::PrepareData() { | |||
| auto real_path = FileUtils::GetRealPath(folder_path_.data()); | |||
| if (!real_path.has_value()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + folder_path_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, LJSpeech Dataset folder: " + folder_path_ + " does not exist."); | |||
| } | |||
| Path root_folder(real_path.value()); | |||
| Path metadata_file_path = root_folder / "metadata.csv"; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(metadata_file_path.Exists() && !metadata_file_path.IsDirectory(), | |||
| "Invalid file, failed to find metadata file: " + metadata_file_path.ToString()); | |||
| "Invalid file, failed to find LJSpeech metadata file: " + metadata_file_path.ToString()); | |||
| std::ifstream csv_reader(metadata_file_path.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(csv_reader.is_open(), | |||
| "Invalid file, failed to open metadata file: " + metadata_file_path.ToString() + | |||
| "Invalid file, failed to open LJSpeech metadata file: " + metadata_file_path.ToString() + | |||
| ", make sure file not damaged or permission denied."); | |||
| std::string line = ""; | |||
| while (getline(csv_reader, line)) { | |||
| @@ -64,8 +64,8 @@ Status LJSpeechOp::PrepareData() { | |||
| } | |||
| if (meta_info_list_.empty()) { | |||
| csv_reader.close(); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Reading failed, unable to read valid data from the metadata file: " + metadata_file_path.ToString() + "."); | |||
| RETURN_STATUS_UNEXPECTED("Reading failed, unable to read valid data from the LJSpeech metadata file: " + | |||
| metadata_file_path.ToString() + "."); | |||
| } | |||
| num_rows_ = meta_info_list_.size(); | |||
| csv_reader.close(); | |||
| @@ -76,7 +76,7 @@ Status LJSpeechOp::PrepareData() { | |||
| // 1 function call produces 1 TensorTow | |||
| Status LJSpeechOp::LoadTensorRow(row_id_type index, TensorRow *trow) { | |||
| int32_t num_items = meta_info_list_.size(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(index >= 0 && index < num_items, "The input index is out of range."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(index >= 0 && index < num_items, "[Internal ERROR] The input index is out of range."); | |||
| std::shared_ptr<Tensor> waveform; | |||
| std::shared_ptr<Tensor> sample_rate_scalar; | |||
| std::shared_ptr<Tensor> transcription, normalized_transcription; | |||
| @@ -118,7 +118,7 @@ void LJSpeechOp::Print(std::ostream &out, bool show_all) const { | |||
| Status LJSpeechOp::CountTotalRows(const std::string &dir, int64_t *count) { | |||
| auto real_path = FileUtils::GetRealPath(dir.data()); | |||
| if (!real_path.has_value()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + dir); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, " + dir + " does not exist."); | |||
| } | |||
| Path root_folder(real_path.value()); | |||
| Path metadata_file_path = root_folder / "metadata.csv"; | |||
| @@ -63,7 +63,8 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| if (decode_ == true) { | |||
| Status rc = Decode(image, &image); | |||
| if (rc.IsError()) { | |||
| std::string err = "Invalid data, failed to decode image: " + data.first; | |||
| std::string err = | |||
| "Invalid image, failed to decode: " + data.first + ", the image is damaged or permission denied."; | |||
| RETURN_STATUS_UNEXPECTED(err); | |||
| } | |||
| } | |||
| @@ -91,7 +92,7 @@ void ManifestOp::Print(std::ostream &out, bool show_all) const { | |||
| Status ManifestOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | |||
| if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) { | |||
| if (image_labelname_.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, no image found in dataset."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid manifest file, image data is missing in " + file_); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "[Internal ERROR] Map for containing image-index pair is nullptr or has been set in other place," | |||
| @@ -120,13 +121,14 @@ Status ManifestOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) | |||
| Status ManifestOp::PrepareData() { | |||
| auto realpath = FileUtils::GetRealPath(file_.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_; | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, get real path failed, path=" + file_); | |||
| MS_LOG(ERROR) << "Invalid file path, " << file_ << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_ + " does not exist."); | |||
| } | |||
| std::ifstream file_handle(realpath.value()); | |||
| if (!file_handle.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Manifest file: " + file_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file_ + | |||
| ": manifest file is damaged or permission denied!"); | |||
| } | |||
| std::string line; | |||
| std::set<std::string> classes; | |||
| @@ -137,7 +139,7 @@ Status ManifestOp::PrepareData() { | |||
| std::string image_file_path = js.value("source", ""); | |||
| if (image_file_path == "") { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, 'source' is not found in Manifest file: " + file_ + " at line " + | |||
| RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'source' is missing in file: " + file_ + " at line " + | |||
| std::to_string(line_count)); | |||
| } | |||
| // If image is not JPEG/PNG/GIF/BMP, drop it | |||
| @@ -149,7 +151,7 @@ Status ManifestOp::PrepareData() { | |||
| std::string usage = js.value("usage", ""); | |||
| if (usage == "") { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, 'usage' is not found in Manifest file: " + file_ + " at line " + | |||
| RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'usage' is missing in file: " + file_ + " at line " + | |||
| std::to_string(line_count)); | |||
| } | |||
| (void)std::transform(usage.begin(), usage.end(), usage.begin(), ::tolower); | |||
| @@ -164,7 +166,7 @@ Status ManifestOp::PrepareData() { | |||
| classes.insert(label_name); | |||
| if (label_name == "") { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, 'name' of label is not found in Manifest file: " + file_ + | |||
| RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'name' attribute of label is missing in file: " + file_ + | |||
| " at line " + std::to_string(line_count)); | |||
| } | |||
| if (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) { | |||
| @@ -180,7 +182,8 @@ Status ManifestOp::PrepareData() { | |||
| line_count++; | |||
| } catch (const std::exception &err) { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse manifest file: " + file_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid manifest file, parse Manifest file: " + file_ + " failed, " + | |||
| std::string(err.what())); | |||
| } | |||
| } | |||
| num_classes_ = classes.size(); | |||
| @@ -193,8 +196,8 @@ Status ManifestOp::PrepareData() { | |||
| Status ManifestOp::CheckImageType(const std::string &file_name, bool *valid) { | |||
| auto realpath = FileUtils::GetRealPath(file_name.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_name; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_name); | |||
| MS_LOG(ERROR) << "Invalid file path, " << file_name << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_name + " does not exist."); | |||
| } | |||
| std::ifstream file_handle; | |||
| @@ -202,14 +205,16 @@ Status ManifestOp::CheckImageType(const std::string &file_name, bool *valid) { | |||
| *valid = false; | |||
| file_handle.open(realpath.value(), std::ios::binary | std::ios::in); | |||
| if (!file_handle.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name); | |||
| RETURN_STATUS_UNEXPECTED("Invalid manifest file, failed to open " + file_name + | |||
| " : the manifest file is damaged or permission denied."); | |||
| } | |||
| unsigned char file_type[read_num]; | |||
| (void)file_handle.read(reinterpret_cast<char *>(file_type), read_num); | |||
| if (file_handle.fail()) { | |||
| file_handle.close(); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name); | |||
| RETURN_STATUS_UNEXPECTED("Invalid manifest file, failed to read " + file_name + | |||
| " : the manifest file is damaged or permission denied."); | |||
| } | |||
| file_handle.close(); | |||
| if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { | |||
| @@ -68,7 +68,8 @@ Status MindRecordOp::Init() { | |||
| data_schema_ = std::make_unique<DataSchema>(); | |||
| std::vector<std::string> col_names = shard_reader_->GetShardColumn()->GetColumnName(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(), "Invalid data, no column names are specified."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(), | |||
| "Invalid column, no column names are specified, check mindrecord file."); | |||
| std::vector<mindrecord::ColumnDataType> col_data_types = shard_reader_->GetShardColumn()->GeColumnDataType(); | |||
| std::vector<std::vector<int64_t>> col_shapes = shard_reader_->GetShardColumn()->GetColumnShape(); | |||
| @@ -107,9 +108,8 @@ Status MindRecordOp::Init() { | |||
| if (!load_all_cols) { | |||
| std::unique_ptr<DataSchema> tmp_schema = std::make_unique<DataSchema>(); | |||
| for (std::string colname : columns_to_load_) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| colname_to_ind.find(colname) != colname_to_ind.end(), | |||
| "Invalid data, specified loading column name: " + colname + " does not exist in data file."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(colname_to_ind.find(colname) != colname_to_ind.end(), | |||
| "Invalid column, " + colname + " does not exist in data file."); | |||
| RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->Column(colname_to_ind[colname]))); | |||
| } | |||
| data_schema_ = std::move(tmp_schema); | |||
| @@ -177,7 +177,7 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) { | |||
| } | |||
| RETURN_IF_NOT_OK(worker_in_queues_[worker_id]->PopFront(&io_block)); | |||
| } | |||
| RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Unexpected nullptr received in worker."); | |||
| } | |||
| Status MindRecordOp::GetRowFromReader(TensorRow *fetched_row, uint64_t row_id, int32_t worker_id) { | |||
| @@ -231,14 +231,15 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint | |||
| RETURN_IF_NOT_OK(shard_column->GetColumnFromJson(column_name, sample_json_, &data_ptr, &n_bytes)); | |||
| } else if (category == mindrecord::ColumnInBlob) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(sample_bytes_.find(column_name) != sample_bytes_.end(), | |||
| "Invalid data, failed to retrieve blob data from padding sample."); | |||
| "Invalid padded_sample, failed to retrieve blob data from padding sample, " | |||
| "check 'padded_sample'."); | |||
| std::string ss(sample_bytes_[column_name]); | |||
| n_bytes = ss.size(); | |||
| data_ptr = std::make_unique<unsigned char[]>(n_bytes); | |||
| std::copy(ss.begin(), ss.end(), data_ptr.get()); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, retrieved data type is unknown."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid datatype, retrieved data type is unknown."); | |||
| } | |||
| if (data == nullptr) { | |||
| data = reinterpret_cast<const unsigned char *>(data_ptr.get()); | |||
| @@ -254,7 +255,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint | |||
| DataType type = column.Type(); | |||
| // Set shape | |||
| CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, "Found memory size of column data type is 0."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, | |||
| "[Internal ERROR] Found memory size of column data type is 0."); | |||
| auto num_elements = n_bytes / column_data_type_size; | |||
| if (type == DataType::DE_STRING) { | |||
| std::string s{data, data + n_bytes}; | |||
| @@ -128,7 +128,7 @@ class MindRecordOp : public MappableLeafOp { | |||
| const mindrecord::json &columns_json, const mindrecord::TaskType task_type); | |||
| Status LoadTensorRow(row_id_type row_id, TensorRow *row) override { | |||
| return Status(StatusCode::kMDSyntaxError, "Cannot call this method."); | |||
| return Status(StatusCode::kMDSyntaxError, "[Internal ERROR] Cannot call this method."); | |||
| } | |||
| // Private function for computing the assignment of the column name map. | |||
| // @return - Status | |||
| @@ -69,7 +69,7 @@ void MnistOp::Print(std::ostream &out, bool show_all) const { | |||
| Status MnistOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const { | |||
| if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { | |||
| if (image_label_pairs_.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, no image found in " + DatasetName() + " file."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, image data is missing."); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "[Internal ERROR] Map for containing image-index pair is nullptr or has been set in other place," | |||
| @@ -89,7 +89,7 @@ Status MnistOp::ReadFromReader(std::ifstream *reader, uint32_t *result) { | |||
| uint32_t res = 0; | |||
| reader->read(reinterpret_cast<char *>(&res), 4); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!reader->fail(), | |||
| "Invalid data, failed to read 4 bytes from " + DatasetName() + " file."); | |||
| "Invalid file, failed to read 4 bytes from " + DatasetName() + " file."); | |||
| *result = SwapEndian(res); | |||
| return Status::OK(); | |||
| } | |||
| @@ -100,17 +100,22 @@ uint32_t MnistOp::SwapEndian(uint32_t val) const { | |||
| } | |||
| Status MnistOp::CheckImage(const std::string &file_name, std::ifstream *image_reader, uint32_t *num_images) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(), | |||
| "Invalid file, failed to open " + DatasetName() + " image file: " + file_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(), "Invalid " + DatasetName() + " file, failed to open " + | |||
| file_name + " : the file is damaged or permission denied."); | |||
| int64_t image_len = image_reader->seekg(0, std::ios::end).tellg(); | |||
| (void)image_reader->seekg(0, std::ios::beg); | |||
| // The first 16 bytes of the image file are type, number, row and column | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16, | |||
| "Invalid " + DatasetName() + " file, the first data length of " + file_name + | |||
| " should be 16 bytes(contains type, number, row and column), but got " + | |||
| std::to_string(image_len) + "."); | |||
| uint32_t magic_number; | |||
| RETURN_IF_NOT_OK(ReadFromReader(image_reader, &magic_number)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kMnistImageFileMagicNumber, | |||
| "Invalid file, this is not the " + DatasetName() + " image file: " + file_name); | |||
| "Invalid " + DatasetName() + " file, the magic number of " + file_name + " should be " + | |||
| std::to_string(kMnistImageFileMagicNumber) + ", but got " + | |||
| std::to_string(magic_number)); | |||
| uint32_t num_items; | |||
| RETURN_IF_NOT_OK(ReadFromReader(image_reader, &num_items)); | |||
| @@ -120,28 +125,38 @@ Status MnistOp::CheckImage(const std::string &file_name, std::ifstream *image_re | |||
| RETURN_IF_NOT_OK(ReadFromReader(image_reader, &cols)); | |||
| // The image size of the Mnist dataset is fixed at [28,28] | |||
| CHECK_FAIL_RETURN_UNEXPECTED((rows == kMnistImageRows) && (cols == kMnistImageCols), | |||
| "Invalid data, shape of image is not equal to (28, 28)."); | |||
| "Invalid " + DatasetName() + " file, shape of image in " + file_name + | |||
| " should be (28, 28), but got (" + std::to_string(rows) + ", " + std::to_string(cols) + | |||
| ")."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((image_len - 16) == num_items * rows * cols, | |||
| "Invalid data, got truncated data len: " + std::to_string(image_len - 16) + | |||
| ", which is not equal to real data len: " + std::to_string(num_items * rows * cols)); | |||
| "Invalid " + DatasetName() + " file, truncated data length of " + file_name + | |||
| " should be " + std::to_string(num_items * rows * cols) + ", but got " + | |||
| std::to_string(image_len - 16)); | |||
| *num_images = num_items; | |||
| return Status::OK(); | |||
| } | |||
| Status MnistOp::CheckLabel(const std::string &file_name, std::ifstream *label_reader, uint32_t *num_labels) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), | |||
| "Invalid file, failed to open " + DatasetName() + " label file: " + file_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), "Invalid " + DatasetName() + " file, failed to open " + | |||
| file_name + " : the file is damaged or permission denied!"); | |||
| int64_t label_len = label_reader->seekg(0, std::ios::end).tellg(); | |||
| (void)label_reader->seekg(0, std::ios::beg); | |||
| // The first 8 bytes of the image file are type and number | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Invalid " + DatasetName() + " file, the first data length of " + | |||
| file_name + " should be 8 bytes(contains type and number), but got " + | |||
| std::to_string(label_len) + "."); | |||
| uint32_t magic_number; | |||
| RETURN_IF_NOT_OK(ReadFromReader(label_reader, &magic_number)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kMnistLabelFileMagicNumber, | |||
| "Invalid file, this is not the " + DatasetName() + " label file: " + file_name); | |||
| "Invalid " + DatasetName() + " file, the magic number of " + file_name + | |||
| " should be " + std::to_string(kMnistLabelFileMagicNumber) + ", but got " + | |||
| std::to_string(magic_number) + "."); | |||
| uint32_t num_items; | |||
| RETURN_IF_NOT_OK(ReadFromReader(label_reader, &num_items)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Invalid data, number of labels is wrong."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Invalid " + DatasetName() + | |||
| " file, the data length of labels in " + file_name + | |||
| " should be " + std::to_string(num_items) + | |||
| ", but got " + std::to_string(label_len - 8) + "."); | |||
| *num_labels = num_items; | |||
| return Status::OK(); | |||
| } | |||
| @@ -151,7 +166,10 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la | |||
| RETURN_IF_NOT_OK(CheckImage(image_names_[index], image_reader, &num_images)); | |||
| RETURN_IF_NOT_OK(CheckLabel(label_names_[index], label_reader, &num_labels)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), | |||
| "Invalid data, num_images is not equal to num_labels. Ensure data file is not damaged."); | |||
| "Invalid " + DatasetName() + " file, the images number of " + image_names_[index] + | |||
| " should be equal to the labels number of " + label_names_[index] + | |||
| ", but got images number: " + std::to_string(num_images) + | |||
| ", labels number: " + std::to_string(num_labels) + "."); | |||
| // The image size of the Mnist dataset is fixed at [28,28] | |||
| int64_t size = kMnistImageRows * kMnistImageCols; | |||
| auto images_buf = std::make_unique<char[]>(size * num_images); | |||
| @@ -163,13 +181,13 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la | |||
| } | |||
| (void)image_reader->read(images_buf.get(), size * num_images); | |||
| if (image_reader->fail()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " image: " + image_names_[index] + | |||
| ", size:" + std::to_string(size * num_images) + ". Ensure data file is not damaged."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, failed to read " + image_names_[index] + | |||
| " : the file is damaged or permission denied!"); | |||
| } | |||
| (void)label_reader->read(labels_buf.get(), num_images); | |||
| if (label_reader->fail()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " label:" + label_names_[index] + | |||
| ", size: " + std::to_string(num_images) + ". Ensure data file is not damaged."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, failed to read " + label_names_[index] + | |||
| " : the file is damaged or the file content is incomplete."); | |||
| } | |||
| TensorShape img_tensor_shape = TensorShape({kMnistImageRows, kMnistImageCols, 1}); | |||
| for (int64_t j = 0; j != num_images; ++j) { | |||
| @@ -244,8 +262,10 @@ Status MnistOp::WalkAllFiles() { | |||
| std::sort(image_names_.begin(), image_names_.end()); | |||
| std::sort(label_names_.begin(), label_names_.end()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(), | |||
| "Invalid data, num of images is not equal to num of labels."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| image_names_.size() == label_names_.size(), | |||
| "Invalid " + DatasetName() + " file, num of images should be equal to num of labels, but got num of images: " + | |||
| std::to_string(image_names_.size()) + ", num of labels: " + std::to_string(label_names_.size()) + "."); | |||
| return Status::OK(); | |||
| } | |||
| @@ -279,7 +299,9 @@ Status MnistOp::CountTotalRows(const std::string &dir, const std::string &usage, | |||
| uint32_t num_labels; | |||
| RETURN_IF_NOT_OK(op->CheckLabel(op->label_names_[i], &label_reader, &num_labels)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), | |||
| "Invalid data, num of images is not equal to num of labels."); | |||
| "Invalid " + op->DatasetName() + | |||
| " file, num of images should be equal to num of labels, but got num of images: " + | |||
| std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + "."); | |||
| *count = *count + num_images; | |||
| // Close the readers | |||
| @@ -215,7 +215,8 @@ bool NonMappableLeafOp::NeedPushFileToBlockQueue(const std::string &file_name, i | |||
| bool push = false; | |||
| int64_t start_index = device_id_ * num_rows_per_shard_; | |||
| if (device_id_ + 1 < 0) { | |||
| MS_LOG(ERROR) << "Device id is invalid, got " + std::to_string(device_id_); | |||
| MS_LOG(ERROR) << "Invalid device id, device id should be greater than or equal to 0, but got " | |||
| << std::to_string(device_id_); | |||
| return false; | |||
| } | |||
| @@ -141,7 +141,8 @@ Status PhotoTourOp::GetFileContent(const std::string &info_file, std::string *an | |||
| RETURN_UNEXPECTED_IF_NULL(ans); | |||
| std::ifstream reader; | |||
| reader.open(info_file); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open PhotoTour info file: " + info_file); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open " + info_file + | |||
| ": PhotoTour info file is damaged or permission denied."); | |||
| (void)reader.seekg(0, std::ios::end); | |||
| std::size_t size = reader.tellg(); | |||
| (void)reader.seekg(0, std::ios::beg); | |||
| @@ -183,7 +184,9 @@ Status PhotoTourOp::ReadInfoFile(const std::string &data_dir, const std::string | |||
| switch (col_idx) { | |||
| case ID_3DPOINT: { | |||
| std::string item = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour info file failed: " + info_file_path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), | |||
| "Invalid data, reading PhotoTour info file failed: " + info_file_path + | |||
| " at line: " + std::to_string(pos) + ", the content should not be empty."); | |||
| int id_3dpoint = std::atoi(item.c_str()); | |||
| labels_.push_back(id_3dpoint); | |||
| col_idx = UNKNOWN; | |||
| @@ -191,7 +194,9 @@ Status PhotoTourOp::ReadInfoFile(const std::string &data_dir, const std::string | |||
| } | |||
| case UNKNOWN: { | |||
| std::string item2 = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), "Reading PhotoTour info file failed: " + info_file_path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| !item2.empty(), "Invalid data, Reading PhotoTour info file failed: " + info_file_path + | |||
| " at line: " + std::to_string(pos) + ", the content in file should not be empty."); | |||
| col_idx = ID_3DPOINT; | |||
| break; | |||
| } | |||
| @@ -225,34 +230,44 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri | |||
| switch (col_idx) { | |||
| case PATCH_ID1: { | |||
| std::string item = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), | |||
| "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + | |||
| " at line: " + std::to_string(pos) + ", the content should not be empty."); | |||
| patch_id1 = std::atoi(item.c_str()); | |||
| col_idx = LABEL1; | |||
| break; | |||
| } | |||
| case LABEL1: { | |||
| std::string item = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), | |||
| "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + | |||
| " at line: " + std::to_string(pos) + ", the content should not be empty."); | |||
| label1 = std::atoi(item.c_str()); | |||
| col_idx = UNUSED1; | |||
| break; | |||
| } | |||
| case UNUSED1: { | |||
| std::string item = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), | |||
| "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + | |||
| " at line: " + std::to_string(pos) + ", the content should not be empty."); | |||
| col_idx = PATCH_ID2; | |||
| break; | |||
| } | |||
| case PATCH_ID2: { | |||
| std::string item = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), | |||
| "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + | |||
| " at line: " + std::to_string(pos) + ", the content should not be empty."); | |||
| patch_id2 = std::atoi(item.c_str()); | |||
| col_idx = LABEL2; | |||
| break; | |||
| } | |||
| case LABEL2: { | |||
| std::string item = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), | |||
| "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + | |||
| " at line: " + std::to_string(pos) + ", the content should not be empty."); | |||
| label2 = std::atoi(item.c_str()); | |||
| col_idx = UNUSED2; | |||
| matches_.push_back(std::make_tuple(patch_id1, patch_id2, uint32_t(label1 == label2))); | |||
| @@ -260,13 +275,17 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri | |||
| } | |||
| case UNUSED2: { | |||
| std::string item = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), | |||
| "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + | |||
| " at line: " + std::to_string(pos) + ", the content should not be empty."); | |||
| col_idx = UNUSED3; | |||
| break; | |||
| } | |||
| case UNUSED3: { | |||
| std::string item2 = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), "Reading PhotoTour matched file failed: " + info_file_path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), | |||
| "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + | |||
| " at line: " + std::to_string(pos) + ", the content should not be empty."); | |||
| col_idx = PATCH_ID1; | |||
| break; | |||
| } | |||
| @@ -281,8 +300,9 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri | |||
| Status PhotoTourOp::GetPhotoTourDataTensor(uint32_t index, std::shared_ptr<Tensor> *image_tensor) { | |||
| RETURN_UNEXPECTED_IF_NULL(image_tensor); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(index < kLens.at(name_), | |||
| "Index exceeds the maximum count of image, got: " + std::to_string(index)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| index < kLens.at(name_), | |||
| "[Internal ERROR] Index exceeds the maximum count of image, got: " + std::to_string(index)); | |||
| int image_id = index / (kPatchNumPerRow * kPatchNumPerCol); | |||
| int row_in_image = (index % (kPatchNumPerRow * kPatchNumPerCol)) / kPatchNumPerRow; | |||
| @@ -320,7 +340,7 @@ Status PhotoTourOp::PrepareData() { | |||
| chosen_dataset_folder_path_ = (Path(dataset_dir_) / Path(name_)).ToString(); | |||
| train_ = kTrain.at(usage_); | |||
| auto real_folder_path = FileUtils::GetRealPath(chosen_dataset_folder_path_.data()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + chosen_dataset_folder_path_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), chosen_dataset_folder_path_ + " does not exist."); | |||
| std::vector<cv::String> file_names; | |||
| cv::glob(real_folder_path.value(), file_names); | |||
| @@ -107,7 +107,8 @@ Status Places365Op::GetFileContent(const std::string &info_file, std::string *an | |||
| RETURN_UNEXPECTED_IF_NULL(ans); | |||
| std::ifstream reader; | |||
| reader.open(info_file); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open Places365 file: " + info_file); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| !reader.fail(), "Invalid file, failed to open " + info_file + ": Places365 file is damaged or permission denied."); | |||
| reader.seekg(0, std::ios::end); | |||
| std::size_t size = reader.tellg(); | |||
| reader.seekg(0, std::ios::beg); | |||
| @@ -153,21 +154,21 @@ Status Places365Op::LoadCategories(const std::string &category_meta_name) { | |||
| while ((pos = s.find(" ")) != std::string::npos) { | |||
| switch (col_idx) { | |||
| case CATEGORY: { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), | |||
| "Reading places365 category file failed: " + category_meta_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " + | |||
| category_meta_name + ", space characters not found."); | |||
| category = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!category.empty(), | |||
| "Reading places365 category file failed: " + category_meta_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!category.empty(), "Invalid data, Reading places365 category file failed: " + | |||
| category_meta_name + ", space characters not found."); | |||
| // switch the type of substring. | |||
| col_idx = LABEL; | |||
| break; | |||
| } | |||
| case LABEL: { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), | |||
| "Reading places365 category file failed: " + category_meta_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " + | |||
| category_meta_name + ", space characters not found."); | |||
| std::string label_item = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!label_item.empty(), | |||
| "Reading places365 category file failed: " + category_meta_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!label_item.empty(), "Invalid data, Reading places365 category file failed: " + | |||
| category_meta_name + ", space characters not found."); | |||
| label = std::atoi(label_item.c_str()); | |||
| // switch the type of substring. | |||
| col_idx = CATEGORY; | |||
| @@ -204,19 +205,21 @@ Status Places365Op::LoadFileLists(const std::string &filelists_meta_name) { | |||
| while ((pos = s.find(" ")) != std::string::npos) { | |||
| switch (col_idx) { | |||
| case PATH: { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), | |||
| "Reading places365 category file failed: " + filelists_meta_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " + | |||
| filelists_meta_name + ", space characters not found."); | |||
| path = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!path.empty(), "Reading places365 filelist file failed: " + filelists_meta_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!path.empty(), "Invalid data, Reading places365 filelist file failed: " + | |||
| filelists_meta_name + ", space characters not found."); | |||
| // switch the type of substring. | |||
| col_idx = LABEL; | |||
| break; | |||
| } | |||
| case LABEL: { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), | |||
| "Reading places365 category file failed: " + filelists_meta_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " + | |||
| filelists_meta_name + ", space characters not found."); | |||
| std::string item = get_splited_str(pos); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading places365 filelist file failed: " + filelists_meta_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Invalid data, Reading places365 filelist file failed: " + | |||
| filelists_meta_name + ", space characters not found."); | |||
| label = std::atoi(item.c_str()); | |||
| // switch the type of substring. | |||
| col_idx = PATH; | |||
| @@ -233,13 +236,15 @@ Status Places365Op::LoadFileLists(const std::string &filelists_meta_name) { | |||
| Status Places365Op::GetPlaces365DataTensor(uint32_t index, std::shared_ptr<Tensor> *image_tensor) { | |||
| std::string file_path = image_path_label_pairs_[index].first; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(Path(file_path).Exists(), file_path + " File not exists."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(Path(file_path).Exists(), | |||
| "Invalid file path, Places365 image: " + file_path + " does not exist."); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromFile(file_path, image_tensor)); | |||
| if (decode_) { | |||
| Status rc = Decode(*image_tensor, image_tensor); | |||
| if (rc.IsError()) { | |||
| *image_tensor = nullptr; | |||
| std::string err_msg = "Invalid data, failed to decode image: " + file_path; | |||
| std::string err_msg = | |||
| "Invalid image, failed to decode " + file_path + ": the image is damaged or permission denied."; | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| } | |||
| @@ -249,14 +254,15 @@ Status Places365Op::GetPlaces365DataTensor(uint32_t index, std::shared_ptr<Tenso | |||
| Status Places365Op::PrepareData() { | |||
| auto real_folder_path = FileUtils::GetRealPath(root_.data()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + root_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Invalid file path, " + root_ + " does not exist."); | |||
| RETURN_IF_NOT_OK(LoadCategories((Path(real_folder_path.value()) / Path(kCategoriesMeta)).ToString())); | |||
| RETURN_IF_NOT_OK(LoadFileLists((Path(real_folder_path.value()) / Path(kFileListMeta.at(usage_))).ToString())); | |||
| num_rows_ = image_path_label_pairs_.size(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| num_rows_ > 0, | |||
| "Invalid data, no valid data matching the dataset API Places365Dataset. Please check file path or dataset API."); | |||
| "Invalid data, no valid data matching the dataset API Places365Dataset. Please check dataset API or file path: " + | |||
| root_ + "."); | |||
| return Status::OK(); | |||
| } | |||
| @@ -281,7 +287,7 @@ Status Places365Op::CountTotalRows(const std::string &dir, const std::string &us | |||
| for (size_t i = 0; i < op->image_path_label_pairs_.size(); ++i) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(Path(op->image_path_label_pairs_[i].first).Exists(), | |||
| op->image_path_label_pairs_[i].first + " File not exists."); | |||
| "Invalid file path, " + op->image_path_label_pairs_[i].first + " does not exist."); | |||
| } | |||
| *count = op->image_path_label_pairs_.size(); | |||
| return Status::OK(); | |||
| @@ -111,7 +111,9 @@ Status QMnistOp::CountTotalRows(const std::string &dir, const std::string &usage | |||
| uint32_t num_labels; | |||
| RETURN_IF_NOT_OK(op->CheckLabel(op->label_names_[i], &label_reader, &num_labels)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), | |||
| "Invalid data, num of images is not equal to num of labels."); | |||
| "Invalid data, num of images should be equal to num of labels loading from " + dir + | |||
| ", but got num of images: " + std::to_string(num_images) + | |||
| ", num of labels: " + std::to_string(num_labels) + "."); | |||
| if (usage == "test10k") { | |||
| // only use the first 10k samples and drop the last 50k samples | |||
| @@ -141,7 +143,8 @@ Status QMnistOp::WalkAllFiles() { | |||
| const std::string nist_prefix = "xnist"; | |||
| auto real_folder_path = FileUtils::GetRealPath(folder_path_.data()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + folder_path_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), | |||
| "Invalid QMnist folder, " + folder_path_ + " does not exist or permission denied!"); | |||
| Path root_dir(real_folder_path.value()); | |||
| if (usage_ == "train") { | |||
| @@ -162,20 +165,25 @@ Status QMnistOp::WalkAllFiles() { | |||
| label_names_.push_back((root_dir / Path(nist_prefix + "-" + label_ext)).ToString()); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(), | |||
| "Invalid data, num of images is not equal to num of labels."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| image_names_.size() == label_names_.size(), | |||
| "Invalid data, num of Qmnist image files should be equal to num of Qmnist label files under directory:" + | |||
| folder_path_ + ", but got num of image files: " + std::to_string(image_names_.size()) + | |||
| ", num of label files: " + std::to_string(label_names_.size()) + "."); | |||
| for (size_t i = 0; i < image_names_.size(); i++) { | |||
| Path file_path(image_names_[i]); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), | |||
| "Failed to find " + DatasetName() + " image file: " + file_path.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| file_path.Exists() && !file_path.IsDirectory(), | |||
| "Invalid file path, Qmnist data file: " + file_path.ToString() + " does not exist or is a directory."); | |||
| MS_LOG(INFO) << DatasetName(true) << " operator found image file at " << file_path.ToString() << "."; | |||
| } | |||
| for (size_t i = 0; i < label_names_.size(); i++) { | |||
| Path file_path(label_names_[i]); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), | |||
| "Failed to find " + DatasetName() + " label file: " + file_path.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| file_path.Exists() && !file_path.IsDirectory(), | |||
| "Invalid file path, Qmnist data file: " + file_path.ToString() + " does not exist or is a directory."); | |||
| MS_LOG(INFO) << DatasetName(true) << " operator found label file at " << file_path.ToString() << "."; | |||
| } | |||
| @@ -189,7 +197,9 @@ Status QMnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *l | |||
| RETURN_IF_NOT_OK(CheckImage(image_names_[index], image_reader, &num_images)); | |||
| RETURN_IF_NOT_OK(CheckLabel(label_names_[index], label_reader, &num_labels)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), | |||
| "Invalid data, num_images is not equal to num_labels. Ensure data file is not damaged."); | |||
| "Invalid data, num of images should be equal to num of labels loading from " + | |||
| folder_path_ + ", but got num of images: " + std::to_string(num_images) + | |||
| ", num of labels: " + std::to_string(num_labels) + "."); | |||
| // The image size of the QMNIST dataset is fixed at [28,28] | |||
| int64_t image_size = kQMnistImageRows * kQMnistImageCols; | |||
| @@ -216,16 +226,16 @@ Status QMnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *l | |||
| } | |||
| (void)image_reader->read(images_buf.get(), image_size * num_images); | |||
| if (image_reader->fail()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " image: " + image_names_[index] + | |||
| ", size:" + std::to_string(image_size * num_images) + | |||
| ". Ensure data file is not damaged."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + std::to_string(image_size * num_images) + | |||
| " bytes from " + image_names_[index] + | |||
| ": the data file is damaged or the content is incomplete."); | |||
| } | |||
| // uint32_t use 4 bytes in memory | |||
| (void)label_reader->read(reinterpret_cast<char *>(labels_buf.get()), label_length * num_labels * 4); | |||
| if (label_reader->fail()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " label:" + label_names_[index] + | |||
| ", size: " + std::to_string(label_length * num_labels) + | |||
| ". Ensure data file is not damaged."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + std::to_string(label_length * num_labels * 4) + | |||
| " bytes from " + label_names_[index] + | |||
| ": the data file is damaged or content is incomplete."); | |||
| } | |||
| TensorShape image_tensor_shape = TensorShape({kQMnistImageRows, kQMnistImageCols, 1}); | |||
| TensorShape label_tensor_shape = TensorShape({kQMnistLabelLength}); | |||
| @@ -258,23 +268,32 @@ Status QMnistOp::CheckLabel(const std::string &file_name, std::ifstream *label_r | |||
| RETURN_UNEXPECTED_IF_NULL(label_reader); | |||
| RETURN_UNEXPECTED_IF_NULL(num_labels); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), | |||
| "Invalid file, failed to open " + DatasetName() + " label file: " + file_name); | |||
| "Invalid file, failed to open " + file_name + ": permission denied for the label file."); | |||
| int64_t label_len = label_reader->seekg(0, std::ios::end).tellg(); | |||
| (void)label_reader->seekg(0, std::ios::beg); | |||
| // The first 12 bytes of the label file are type, number and length | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 12, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 12, | |||
| "Invalid file, load " + file_name + | |||
| " failed: the first 12 bytes of the label file should be type, number and length, " + | |||
| "but the file only contains " + std::to_string(label_len) + " bytes."); | |||
| uint32_t magic_number; | |||
| RETURN_IF_NOT_OK(ReadFromReader(label_reader, &magic_number)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kQMnistLabelFileMagicNumber, | |||
| "Invalid file, this is not the " + DatasetName() + " label file: " + file_name); | |||
| "Invalid label file, the magic number of the label file " + file_name + " should be " + | |||
| std::to_string(kQMnistLabelFileMagicNumber) + ", but got " + | |||
| std::to_string(magic_number) + "."); | |||
| uint32_t num_items; | |||
| RETURN_IF_NOT_OK(ReadFromReader(label_reader, &num_items)); | |||
| uint32_t length; | |||
| RETURN_IF_NOT_OK(ReadFromReader(label_reader, &length)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(length == kQMnistLabelLength, "Invalid data, length of labels is not equal to 8."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(length == kQMnistLabelLength, "Invalid data, length of every label loading from " + | |||
| file_name + " should be equal to 8, but got " + | |||
| std::to_string(length) + "."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((label_len - 12) == num_items * kQMnistLabelLength * 4, | |||
| "Invalid data, number of labels is wrong."); | |||
| "Invalid data, the total bytes of labels loading from Qmnist label file: " + file_name + | |||
| " should be " + std::to_string(num_items * kQMnistLabelLength * 4) + ", but got " + | |||
| std::to_string(label_len - 12) + "."); | |||
| *num_labels = num_items; | |||
| return Status::OK(); | |||
| } | |||
| @@ -98,7 +98,7 @@ void RandomDataOp::GenerateSchema() { | |||
| std::make_unique<ColDescriptor>(col_name, DataType(newType), TensorImpl::kFlexible, rank, new_shape.get()); | |||
| Status rc = data_schema_->AddColumn(*new_col); | |||
| if (rc.IsError()) MS_LOG(ERROR) << "Failed to generate a schema. Message:" << rc; | |||
| if (rc.IsError()) MS_LOG(ERROR) << "[Internal ERROR] Failed to generate a schema. Message:" << rc; | |||
| } | |||
| } | |||
| @@ -136,7 +136,8 @@ Status RandomDataOp::CreateRandomRow(TensorRow *new_row) { | |||
| buf = std::make_unique<unsigned char[]>(size_in_bytes); | |||
| int ret_code = memset_s(buf.get(), size_in_bytes, random_byte, size_in_bytes); | |||
| if (ret_code != 0) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor."); | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "[Internal ERROR] memset_s failed to set random bytes for a tensor."); | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.Type(), buf.get(), &new_tensor)); | |||
| @@ -53,7 +53,7 @@ Status DistributedSamplerRT::InitSampler() { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "Invalid parameter, num_samples must be greater than 0, but got " + | |||
| std::to_string(num_samples_) + ".\n"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_) + ".\n"); | |||
| num_rows_ > 0, "[Internal ERROR] num_rows must be greater than 0, but got " + std::to_string(num_rows_) + ".\n"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| device_id_ < num_devices_ && device_id_ >= 0 && num_rows_ > 0 && num_samples_ > 0, | |||
| "Invalid parameter, num_shard must be greater than shard_id and greater than 0, got num_shard: " + | |||
| @@ -96,7 +96,7 @@ Status DistributedSamplerRT::GetNextSample(TensorRow *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| if (cnt_ > samples_per_tensor_) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" + | |||
| "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" + | |||
| std::to_string(cnt_) + ", samples_per_tensor(num_samples): " + std::to_string(samples_per_tensor_)); | |||
| } else if (cnt_ == samples_per_tensor_ && (non_empty_ || !even_dist_)) { | |||
| (*out) = TensorRow(TensorRow::kFlagEOE); | |||
| @@ -29,7 +29,7 @@ Status MindRecordSamplerRT::GetNextSample(TensorRow *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| if (next_id_ > num_samples_) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " + | |||
| "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " + | |||
| std::to_string(next_id_) + ", num_samples_: " + std::to_string(num_samples_)); | |||
| } else if (next_id_ == num_samples_) { | |||
| (*out) = TensorRow(TensorRow::kFlagEOE); | |||
| @@ -53,8 +53,8 @@ Status MindRecordSamplerRT::InitSampler() { | |||
| if (!sample_ids_) { | |||
| // Note, sample_ids_.empty() is okay and will just give no sample ids. | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Init Sampler failed as sample_ids is empty, here ShardReader did not provide a valid sample ids vector via" | |||
| " MindRecordSamplerRT"); | |||
| "[Internal ERROR] Init Sampler failed as sample_ids is empty, here ShardReader did not provide a valid sample ids " | |||
| "vector via MindRecordSamplerRT."); | |||
| } | |||
| // Usually, the num samples is given from the user interface. In our case, that data is in mindrecord. | |||
| @@ -44,7 +44,7 @@ Status PKSamplerRT::InitSampler() { | |||
| // Compute that here for this case to find the total number of samples that are available to return. | |||
| // (in this case, samples per class * total classes). | |||
| if (samples_per_class_ > std::numeric_limits<int64_t>::max() / static_cast<int64_t>(labels_.size())) { | |||
| RETURN_STATUS_UNEXPECTED("Overflow in counting num_rows"); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Overflow in counting num_rows"); | |||
| } | |||
| num_rows_ = samples_per_class_ * static_cast<int64_t>(labels_.size()); | |||
| @@ -72,7 +72,7 @@ Status PKSamplerRT::GetNextSample(TensorRow *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| if (next_id_ > num_samples_ || num_samples_ == 0) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " + | |||
| "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " + | |||
| std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_)); | |||
| } else if (next_id_ == num_samples_) { | |||
| (*out) = TensorRow(TensorRow::kFlagEOE); | |||
| @@ -39,7 +39,7 @@ Status PythonSamplerRT::GetNextSample(TensorRow *out) { | |||
| { | |||
| py::gil_scoped_acquire gil_acquire; | |||
| if (Py_IsInitialized() == 0) { | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); | |||
| } | |||
| try { | |||
| py::object py_ret = py_sampler_instance.attr("_get_indices")(); | |||
| @@ -57,7 +57,8 @@ Status PythonSamplerRT::GetNextSample(TensorRow *out) { | |||
| return Status(StatusCode::kMDPyFuncException, e.what()); | |||
| } catch (const py::cast_error &e) { | |||
| return Status(StatusCode::kMDPyFuncException, | |||
| "Invalid data, python sampler iterator should return an integer index."); | |||
| "Invalid data, Python sampler iterator should return an integer index, but error raised: " + | |||
| std::string(e.what())); | |||
| } | |||
| } | |||
| (*out) = {sample_ids}; | |||
| @@ -71,7 +72,7 @@ Status PythonSamplerRT::InitSampler() { | |||
| return Status::OK(); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_)); | |||
| num_rows_ > 0, "[Internal ERROR] num_rows must be greater than 0, but got " + std::to_string(num_rows_)); | |||
| // Special value of 0 for num_samples means that the user wants to sample the entire set of data. | |||
| // If the user asked to sample more rows than exists in the dataset, adjust the num_samples accordingly. | |||
| if (num_samples_ == 0 || num_samples_ > num_rows_) { | |||
| @@ -80,12 +81,13 @@ Status PythonSamplerRT::InitSampler() { | |||
| { | |||
| py::gil_scoped_acquire gil_acquire; | |||
| if (Py_IsInitialized() == 0) { | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); | |||
| } | |||
| try { | |||
| py_sampler_instance.attr("_handshake")(num_rows_, num_samples_); | |||
| } catch (const py::error_already_set &e) { | |||
| return Status(StatusCode::kMDPyFuncException, e.what()); | |||
| return Status(StatusCode::kMDPyFuncException, | |||
| "[Internal ERROR] python sampler execute _handshake failed: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -98,7 +100,7 @@ Status PythonSamplerRT::ResetSampler() { | |||
| need_to_reset_ = false; | |||
| py::gil_scoped_acquire gil_acquire; | |||
| if (Py_IsInitialized() == 0) { | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); | |||
| } | |||
| try { | |||
| py_sampler_instance.attr("reset")(); | |||
| @@ -36,8 +36,9 @@ RandomSamplerRT::RandomSamplerRT(bool replacement, int64_t num_samples, bool res | |||
| Status RandomSamplerRT::GetNextSample(TensorRow *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| if (next_id_ > num_samples_) { | |||
| RETURN_STATUS_UNEXPECTED("Sampler index must be less than or equal to num_samples(total rows in dataset), but got" + | |||
| std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_)); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got" + | |||
| std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_)); | |||
| } else if (next_id_ == num_samples_) { | |||
| (*out) = TensorRow(TensorRow::kFlagEOE); | |||
| } else { | |||
| @@ -81,7 +82,7 @@ Status RandomSamplerRT::InitSampler() { | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| num_samples_ > 0 && num_rows_ > 0, | |||
| "Invalid parameter, num_samples and num_rows must be greater than 0, but got num_samples: " + | |||
| "[Internal ERROR] num_samples and num_rows must be greater than 0, but got num_samples: " + | |||
| std::to_string(num_samples_) + ", num_rows: " + std::to_string(num_rows_)); | |||
| samples_per_tensor_ = samples_per_tensor_ > num_samples_ ? num_samples_ : samples_per_tensor_; | |||
| rnd_.seed(seed_); | |||
| @@ -28,7 +28,7 @@ Status RandomAccessOp::GetNumRowsInDataset(int64_t *num) const { | |||
| // Here, it is just a getter method to return the value. However, it is invalid if there is | |||
| // not a value set for this count, so generate a failure if that is the case. | |||
| if (num == nullptr || num_rows_ == -1) { | |||
| RETURN_STATUS_UNEXPECTED("Get num rows in Dataset failed, num_rows has not been set yet."); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Get num rows in Dataset failed, num_rows has not been set yet."); | |||
| } | |||
| (*num) = num_rows_; | |||
| return Status::OK(); | |||
| @@ -55,7 +55,7 @@ Status SamplerRT::HandshakeRandomAccessOp(const RandomAccessOp *op) { | |||
| RETURN_IF_NOT_OK(child_sampler->HandshakeRandomAccessOp(op)); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(op != nullptr, "RandomAccessOp init failed, as it is nullptr."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(op != nullptr, "[Internal ERROR] RandomAccessOp init failed, as it is nullptr."); | |||
| // If there's a child sampler, set the row count to be it's sample count | |||
| if (HasChildSampler()) { | |||
| @@ -114,7 +114,7 @@ Status SamplerRT::GetAllIdsThenReset(py::array *data) { | |||
| { | |||
| py::gil_scoped_acquire gil_acquire; | |||
| if (Py_IsInitialized() == 0) { | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); | |||
| return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); | |||
| } | |||
| try { | |||
| RETURN_IF_NOT_OK(sample_ids->GetDataAsNumpy(data)); | |||
| @@ -127,7 +127,9 @@ Status SamplerRT::GetAllIdsThenReset(py::array *data) { | |||
| #endif | |||
| Status SamplerRT::SetNumSamples(int64_t num_samples) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "Invalid parameter, num_samples must be greater than or equal to 0."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| num_samples >= 0, | |||
| "Invalid parameter, 'num_samples' must be greater than or equal to 0, but got " + std::to_string(num_samples)); | |||
| num_samples_ = num_samples; | |||
| return Status::OK(); | |||
| } | |||
| @@ -161,13 +163,13 @@ Status SamplerRT::AddChild(std::shared_ptr<SamplerRT> child) { | |||
| // Only samplers can be added, not any other DatasetOp. | |||
| std::shared_ptr<SamplerRT> sampler = std::dynamic_pointer_cast<SamplerRT>(child); | |||
| if (!sampler) { | |||
| std::string err_msg("Cannot add child, child is not a sampler object."); | |||
| std::string err_msg("[Internal ERROR] Cannot add child, child is not a sampler object."); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| // Samplers can have at most 1 child. | |||
| if (!child_.empty()) { | |||
| std::string err_msg("Cannot add child sampler, this sampler already has a child."); | |||
| std::string err_msg("[Internal ERROR] Cannot add child sampler, this sampler already has a child."); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -42,7 +42,7 @@ class RandomAccessOp { | |||
| // @param std::map<int64_t, std::vector<int64_t>> * map | |||
| // @return Status The status code returned | |||
| virtual Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *map) const { | |||
| RETURN_STATUS_UNEXPECTED("GetClassIds needs to be override to support PK"); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] GetClassIds needs to be override to support PK."); | |||
| } | |||
| // default destructor | |||
| @@ -29,7 +29,7 @@ Status SequentialSamplerRT::GetNextSample(TensorRow *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| if (id_count_ > num_samples_) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" + | |||
| "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" + | |||
| std::to_string(id_count_) + ", num_samples_: " + std::to_string(num_samples_)); | |||
| } else if (id_count_ == num_samples_) { | |||
| (*out) = TensorRow(TensorRow::kFlagEOE); | |||
| @@ -41,10 +41,9 @@ Status WeightedRandomSamplerRT::InitSampler() { | |||
| if (num_samples_ == 0 || num_samples_ > num_rows_) { | |||
| num_samples_ = num_rows_; | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| num_rows_ > 0 && num_samples_, | |||
| "Invalid parameter, num_samples and num_rows must be greater than 0, but got num_rows: " + | |||
| std::to_string(num_rows_) + ", num_samples: " + std::to_string(num_samples_)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0 && num_samples_, | |||
| "[Internal ERROR] num_samples and num_rows must be greater than 0, but got num_rows: " + | |||
| std::to_string(num_rows_) + ", num_samples: " + std::to_string(num_samples_)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(samples_per_tensor_ > 0, | |||
| "Invalid parameter, samples_per_tensor(num_samples) must be greater than 0, but got " + | |||
| std::to_string(samples_per_tensor_) + ".\n"); | |||
| @@ -160,8 +159,9 @@ Status WeightedRandomSamplerRT::GetNextSample(TensorRow *out) { | |||
| } | |||
| if (genId >= num_rows_) { | |||
| RETURN_STATUS_UNEXPECTED("Generated indice is out of bound, expect range [0, num_data-1], got indice: " + | |||
| std::to_string(genId) + ", num_data: " + std::to_string(num_rows_ - 1)); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "[Internal ERROR] Generated indice is out of bound, expect range [0, num_data-1], got indice: " + | |||
| std::to_string(genId) + ", num_data: " + std::to_string(num_rows_ - 1)); | |||
| } | |||
| if (HasChildSampler()) { | |||
| @@ -76,7 +76,8 @@ Status SBUOp::ReadImageToTensor(const std::string &path, std::shared_ptr<Tensor> | |||
| if (decode_ == true) { | |||
| Status rc = Decode(*tensor, tensor); | |||
| if (rc.IsError()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path); | |||
| RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode image: " + path + | |||
| ": the image is damaged or permission is denied."); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| @@ -129,18 +130,21 @@ Status SBUOp::PrepareData() { | |||
| Path root_dir(real_folder_path.value()); | |||
| url_path_ = root_dir / url_file_name; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(url_path_.Exists() && !url_path_.IsDirectory(), | |||
| "Invalid file, failed to find SBU url file: " + url_path_.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| url_path_.Exists() && !url_path_.IsDirectory(), | |||
| "Invalid file, SBU url file: " + url_path_.ToString() + " does not exist or is a directory."); | |||
| MS_LOG(INFO) << "SBU operator found url file " << url_path_.ToString() << "."; | |||
| caption_path_ = root_dir / caption_file_name; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(caption_path_.Exists() && !caption_path_.IsDirectory(), | |||
| "Invalid file, failed to find SBU caption file: " + caption_path_.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| caption_path_.Exists() && !caption_path_.IsDirectory(), | |||
| "Invalid file, SBU caption file: " + caption_path_.ToString() + " does not exist or is a directory."); | |||
| MS_LOG(INFO) << "SBU operator found caption file " << caption_path_.ToString() << "."; | |||
| image_folder_ = root_dir / image_folder_name; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_folder_.Exists() && image_folder_.IsDirectory(), | |||
| "Invalid folder, failed to find SBU image folder: " + image_folder_.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| image_folder_.Exists() && image_folder_.IsDirectory(), | |||
| "Invalid folder, SBU image folder:" + image_folder_.ToString() + " does not exist or is not a directory."); | |||
| MS_LOG(INFO) << "SBU operator found image folder " << image_folder_.ToString() << "."; | |||
| std::ifstream url_file_reader; | |||
| @@ -149,10 +153,11 @@ Status SBUOp::PrepareData() { | |||
| url_file_reader.open(url_path_.ToString(), std::ios::in); | |||
| caption_file_reader.open(caption_path_.ToString(), std::ios::in); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(url_file_reader.is_open(), | |||
| "Invalid file, failed to open SBU url file: " + url_path_.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(caption_file_reader.is_open(), | |||
| "Invalid file, failed to open SBU caption file: " + caption_path_.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(url_file_reader.is_open(), "Invalid file, failed to open " + url_path_.ToString() + | |||
| ": permission denied for the SBU url file."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| caption_file_reader.is_open(), | |||
| "Invalid file, failed to open " + caption_path_.ToString() + ": permission denied for the SBU caption file."); | |||
| Status rc = GetAvailablePairs(url_file_reader, caption_file_reader); | |||
| url_file_reader.close(); | |||
| @@ -172,8 +177,8 @@ Status SBUOp::GetAvailablePairs(std::ifstream &url_file_reader, std::ifstream &c | |||
| while (std::getline(url_file_reader, url_line) && std::getline(caption_file_reader, caption_line)) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| (url_line.empty() && caption_line.empty()) || (!url_line.empty() && !caption_line.empty()), | |||
| "Invalid data, SBU url and caption file are mismatched: " + url_path_.ToString() + " and " + | |||
| caption_path_.ToString()); | |||
| "Invalid data, SBU url file: " + url_path_.ToString() + " and caption file: " + caption_path_.ToString() + | |||
| " are mismatched at line: " + std::to_string(line_num) + "."); | |||
| if (!url_line.empty() && !caption_line.empty()) { | |||
| line_num++; | |||
| RETURN_IF_NOT_OK(this->ParsePair(url_line, caption_line)); | |||
| @@ -182,7 +187,8 @@ Status SBUOp::GetAvailablePairs(std::ifstream &url_file_reader, std::ifstream &c | |||
| image_caption_pairs_.shrink_to_fit(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_caption_pairs_.size() > 0, "No valid images in " + image_folder_.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(image_caption_pairs_.size() > 0, | |||
| "Invalid data, no valid images in " + image_folder_.ToString() + ", check SBU dataset."); | |||
| // base field of RandomAccessOp | |||
| num_rows_ = image_caption_pairs_.size(); | |||
| @@ -80,13 +80,14 @@ Status TextFileOp::LoadTensor(const std::string &line, TensorRow *out_row) { | |||
| Status TextFileOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) { | |||
| auto realpath = FileUtils::GetRealPath(file.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " get real path failed, path=" << file; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, " + DatasetName() + " get real path failed, path=" + file); | |||
| MS_LOG(ERROR) << "Invalid file path, " << file << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + file + " does not exist."); | |||
| } | |||
| std::ifstream handle(realpath.value()); | |||
| if (!handle.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + file); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open text file: " + file + | |||
| ": the file is damaged or permission is denied."); | |||
| } | |||
| int64_t rows_total = 0; | |||
| @@ -170,13 +171,13 @@ Status TextFileOp::FillIOBlockQueue(const std::vector<int64_t> &i_keys) { | |||
| int64_t CountTotalRows(const std::string &file) { | |||
| auto realpath = FileUtils::GetRealPath(file.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file; | |||
| MS_LOG(ERROR) << "Invalid file, " << file << " does not exist."; | |||
| return 0; | |||
| } | |||
| std::ifstream handle(realpath.value()); | |||
| if (!handle.is_open()) { | |||
| MS_LOG(ERROR) << "Invalid file, failed to open file: " << file; | |||
| MS_LOG(ERROR) << "Invalid file, failed to open text file:" << file << ", the file is damaged or permission denied."; | |||
| return 0; | |||
| } | |||
| @@ -44,7 +44,7 @@ const int64_t kTFRecordFileLimit = 0x140000000; | |||
| bool TFReaderOp::ValidateFirstRowCrc(const std::string &filename) { | |||
| auto realpath = FileUtils::GetRealPath(filename.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filename; | |||
| MS_LOG(ERROR) << "Invalid file path, " << filename << " does not exist."; | |||
| return false; | |||
| } | |||
| @@ -126,7 +126,7 @@ Status TFReaderOp::Init() { | |||
| } | |||
| if (total_rows_ < 0) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid parameter, num_samples or num_rows for TFRecordDataset must be greater than 0, but got: " + | |||
| "[Internal ERROR] num_samples or num_rows for TFRecordDataset must be greater than 0, but got: " + | |||
| std::to_string(total_rows_)); | |||
| } | |||
| @@ -267,14 +267,14 @@ Status TFReaderOp::FillIOBlockNoShuffle() { | |||
| Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, int64_t end_offset, int32_t worker_id) { | |||
| auto realpath = FileUtils::GetRealPath(filename.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filename; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + filename); | |||
| MS_LOG(ERROR) << "Invalid file path, " << filename << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + filename + " does not exist."); | |||
| } | |||
| std::ifstream reader; | |||
| reader.open(realpath.value()); | |||
| if (!reader) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + filename); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, " + filename + " open failed: permission denied!"); | |||
| } | |||
| int64_t rows_read = 0; | |||
| @@ -304,7 +304,7 @@ Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, i | |||
| if (start_offset == kInvalidOffset || (rows_total >= start_offset && rows_total < end_offset)) { | |||
| dataengine::Example tf_file; | |||
| if (!tf_file.ParseFromString(serialized_example)) { | |||
| std::string errMsg = "Invalid file, failed to parse tfrecord file : " + filename; | |||
| std::string errMsg = "Failed to parse tfrecord file: " + filename + ", make sure protobuf version is suitable."; | |||
| MS_LOG(DEBUG) << errMsg + ", details of string: " << serialized_example; | |||
| RETURN_STATUS_UNEXPECTED(errMsg); | |||
| } | |||
| @@ -333,7 +333,8 @@ Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *ou | |||
| const google::protobuf::Map<std::string, dataengine::Feature> &feature_map = example_features.feature(); | |||
| auto iter_column = feature_map.find(current_col.Name()); | |||
| if (iter_column == feature_map.end()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.Name() + " does not exist."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid columns_list, column name: " + current_col.Name() + | |||
| " does not exist in tfrecord file, check tfrecord files."); | |||
| } | |||
| const dataengine::Feature &column_values_list = iter_column->second; | |||
| RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col)); | |||
| @@ -383,11 +384,13 @@ Status TFReaderOp::LoadFeature(TensorRow *tensor_row, const dataengine::Feature | |||
| break; | |||
| } | |||
| case dataengine::Feature::KindCase::KIND_NOT_SET: { | |||
| std::string err_msg = "Invalid data, column type in tf record file must be uint8, int64 or float32."; | |||
| std::string err_msg = | |||
| "Unrecognized datatype, column type in tfrecord file must be uint8, int64 or float32, check tfrecord file."; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| default: { | |||
| std::string err_msg = "Invalid data, column type in tf record file must be uint8, int64 or float32."; | |||
| std::string err_msg = | |||
| "Unrecognized datatype, column type in tfrecord file must be uint8, int64 or float32, check tfrecord file."; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| } | |||
| @@ -404,8 +407,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng | |||
| // Must be single byte type for each element! | |||
| if (current_col.Type() != DataType::DE_UINT8 && current_col.Type() != DataType::DE_INT8 && | |||
| current_col.Type() != DataType::DE_STRING) { | |||
| std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + | |||
| ", data type should be int8, uint8 or string, but got " + current_col.Type().ToString(); | |||
| std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + | |||
| " should be int8, uint8 or string, but got " + current_col.Type().ToString(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -439,7 +442,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng | |||
| for (int i = 1; i < cur_shape.Size(); ++i) { | |||
| if (cur_shape[i] == TensorShape::kDimUnknown) { | |||
| std::string err_msg = | |||
| "Invalid data, more than one unknown dimension in the shape of column: " + current_col.Name(); | |||
| "Invalid data dimension, only one dimension of the shape can be -1, but the 0th and the " + | |||
| std::to_string(i) + "th dimension shape of " + current_col.Name() + " are both -1."; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| new_pad_size *= cur_shape[i]; | |||
| @@ -447,10 +451,10 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng | |||
| pad_size = new_pad_size; | |||
| } else { | |||
| if (cur_shape.known() && cur_shape.NumOfElements() != max_size) { | |||
| std::string err_msg = "Invalid data, shape in schema's column '" + current_col.Name() + "' is incorrect." + | |||
| "\nshape received: " + cur_shape.ToString() + | |||
| "\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) + | |||
| "\nexpected total elements in shape: " + std::to_string(max_size); | |||
| std::string err_msg = "Data dimensions of '" + current_col.Name() + | |||
| "' do not match, the expected total elements of shape " + cur_shape.ToString() + | |||
| " should be " + std::to_string(max_size) + ", but got " + | |||
| std::to_string(cur_shape.NumOfElements()); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| } | |||
| @@ -469,8 +473,8 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng | |||
| // KFloatList can only map to DE types: | |||
| // DE_FLOAT32 | |||
| if (current_col.Type() != DataType::DE_FLOAT32) { | |||
| std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + | |||
| ", data type should be string, but got " + current_col.Type().ToString(); | |||
| std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + | |||
| " should be float32, but got " + current_col.Type().ToString(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -507,9 +511,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dat | |||
| } else if (current_col.Type() == DataType::DE_INT8) { | |||
| RETURN_IF_NOT_OK(LoadIntList<int8_t>(current_col, column_values_list, num_elements, tensor)); | |||
| } else { | |||
| std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.Name() + | |||
| ", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" + | |||
| ", but got " + current_col.Type().ToString(); | |||
| std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + | |||
| " should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8, but got " + | |||
| current_col.Type().ToString(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -522,8 +526,8 @@ template <typename T> | |||
| Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, | |||
| int32_t *num_elements, std::shared_ptr<Tensor> *tensor) { | |||
| if (!(current_col.Type().IsInt())) { | |||
| std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + | |||
| ", data type should be int, but got " + current_col.Type().ToString(); | |||
| std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + " should be int, but got " + | |||
| current_col.Type().ToString(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -551,8 +555,8 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin | |||
| Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::string> columns_to_load) { | |||
| auto realpath = FileUtils::GetRealPath(tf_file.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << tf_file; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + tf_file); | |||
| MS_LOG(ERROR) << "Invalid file path, " << tf_file << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + tf_file + " does not exist."); | |||
| } | |||
| std::ifstream reader; | |||
| @@ -572,7 +576,8 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri | |||
| dataengine::Example example; | |||
| if (!example.ParseFromString(serialized_example)) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse tfrecord file: " + serialized_example); | |||
| RETURN_STATUS_UNEXPECTED("Failed to parse tfrecord file: " + realpath.value() + | |||
| ", fields that failed to parse: " + serialized_example); | |||
| } | |||
| const dataengine::Features &example_features = example.features(); | |||
| @@ -587,7 +592,7 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri | |||
| for (const auto &curr_col_name : columns_to_load) { | |||
| auto it = feature_map.find(curr_col_name); | |||
| if (it == feature_map.end()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, failed to find column name: " + curr_col_name); | |||
| RETURN_STATUS_UNEXPECTED("Invalid columns_list, tfrecord file failed to find column name: " + curr_col_name); | |||
| } | |||
| std::string column_name = it->first; | |||
| @@ -609,10 +614,12 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri | |||
| break; | |||
| case dataengine::Feature::KindCase::KIND_NOT_SET: | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type of tf record file must be uint8, int64 or float32."); | |||
| RETURN_STATUS_UNEXPECTED("Unrecognized column type, the column type of " + column_name + | |||
| " should be uint8, int64 or float32, but got unrecognized column type."); | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type of tf record file must be uint8, int64 or float32."); | |||
| RETURN_STATUS_UNEXPECTED("Unsupported column type, the column type of " + column_name + | |||
| " should be uint8, int64 or float32, but got unsupported column type."); | |||
| } | |||
| RETURN_IF_NOT_OK( | |||
| @@ -633,7 +640,9 @@ Status TFReaderOp::CountTotalRows(int64_t *out_total_rows, const std::vector<std | |||
| std::vector<std::future<int64_t>> async_results; | |||
| if (threads <= 0) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, the threads of TFReader should be greater than zero, but got zero."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid thread number, the number of threads of TFReader should be greater than zero, but got " + | |||
| std::to_string(threads) + "."); | |||
| } | |||
| int64_t chunk_size = filenames.size() / threads; | |||
| int64_t remainder = filenames.size() % threads; | |||
| @@ -672,7 +681,7 @@ Status TFReaderOp::CountTotalRows(int64_t *out_total_rows, const std::vector<std | |||
| *out_total_rows = total_rows; | |||
| } catch (const std::exception &e) { | |||
| std::string err_msg = "Unexpected error occurred: "; | |||
| err_msg += e.what(); | |||
| err_msg += std::string(e.what()); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -684,7 +693,7 @@ int64_t TFReaderOp::CountTotalRowsSectioned(const std::vector<std::string> &file | |||
| for (int i = begin; i < end; i++) { | |||
| auto realpath = FileUtils::GetRealPath(filenames[i].data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filenames[i]; | |||
| MS_LOG(ERROR) << "Invalid file path, " << filenames[i] << " does not exist."; | |||
| continue; | |||
| } | |||
| @@ -107,7 +107,7 @@ int64_t USPSOp::CountRows(const std::string &data_file) { | |||
| std::ifstream data_file_reader; | |||
| data_file_reader.open(data_file, std::ios::in); | |||
| if (!data_file_reader.is_open()) { | |||
| MS_LOG(ERROR) << "Invalid file, failed to open file: " << data_file; | |||
| MS_LOG(ERROR) << "Invalid file, failed to open " << data_file << ": permission denied."; | |||
| return 0; | |||
| } | |||
| @@ -124,7 +124,8 @@ int64_t USPSOp::CountRows(const std::string &data_file) { | |||
| Status USPSOp::GetFiles() { | |||
| auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(real_dataset_dir.has_value(), "Get real path failed: " + dataset_dir_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(real_dataset_dir.has_value(), | |||
| "Invalid file path, USPS dataset dir: " + dataset_dir_ + " does not exist."); | |||
| Path root_dir(real_dataset_dir.value()); | |||
| const Path train_file_name("usps"); | |||
| @@ -144,16 +145,18 @@ Status USPSOp::GetFiles() { | |||
| if (use_train) { | |||
| Path train_path = root_dir / train_file_name; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(train_path.Exists() && !train_path.IsDirectory(), | |||
| "Invalid file, failed to find USPS train data file: " + train_path.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| train_path.Exists() && !train_path.IsDirectory(), | |||
| "Invalid file, USPS dataset train file: " + train_path.ToString() + " does not exist or is a directory."); | |||
| data_files_list_.emplace_back(train_path.ToString()); | |||
| MS_LOG(INFO) << "USPS operator found train data file " << train_path.ToString() << "."; | |||
| } | |||
| if (use_test) { | |||
| Path test_path = root_dir / test_file_name; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(test_path.Exists() && !test_path.IsDirectory(), | |||
| "Invalid file, failed to find USPS test data file: " + test_path.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| test_path.Exists() && !test_path.IsDirectory(), | |||
| "Invalid file, USPS dataset test file: " + test_path.ToString() + " does not exist or is a directory."); | |||
| data_files_list_.emplace_back(test_path.ToString()); | |||
| MS_LOG(INFO) << "USPS operator found test data file " << test_path.ToString() << "."; | |||
| } | |||
| @@ -163,7 +166,8 @@ Status USPSOp::GetFiles() { | |||
| Status USPSOp::LoadFile(const std::string &data_file, int64_t start_offset, int64_t end_offset, int32_t worker_id) { | |||
| std::ifstream data_file_reader(data_file); | |||
| if (!data_file_reader.is_open()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + data_file); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open USPS dataset file: " + data_file + | |||
| ", permission denied."); | |||
| } | |||
| int64_t rows_total = 0; | |||
| @@ -210,8 +214,8 @@ Status USPSOp::LoadTensor(std::string *line, TensorRow *trow) { | |||
| auto images_buffer = std::make_unique<unsigned char[]>(kUSPSImageSize); | |||
| auto labels_buffer = std::make_unique<uint32_t[]>(1); | |||
| if (images_buffer == nullptr || labels_buffer == nullptr) { | |||
| MS_LOG(ERROR) << "Failed to allocate memory for USPS buffer."; | |||
| RETURN_STATUS_UNEXPECTED("Failed to allocate memory for USPS buffer."); | |||
| MS_LOG(ERROR) << "[Internal ERROR] Failed to allocate memory for USPS buffer."; | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to allocate memory for USPS buffer."); | |||
| } | |||
| RETURN_IF_NOT_OK(this->ParseLine(line, images_buffer, labels_buffer)); | |||
| @@ -245,10 +249,12 @@ Status USPSOp::ParseLine(std::string *line, const std::unique_ptr<unsigned char[ | |||
| } else { | |||
| size_t split_pos = item.find(":"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(split_pos != std::string::npos, "Invalid data, USPS data file is corrupted."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(split_pos != std::string::npos, | |||
| "Invalid data, split character ':' is missing in USPS data file."); | |||
| // check pixel index | |||
| CHECK_FAIL_RETURN_UNEXPECTED(std::stoi(item.substr(0, split_pos)) == (split_num - 1), | |||
| "Invalid data, USPS data file is corrupted."); | |||
| "Invalid data, the character before ':' should be " + std::to_string(split_num - 1) + | |||
| ", but got " + item.substr(0, split_pos) + "."); | |||
| std::string pixel_str = item.substr(split_pos + 1, item.length() - split_pos); | |||
| // transform the real pixel value from [-1, 1] to the integers within [0, 255] | |||
| @@ -257,7 +263,10 @@ Status USPSOp::ParseLine(std::string *line, const std::unique_ptr<unsigned char[ | |||
| line->erase(0, pos + 1); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(split_num == (kUSPSImageSize + 1), "Invalid data, USPS data file is corrupted."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(split_num == (kUSPSImageSize + 1), | |||
| "Invalid data, the number of fields split from the USPS data line " | |||
| "should be " + | |||
| std::to_string(kUSPSImageSize + 1) + ", but got " + std::to_string(split_num) + "."); | |||
| return Status::OK(); | |||
| } | |||
| @@ -274,7 +283,7 @@ Status USPSOp::CalculateNumRowsPerShard() { | |||
| } | |||
| std::string file_list = ss.str(); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, USPSDataset API can't read the data file (interface mismatch or no data found). " | |||
| "Invalid data, 'USPSDataset' API can't read the data file (interface mismatch or no data found). " | |||
| "Check file: " + | |||
| file_list); | |||
| } | |||
| @@ -118,14 +118,15 @@ Status VOCOp::ParseImageIds() { | |||
| auto realpath = FileUtils::GetRealPath(image_sets_file.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << image_sets_file; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + image_sets_file); | |||
| MS_LOG(ERROR) << "Invalid file path, " << image_sets_file << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + image_sets_file + " does not exist."); | |||
| } | |||
| std::ifstream in_file; | |||
| in_file.open(realpath.value()); | |||
| if (in_file.fail()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + image_sets_file); | |||
| RETURN_STATUS_UNEXPECTED("Invalid ImageSets file, failed to open ImageSets file: " + image_sets_file + | |||
| ", the file is damaged or permission denied."); | |||
| } | |||
| std::string id; | |||
| while (getline(in_file, id)) { | |||
| @@ -187,28 +188,30 @@ Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float | |||
| std::string invalid_bbox = "{" + std::to_string(static_cast<int>(xmin)) + ", " + | |||
| std::to_string(static_cast<int>(ymin)) + ", " + std::to_string(static_cast<int>(xmax)) + | |||
| ", " + std::to_string(static_cast<int>(ymax)) + "}"; | |||
| RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + invalid_bbox + " found in " + path); | |||
| RETURN_STATUS_UNEXPECTED("Invalid bndbox, the coordinate of bndbox in " + path + | |||
| " should be greater than 0, but got " + invalid_bbox); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status VOCOp::ParseAnnotationBbox(const std::string &path) { | |||
| if (!Path(path).Exists()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path); | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + path + " does not exist."); | |||
| } | |||
| Annotation annotation; | |||
| XMLDocument doc; | |||
| XMLError e = doc.LoadFile(common::SafeCStr(path)); | |||
| if (e != XMLError::XML_SUCCESS) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to load xml file: " + path); | |||
| RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load " + path + ": the xml file is damaged or incorrect format."); | |||
| } | |||
| XMLElement *root = doc.RootElement(); | |||
| if (root == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, failed to load root element for xml file."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load root element of " + path + | |||
| ": the format of xml file is incorrect."); | |||
| } | |||
| XMLElement *object = root->FirstChildElement("object"); | |||
| if (object == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, no object found in " + path); | |||
| RETURN_STATUS_UNEXPECTED("Invalid xml, the node of object is missing in " + path + "."); | |||
| } | |||
| while (object != nullptr) { | |||
| std::string label_name; | |||
| @@ -226,7 +229,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) { | |||
| ParseNodeValue(bbox_node, "ymax", &ymax); | |||
| RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path); | |||
| RETURN_STATUS_UNEXPECTED("Invalid xml, the node of bndbox is missing in " + path); | |||
| } | |||
| if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 && | |||
| @@ -254,7 +257,8 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co | |||
| if (decode_ == true) { | |||
| Status rc = Decode(*tensor, tensor); | |||
| if (rc.IsError()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path); | |||
| RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode " + path + | |||
| ": the image is damaged or permission denied."); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| @@ -280,7 +284,7 @@ Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) { | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| item.second.size() == 6, | |||
| "Invalid parameter, annotation only support 6 parameters, but got " + std::to_string(item.second.size())); | |||
| "[Internal ERROR] annotation only support 6 parameters, but got " + std::to_string(item.second.size())); | |||
| std::vector<float> tmp_bbox = {(item.second)[0], (item.second)[1], (item.second)[2], (item.second)[3]}; | |||
| bbox_data.insert(bbox_data.end(), tmp_bbox.begin(), tmp_bbox.end()); | |||
| @@ -328,8 +332,8 @@ Status VOCOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<in | |||
| RETURN_UNEXPECTED_IF_NULL(output_class_indexing); | |||
| if ((*output_class_indexing).empty()) { | |||
| if (task_type_ != TaskType::Detection) { | |||
| MS_LOG(ERROR) << "Invalid parameter, GetClassIndexing only valid in \"Detection\" task."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, GetClassIndexing only valid in \"Detection\" task."); | |||
| MS_LOG(ERROR) << "Invalid task, only 'Detection' task support GetClassIndexing."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid task, only 'Detection' task support GetClassIndexing."); | |||
| } | |||
| RETURN_IF_NOT_OK(ParseImageIds()); | |||
| RETURN_IF_NOT_OK(ParseAnnotationIds()); | |||
| @@ -49,12 +49,12 @@ YesNoOp::YesNoOp(const std::string &file_dir, int32_t num_workers, int32_t queue | |||
| Status YesNoOp::PrepareData() { | |||
| auto realpath = FileUtils::GetRealPath(dataset_dir_.data()); | |||
| if (!realpath.has_value()) { | |||
| MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_; | |||
| RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_); | |||
| MS_LOG(ERROR) << "Invalid file path, " << dataset_dir_ << " does not exist."; | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, " + dataset_dir_ + " does not exist."); | |||
| } | |||
| Path dir(realpath.value()); | |||
| if (dir.Exists() == false || dir.IsDirectory() == false) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, failed to open speech commands: " + dataset_dir_); | |||
| RETURN_STATUS_UNEXPECTED("Invalid directory, " + dataset_dir_ + " does not exist or is not a directory."); | |||
| } | |||
| std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(&dir); | |||
| RETURN_UNEXPECTED_IF_NULL(dir_itr); | |||
| @@ -101,8 +101,9 @@ Status YesNoOp::Split(const std::string &line, std::vector<int32_t> *split_num) | |||
| split_num->emplace_back(stoi(split[i])); | |||
| } | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "Converting char to int confront with an error in function stoi()."; | |||
| RETURN_STATUS_UNEXPECTED("Converting char to int confront with an error in function stoi()."); | |||
| MS_LOG(ERROR) << "[Internal ERROR] Converting char to int confront with an error in function stoi: " << e.what(); | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Converting char to int confront with an error in function stoi: " + | |||
| std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -38,7 +38,7 @@ void TakeOp::Print(std::ostream &out, bool show_all) const { | |||
| } | |||
| } | |||
| Status TakeOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); } | |||
| Status TakeOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] TakeOp is an inlined operator."); } | |||
| Status TakeOp::GetNextRow(TensorRow *row) { | |||
| RETURN_UNEXPECTED_IF_NULL(row); | |||
| @@ -101,9 +101,7 @@ Status ZipOp::ComputeColMap() { | |||
| int32_t old_id = pair.second; | |||
| // check if name already exists in column name descriptor | |||
| if (column_name_id_map_.count(name) == 1) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, key: " + name + | |||
| " already exists when zipping datasets. Check for duplicate key names in different " | |||
| "dataset."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, duplicate column " + name + " already exists when zipping datasets."); | |||
| } | |||
| column_name_id_map_[name] = old_id + colsCurrent; | |||
| } | |||
| @@ -115,7 +113,7 @@ Status ZipOp::ComputeColMap() { | |||
| return Status::OK(); | |||
| } | |||
| Status ZipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); } | |||
| Status ZipOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ZipOp is an inlined operator."); } | |||
| Status ZipOp::GetNextRow(TensorRow *row) { | |||
| RETURN_UNEXPECTED_IF_NULL(row); | |||
| @@ -210,7 +210,7 @@ def test_cifar10_exception(): | |||
| with pytest.raises(ValueError, match=error_msg_6): | |||
| ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=256) | |||
| error_msg_7 = "no .bin files found" | |||
| error_msg_7 = r"cifar\(.bin\) files are missing" | |||
| with pytest.raises(RuntimeError, match=error_msg_7): | |||
| ds1 = ds.Cifar10Dataset(NO_BIN_DIR) | |||
| for _ in ds1.__iter__(): | |||
| @@ -360,7 +360,7 @@ def test_cifar100_exception(): | |||
| with pytest.raises(ValueError, match=error_msg_6): | |||
| ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=256) | |||
| error_msg_7 = "no .bin files found" | |||
| error_msg_7 = r"cifar\(.bin\) files are missing" | |||
| with pytest.raises(RuntimeError, match=error_msg_7): | |||
| ds1 = ds.Cifar100Dataset(NO_BIN_DIR) | |||
| for _ in ds1.__iter__(): | |||
| @@ -300,7 +300,7 @@ def test_coco_case_exception(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "required node not found in JSON" in str(e) | |||
| assert "the attribute of 'images' is missing" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_CATEGORY_ID_FILE, task="Detection") | |||
| @@ -308,7 +308,7 @@ def test_coco_case_exception(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "category_id can't find in categories" in str(e) | |||
| assert "the attribute of 'category_id': 7 is missing" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_FILE, task="Detection") | |||
| @@ -316,7 +316,7 @@ def test_coco_case_exception(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "failed to open JSON file" in str(e) | |||
| assert "Invalid annotation file, Coco Dataset annotation file:" in str(e) | |||
| try: | |||
| sampler = ds.PKSampler(3) | |||
| @@ -239,7 +239,7 @@ def test_csv_dataset_exception(): | |||
| with pytest.raises(Exception) as err: | |||
| for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| pass | |||
| assert "failed to parse file" in str(err.value) | |||
| assert "failed to parse" in str(err.value) | |||
| TEST_FILE1 = '../data/dataset/testCSV/quoted.csv' | |||
| def exception_func(item): | |||
| @@ -359,7 +359,6 @@ def test_emnist_exception(): | |||
| with pytest.raises(RuntimeError, match=error_msg_8): | |||
| data = ds.EMnistDataset(DATA_DIR, "mnist", "train") | |||
| data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| with pytest.raises(RuntimeError, match=error_msg_8): | |||
| @@ -638,7 +638,7 @@ def test_generator_error_2(): | |||
| for _ in data1: | |||
| pass | |||
| print("========", str(info.value)) | |||
| assert "Generator should return a tuple of NumPy arrays" in str(info.value) | |||
| assert "'GeneratorDataset' should return a tuple of NumPy arrays" in str(info.value) | |||
| def test_generator_error_3(): | |||
| @@ -663,7 +663,8 @@ def test_generator_error_4(): | |||
| for _ in data1: | |||
| pass | |||
| assert "Unexpected error. Result of a tensorOp doesn't match output column names" in str(info.value) | |||
| assert "the number of columns returned in 'map' operations should match the number of 'output_columns'"\ | |||
| in str(info.value) | |||
| def test_generator_sequential_sampler(): | |||
| @@ -167,7 +167,7 @@ def test_manifest_dataset_exception(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "Invalid data, 'source' is not found in Manifest file" in str(e) | |||
| assert "Invalid manifest file, 'source' is missing in" in str(e) | |||
| NO_USAGE_DATA_FILE = "../data/dataset/testManifestData/invalidNoUsage.manifest" | |||
| try: | |||
| @@ -176,7 +176,7 @@ def test_manifest_dataset_exception(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "Invalid data, 'usage' is not found in Manifest file" in str(e) | |||
| assert "Invalid manifest file, 'usage' is missing in" in str(e) | |||
| if __name__ == '__main__': | |||
| @@ -307,7 +307,7 @@ def test_tf_wrong_schema(): | |||
| pass | |||
| except RuntimeError as e: | |||
| exception_occurred = True | |||
| assert "shape in schema's column 'image' is incorrect" in str(e) | |||
| assert "Data dimensions of 'image' do not match" in str(e) | |||
| assert exception_occurred, "test_tf_wrong_schema failed." | |||
| @@ -318,7 +318,7 @@ def test_tfrecord_invalid_columns(): | |||
| data = ds.TFRecordDataset(FILES, columns_list=invalid_columns_list) | |||
| with pytest.raises(RuntimeError) as info: | |||
| _ = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__() | |||
| assert "Invalid data, failed to find column name: not_exist" in str(info.value) | |||
| assert "Invalid columns_list, tfrecord file failed to find column name: not_exist" in str(info.value) | |||
| def test_tfrecord_exception(): | |||
| @@ -214,12 +214,12 @@ def test_usps_exception(): | |||
| for _ in test_data.__iter__(): | |||
| pass | |||
| error_msg_9 = "failed to find USPS train data file" | |||
| error_msg_9 = "usps does not exist or is a directory" | |||
| with pytest.raises(RuntimeError, match=error_msg_9): | |||
| train_data = ds.USPSDataset(WRONG_DIR, "train") | |||
| for _ in train_data.__iter__(): | |||
| pass | |||
| error_msg_10 = "failed to find USPS test data file" | |||
| error_msg_10 = "usps.t does not exist or is a directory" | |||
| with pytest.raises(RuntimeError, match=error_msg_10): | |||
| test_data = ds.USPSDataset(WRONG_DIR, "test") | |||
| for _ in test_data.__iter__(): | |||
| @@ -240,7 +240,7 @@ def test_voc_exception(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "Invalid bndbox: {321, 121, 421, 120}" in str(e) | |||
| assert "should be greater than 0, but got {321, 121, 421, 120}" in str(e) | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| @@ -68,6 +68,9 @@ def test_auto_offload(): | |||
| dataset_auto_enabled.create_tuple_iterator(num_epochs=1, output_numpy=True)): | |||
| np.testing.assert_array_equal(img_0, img_1) | |||
| # Need to turn off here or subsequent test cases will fail. | |||
| ds.config.set_auto_offload(False) | |||
| def test_offload_concat_dataset_1(): | |||
| """ | |||
| @@ -369,9 +369,12 @@ def test_multi_col_map(): | |||
| # test exceptions | |||
| assert "output_columns with value 233 is not of type" in batch_map_config(2, 2, split_col, ["col2"], 233) | |||
| assert "column_order with value 233 is not of type" in batch_map_config(2, 2, split_col, ["col2"], ["col1"], 233) | |||
| assert "output_columns in batch is not set correctly" in batch_map_config(2, 2, split_col, ["col2"], ["col1"]) | |||
| assert "Incorrect number of columns" in batch_map_config(2, 2, split_col, ["col2"], ["col3", "col4", "col5"]) | |||
| assert "col-1 doesn't exist" in batch_map_config(2, 2, split_col, ["col-1"], ["col_x", "col_y"]) | |||
| assert "columns that are not involved in 'per_batch_map' should not be in output_columns"\ | |||
| in batch_map_config(2, 2, split_col, ["col2"], ["col1"]) | |||
| assert "the number of columns returned in 'per_batch_map' function should be 3"\ | |||
| in batch_map_config(2, 2, split_col, ["col2"], ["col3", "col4", "col5"]) | |||
| assert "'col-1' of 'input_columns' doesn't exist"\ | |||
| in batch_map_config(2, 2, split_col, ["col-1"], ["col_x", "col_y"]) | |||
| def test_exceptions_2(): | |||
| @@ -379,16 +382,16 @@ def test_exceptions_2(): | |||
| for i in range(num): | |||
| yield (np.array([i]),) | |||
| def simple_copy(colList, batchInfo): | |||
| return ([np.copy(arr) for arr in colList],) | |||
| def simple_copy(col_list, batch_info): | |||
| return ([np.copy(arr) for arr in col_list],) | |||
| def concat_copy(colList, batchInfo): | |||
| def concat_copy(col_list, batch_info): | |||
| # this will duplicate the number of rows returned, which would be wrong! | |||
| return ([np.copy(arr) for arr in colList] * 2,) | |||
| return ([np.copy(arr) for arr in col_list] * 2,) | |||
| def shrink_copy(colList, batchInfo): | |||
| def shrink_copy(col_list, batch_info): | |||
| # this will duplicate the number of rows returned, which would be wrong! | |||
| return ([np.copy(arr) for arr in colList][0:int(len(colList) / 2)],) | |||
| return ([np.copy(arr) for arr in col_list][0:int(len(col_list) / 2)],) | |||
| def test_exceptions_config(gen_num, batch_size, in_cols, per_batch_map): | |||
| data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).batch(batch_size, input_columns=in_cols, | |||
| @@ -401,9 +404,9 @@ def test_exceptions_2(): | |||
| return str(e) | |||
| # test exception where column name is incorrect | |||
| assert "col:num1 doesn't exist" in test_exceptions_config(4, 2, ["num1"], simple_copy) | |||
| assert "expects: 2 rows returned from per_batch_map, got: 4" in test_exceptions_config(4, 2, ["num"], concat_copy) | |||
| assert "expects: 4 rows returned from per_batch_map, got: 2" in test_exceptions_config(4, 4, ["num"], shrink_copy) | |||
| assert "'num1' of 'input_columns' doesn't exist" in test_exceptions_config(4, 2, ["num1"], simple_copy) | |||
| assert "expects: 2 rows returned from 'per_batch_map', got: 4" in test_exceptions_config(4, 2, ["num"], concat_copy) | |||
| assert "expects: 4 rows returned from 'per_batch_map', got: 2" in test_exceptions_config(4, 4, ["num"], shrink_copy) | |||
| if __name__ == '__main__': | |||