| @@ -14,22 +14,22 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include <utility> | |||
| #include "minddata/dataset/core/tensor_row.h" | |||
| #include <utility> | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| TensorRow::TensorRow() noexcept : id_(kDefaultRowId) {} | |||
| TensorRow::TensorRow() noexcept : id_(kDefaultRowId), path_({}) {} | |||
| TensorRow::TensorRow(size_type n, TensorRow::value_type t) noexcept : id_(kDefaultRowId), row_(n, t) {} | |||
| TensorRow::TensorRow(size_type n, TensorRow::value_type t) noexcept : id_(kDefaultRowId), path_({}), row_(n, t) {} | |||
| TensorRow::TensorRow(const TensorRow::vector_type &v) : id_(kDefaultRowId), row_(v) {} | |||
| TensorRow::TensorRow(const TensorRow::vector_type &v) : id_(kDefaultRowId), path_({}), row_(v) {} | |||
| TensorRow::TensorRow(row_id_type id, const std::initializer_list<value_type> &lst) : id_(id), row_(lst) {} | |||
| TensorRow::TensorRow(row_id_type id, const std::initializer_list<value_type> &lst) : id_(id), path_({}), row_(lst) {} | |||
| TensorRow::TensorRow(const TensorRow &tr) : id_(tr.id_), row_(tr.row_) {} | |||
| TensorRow::TensorRow(const TensorRow &tr) : id_(tr.id_), path_(tr.path_), row_(tr.row_) {} | |||
| TensorRow &TensorRow::operator=(const TensorRow &tr) { | |||
| if (this == &tr) { | |||
| @@ -37,6 +37,7 @@ TensorRow &TensorRow::operator=(const TensorRow &tr) { | |||
| } | |||
| row_ = tr.row_; | |||
| id_ = tr.id_; | |||
| path_ = tr.path_; | |||
| return *this; | |||
| } | |||
| @@ -45,13 +46,14 @@ TensorRow &TensorRow::operator=(const std::initializer_list<TensorRow::value_typ | |||
| return *this; | |||
| } | |||
| TensorRow::TensorRow(TensorRow::vector_type &&v) noexcept : id_(kDefaultRowId), row_(std::move(v)) {} | |||
| TensorRow::TensorRow(TensorRow::vector_type &&v) noexcept : id_(kDefaultRowId), path_({}), row_(std::move(v)) {} | |||
| TensorRow::TensorRow(row_id_type id, std::initializer_list<value_type> &&lst) noexcept | |||
| : id_(id), row_(std::move(lst)) {} | |||
| : id_(id), path_({}), row_(std::move(lst)) {} | |||
| TensorRow::TensorRow(TensorRow &&tr) noexcept { | |||
| id_ = tr.id_; | |||
| path_ = std::move(tr.path_); | |||
| row_ = std::move(tr.row_); | |||
| } | |||
| @@ -62,6 +64,7 @@ TensorRow &TensorRow::operator=(TensorRow &&tr) noexcept { | |||
| row_ = std::move(tr.row_); | |||
| id_ = tr.id_; | |||
| tr.id_ = kDefaultRowId; | |||
| path_ = std::move(tr.path_); | |||
| return *this; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include <deque> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "minddata/dataset/core/tensor.h" | |||
| @@ -164,6 +165,10 @@ class TensorRow { | |||
| void setId(row_id_type id) { id_ = id; } | |||
| std::vector<std::string> getPath() const { return path_; } | |||
| void setPath(std::vector<std::string> path) { path_ = path; } | |||
| const vector_type &getRow() const { return row_; } | |||
| int64_t SizeInBytes() const { | |||
| @@ -219,6 +224,7 @@ class TensorRow { | |||
| protected: | |||
| row_id_type id_; | |||
| std::vector<std::string> path_; | |||
| std::vector<std::shared_ptr<Tensor>> row_; | |||
| }; | |||
| } // namespace dataset | |||
| @@ -41,11 +41,31 @@ Status CpuMapJob::Run(std::vector<TensorRow> in, std::vector<TensorRow> *out) { | |||
| // Call compute function for cpu | |||
| Status rc = ops_[i]->Compute(input_row, &result_row); | |||
| if (rc.IsError()) { | |||
| if (input_row.getId() >= 0) { | |||
| MS_LOG(ERROR) << "The TensorRow with id=" + std::to_string(input_row.getId()) + " failed on " + | |||
| std::to_string(i) + " TensorOp in Map: " + ops_[i]->Name(); | |||
| std::string err_msg = ""; | |||
| std::string op_name = ops_[i]->Name(); | |||
| std::string abbr_op_name = op_name.substr(0, op_name.length() - 2); | |||
| err_msg += "map operation: [" + abbr_op_name + "] failed. "; | |||
| if (input_row.getPath().size() > 0 && !input_row.getPath()[0].empty()) { | |||
| err_msg += "The corresponding data files: " + input_row.getPath()[0]; | |||
| if (input_row.getPath().size() > 1) { | |||
| std::set<std::string> path_set; | |||
| path_set.insert(input_row.getPath()[0]); | |||
| for (auto j = 1; j < input_row.getPath().size(); j++) { | |||
| if (!input_row.getPath()[j].empty() && path_set.find(input_row.getPath()[j]) == path_set.end()) { | |||
| err_msg += ", " + input_row.getPath()[j]; | |||
| path_set.insert(input_row.getPath()[j]); | |||
| } | |||
| } | |||
| } | |||
| err_msg += ". "; | |||
| } | |||
| return rc; | |||
| std::string tensor_err_msg = rc.GetErrDescription(); | |||
| if (rc.GetLineOfCode() < 0) { | |||
| err_msg += "Error description:\n"; | |||
| } | |||
| err_msg += tensor_err_msg; | |||
| rc.SetErrDescription(err_msg); | |||
| RETURN_IF_NOT_OK(rc); | |||
| } | |||
| // Assign result_row to to_process for the next TensorOp processing, except for the last TensorOp in the list. | |||
| @@ -288,6 +288,13 @@ Status MapOp::WorkerCompute(DataBuffer *in_buffer, TensorQTable *new_tensor_tabl | |||
| (void)std::transform(to_process_indices_.begin(), to_process_indices_.end(), std::back_inserter(to_process), | |||
| [&cur_row](const auto &it) { return std::move(cur_row[it]); }); | |||
| to_process.setId(cur_row.getId()); | |||
| std::vector<std::string> cur_row_path = cur_row.getPath(); | |||
| if (cur_row_path.size() > 0) { | |||
| std::vector<std::string> to_process_path; | |||
| (void)std::transform(to_process_indices_.begin(), to_process_indices_.end(), std::back_inserter(to_process_path), | |||
| [&cur_row_path](const auto &it) { return cur_row_path[it]; }); | |||
| to_process.setPath(to_process_path); | |||
| } | |||
| job_input_table.push_back(std::move(to_process)); | |||
| original_table.push_back(std::move(cur_row)); | |||
| } | |||
| @@ -88,6 +88,9 @@ Status ProjectOp::Project(std::unique_ptr<DataBuffer> *data_buffer) { | |||
| TensorRow new_row; | |||
| (void)std::transform(projected_column_indices_.begin(), projected_column_indices_.end(), | |||
| std::back_inserter(new_row), [¤t_row](uint32_t x) { return current_row[x]; }); | |||
| // Now if columns changed after map, we don't know which column we should keep, | |||
| // so temporarily we don't support print file_path after ProjectOp. | |||
| new_row.setPath({}); | |||
| new_tensor_table->push_back(new_row); | |||
| } | |||
| (*data_buffer)->set_tensor_table(std::move(new_tensor_table)); | |||
| @@ -95,7 +95,7 @@ AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir | |||
| bool StrComp(const std::string &a, const std::string &b) { | |||
| // returns 1 if string "a" represent a numeric value less than string "b" | |||
| // the following will always return name, provided there is only one "." character in name | |||
| // "." character is guaranteed to exist since the extension is checked befor this function call. | |||
| // "." character is guaranteed to exist since the extension is checked before this function call. | |||
| int64_t value_a = std::stoi(a.substr(1, a.find(".")).c_str()); | |||
| int64_t value_b = std::stoi(b.substr(1, b.find(".")).c_str()); | |||
| return value_a < value_b; | |||
| @@ -441,7 +441,7 @@ Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, | |||
| // Load 1 TensorRow (image,label) using 1 ImageColumns. 1 function call produces 1 TensorRow in a DataBuffer | |||
| // possible optimization: the helper functions of LoadTensorRow should be optimized | |||
| // to take a reference to a column descriptor? | |||
| // the design of this class is to make the code more readable, forgoing minor perfomance gain like | |||
| // the design of this class is to make the code more readable, forgoing minor performance gain like | |||
| // getting rid of duplicated checks | |||
| Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, TensorRow *row) { | |||
| // testing here is to just print out file path | |||
| @@ -530,6 +530,8 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, Tenso | |||
| } | |||
| } | |||
| file_handle.close(); | |||
| std::vector<std::string> path(row->size(), folder_path_ + file); | |||
| row->setPath(path); | |||
| return Status::OK(); | |||
| } | |||
| @@ -87,6 +87,7 @@ CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::stri | |||
| extensions_(exts), | |||
| data_schema_(std::move(schema)), | |||
| num_rows_in_attr_file_(0), | |||
| attr_file_(""), | |||
| usage_(usage) { | |||
| attr_info_queue_ = std::make_unique<Queue<std::vector<std::string>>>(queue_size); | |||
| io_block_queues_.Init(num_workers_, queue_size); | |||
| @@ -120,6 +121,7 @@ Status CelebAOp::ParseAttrFile() { | |||
| "Invalid file, failed to open Celeba attr file: " + attr_file_name); | |||
| } | |||
| attr_file_ = (folder_path / "list_attr_celeba.txt").toString(); | |||
| const auto PushBackToQueue = [this](std::vector<std::string> &vec, std::ifstream &attr_file, | |||
| std::ifstream &partition_file) { | |||
| Status s = attr_info_queue_->EmplaceBack(vec); | |||
| @@ -409,6 +411,8 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string, | |||
| label->Squeeze(); | |||
| (*row) = TensorRow(row_id, {std::move(image), std::move(label)}); | |||
| // Add file path info | |||
| row->setPath({image_path.toString(), attr_file_}); | |||
| return Status::OK(); | |||
| } | |||
| @@ -232,6 +232,7 @@ class CelebAOp : public ParallelOp, RandomAccessOp { | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> image_labels_vec_; | |||
| std::string usage_; | |||
| std::ifstream partition_file_; | |||
| std::string attr_file_; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -216,14 +216,19 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> fine_label; | |||
| std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first; | |||
| std::shared_ptr<Tensor> copy_image; | |||
| uint64_t path_index = std::ceil(index / kCifarBlockImageNum); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromTensor(ori_image, ©_image)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar(cifar_image_label_pairs_[index].second[0], &label)); | |||
| if (cifar_image_label_pairs_[index].second.size() > 1) { | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar(cifar_image_label_pairs_[index].second[1], &fine_label)); | |||
| (*trow) = TensorRow(index, {copy_image, std::move(label), std::move(fine_label)}); | |||
| // Add file path info | |||
| trow->setPath({path_record_[path_index], path_record_[path_index], path_record_[path_index]}); | |||
| } else { | |||
| (*trow) = TensorRow(index, {copy_image, std::move(label)}); | |||
| // Add file path info | |||
| trow->setPath({path_record_[path_index], path_record_[path_index]}); | |||
| } | |||
| return Status::OK(); | |||
| @@ -310,6 +315,8 @@ Status CifarOp::ReadCifar10BlockData() { | |||
| (void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar10 file: " + file); | |||
| (void)cifar_raw_data_block_->EmplaceBack(image_data); | |||
| // Add file path info | |||
| path_record_.push_back(file); | |||
| } | |||
| in.close(); | |||
| } | |||
| @@ -350,6 +357,8 @@ Status CifarOp::ReadCifar100BlockData() { | |||
| (void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar100 file: " + file); | |||
| (void)cifar_raw_data_block_->EmplaceBack(image_data); | |||
| // Add file path info | |||
| path_record_.push_back(file); | |||
| } | |||
| in.close(); | |||
| } | |||
| @@ -219,7 +219,7 @@ class CifarOp : public ParallelOp, public RandomAccessOp { | |||
| // @return | |||
| Status ParseCifarData(); | |||
| // Method derived from RandomAccess Op, enable Sampler to get all ids for each calss | |||
| // Method derived from RandomAccess Op, enable Sampler to get all ids for each class | |||
| // @param (std::map<uint64_t, std::vector<uint64_t >> * map - key label, val all ids for this class | |||
| // @return Status The status code returned | |||
| Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const override; | |||
| @@ -238,6 +238,7 @@ class CifarOp : public ParallelOp, public RandomAccessOp { | |||
| const std::string usage_; // can only be either "train" or "test" | |||
| std::unique_ptr<Queue<std::vector<unsigned char>>> cifar_raw_data_block_; | |||
| std::vector<std::string> cifar_files_; | |||
| std::vector<std::string> path_record_; | |||
| std::vector<std::pair<std::shared_ptr<Tensor>, std::vector<uint32_t>>> cifar_image_label_pairs_; | |||
| }; | |||
| } // namespace dataset | |||
| @@ -202,6 +202,9 @@ Status ClueOp::LoadFile(const std::string &file, const int64_t start_offset, con | |||
| } | |||
| int cols_count = cols_to_keyword_.size(); | |||
| TensorRow tRow(cols_count, nullptr); | |||
| // Add file path info | |||
| std::vector<std::string> file_path(cols_count, file); | |||
| tRow.setPath(file_path); | |||
| tensor_table->push_back(std::move(tRow)); | |||
| int cout = 0; | |||
| for (auto &p : cols_to_keyword_) { | |||
| @@ -97,7 +97,7 @@ Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) { | |||
| ColDescriptor(std::string(kJsonAnnoArea), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| break; | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, task type shoule be Detection, Stuff, Keypoint or Panoptic."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic."); | |||
| } | |||
| *ptr = std::make_shared<CocoOp>(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_, | |||
| builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_, | |||
| @@ -263,7 +263,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Te | |||
| } else if (task_type_ == TaskType::Panoptic) { | |||
| RETURN_IF_NOT_OK(LoadMixTensorRow(row_id, image_id, image, coordinate, trow)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, task type shoule be Detection, Stuff or Panoptic."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff or Panoptic."); | |||
| } | |||
| return Status::OK(); | |||
| @@ -302,6 +302,8 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima | |||
| Tensor::CreateFromVector(iscrowd_row, TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), &iscrowd)); | |||
| (*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd)}); | |||
| std::string image_full_path = image_folder_path_ + std::string("/") + image_id; | |||
| trow->setPath({image_full_path, annotation_path_, annotation_path_, annotation_path_}); | |||
| return Status::OK(); | |||
| } | |||
| @@ -324,6 +326,8 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_ | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(item_queue, TensorShape(bbox_dim), &item)); | |||
| (*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(item)}); | |||
| std::string image_full_path = image_folder_path_ + std::string("/") + image_id; | |||
| trow->setPath({image_full_path, annotation_path_, annotation_path_}); | |||
| return Status::OK(); | |||
| } | |||
| @@ -332,7 +336,7 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_ | |||
| // column ["bbox"] with datatype=float32 | |||
| // column ["category_id"] with datatype=uint32 | |||
| // column ["iscrowd"] with datatype=uint32 | |||
| // column ["area"] with datattype=uint32 | |||
| // column ["area"] with datatype=uint32 | |||
| Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> category_id, iscrowd, area; | |||
| @@ -365,6 +369,8 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, | |||
| (*trow) = TensorRow( | |||
| row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd), std::move(area)}); | |||
| std::string image_full_path = image_folder_path_ + std::string("/") + image_id; | |||
| trow->setPath({image_full_path, annotation_path_, annotation_path_, annotation_path_, annotation_path_}); | |||
| return Status::OK(); | |||
| } | |||
| @@ -461,7 +467,7 @@ Status CocoOp::ParseAnnotationIds() { | |||
| RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id)); | |||
| break; | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, task type shoule be Detection, Stuff, Keypoint or Panoptic."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic."); | |||
| } | |||
| } | |||
| for (auto img : image_que) { | |||
| @@ -110,12 +110,14 @@ Status CsvOp::Init() { | |||
| } | |||
| CsvOp::CsvParser::CsvParser(int32_t worker_id, std::shared_ptr<JaggedConnector> connector, int64_t rows_per_buffer, | |||
| char field_delim, std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default) | |||
| char field_delim, std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default, | |||
| std::string file_path) | |||
| : worker_id_(worker_id), | |||
| buffer_connector_(connector), | |||
| csv_rows_per_buffer_(rows_per_buffer), | |||
| csv_field_delim_(field_delim), | |||
| column_default_(column_default), | |||
| file_path_(file_path), | |||
| cur_state_(START_OF_FILE), | |||
| pos_(0), | |||
| cur_row_(0), | |||
| @@ -358,8 +360,11 @@ Status CsvOp::CsvParser::InitCsvParser() { | |||
| {{State::START_OF_FILE, Message::MS_NORMAL}, | |||
| {State::UNQUOTE, | |||
| [this](CsvParser &, char c) -> int { | |||
| TensorRow row(column_default_.size(), nullptr); | |||
| std::vector<std::string> file_path(column_default_.size(), file_path_); | |||
| row.setPath(file_path); | |||
| this->tensor_table_ = std::make_unique<TensorQTable>(); | |||
| this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr)); | |||
| this->tensor_table_->push_back(row); | |||
| this->str_buf_[0] = c; | |||
| this->pos_ = 1; | |||
| return 0; | |||
| @@ -367,15 +372,21 @@ Status CsvOp::CsvParser::InitCsvParser() { | |||
| {{State::START_OF_FILE, Message::MS_DELIM}, | |||
| {State::DELIM, | |||
| [this](CsvParser &, char c) -> int { | |||
| TensorRow row(column_default_.size(), nullptr); | |||
| std::vector<std::string> file_path(column_default_.size(), file_path_); | |||
| row.setPath(file_path); | |||
| this->tensor_table_ = std::make_unique<TensorQTable>(); | |||
| this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr)); | |||
| this->tensor_table_->push_back(row); | |||
| return this->PutRecord(c); | |||
| }}}, | |||
| {{State::START_OF_FILE, Message::MS_QUOTE}, | |||
| {State::QUOTE, | |||
| [this](CsvParser &, char c) -> int { | |||
| TensorRow row(column_default_.size(), nullptr); | |||
| std::vector<std::string> file_path(column_default_.size(), file_path_); | |||
| row.setPath(file_path); | |||
| this->tensor_table_ = std::make_unique<TensorQTable>(); | |||
| this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr)); | |||
| this->tensor_table_->push_back(row); | |||
| this->pos_ = 0; | |||
| return 0; | |||
| }}}, | |||
| @@ -458,7 +469,10 @@ Status CsvOp::CsvParser::InitCsvParser() { | |||
| {State::UNQUOTE, | |||
| [this](CsvParser &, char c) -> int { | |||
| if (this->total_rows_ > this->start_offset_ && this->total_rows_ <= this->end_offset_) { | |||
| this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr)); | |||
| TensorRow row(column_default_.size(), nullptr); | |||
| std::vector<std::string> file_path(column_default_.size(), file_path_); | |||
| row.setPath(file_path); | |||
| this->tensor_table_->push_back(row); | |||
| } | |||
| this->str_buf_[0] = c; | |||
| this->pos_ = 1; | |||
| @@ -468,7 +482,10 @@ Status CsvOp::CsvParser::InitCsvParser() { | |||
| {State::DELIM, | |||
| [this](CsvParser &, char c) -> int { | |||
| if (this->total_rows_ > this->start_offset_ && this->total_rows_ <= this->end_offset_) { | |||
| this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr)); | |||
| TensorRow row(column_default_.size(), nullptr); | |||
| std::vector<std::string> file_path(column_default_.size(), file_path_); | |||
| row.setPath(file_path); | |||
| this->tensor_table_->push_back(row); | |||
| } | |||
| return this->PutRecord(c); | |||
| }}}, | |||
| @@ -476,7 +493,10 @@ Status CsvOp::CsvParser::InitCsvParser() { | |||
| {State::QUOTE, | |||
| [this](CsvParser &, char c) -> int { | |||
| if (this->total_rows_ > this->start_offset_ && this->total_rows_ <= this->end_offset_) { | |||
| this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr)); | |||
| TensorRow row(column_default_.size(), nullptr); | |||
| std::vector<std::string> file_path(column_default_.size(), file_path_); | |||
| row.setPath(file_path); | |||
| this->tensor_table_->push_back(row); | |||
| } | |||
| return 0; | |||
| }}}, | |||
| @@ -497,7 +517,7 @@ Status CsvOp::Reset() { | |||
| Status CsvOp::LoadFile(const std::string &file, const int64_t start_offset, const int64_t end_offset, | |||
| const int32_t worker_id) { | |||
| CsvParser csv_parser(worker_id, jagged_buffer_connector_, rows_per_buffer_, field_delim_, column_default_list_); | |||
| CsvParser csv_parser(worker_id, jagged_buffer_connector_, rows_per_buffer_, field_delim_, column_default_list_, file); | |||
| csv_parser.SetStartOffset(start_offset); | |||
| csv_parser.SetEndOffset(end_offset); | |||
| std::ifstream ifs; | |||
| @@ -512,7 +532,7 @@ Status CsvOp::LoadFile(const std::string &file, const int64_t start_offset, cons | |||
| csv_parser.Reset(); | |||
| try { | |||
| while (ifs.good()) { | |||
| // when ifstream reachs the end of file, the function get() return std::char_traits<char>::eof() | |||
| // when ifstream reaches the end of file, the function get() return std::char_traits<char>::eof() | |||
| // which is a 32-bit -1, it's not equal to the 8-bit -1 on Euler OS. So instead of char, we use | |||
| // int to receive its return value. | |||
| int chr = ifs.get(); | |||
| @@ -799,7 +819,7 @@ Status CsvOp::CalculateNumRowsPerShard() { | |||
| } | |||
| int64_t CsvOp::CountTotalRows(const std::string &file) { | |||
| CsvParser csv_parser(0, jagged_buffer_connector_, rows_per_buffer_, field_delim_, column_default_list_); | |||
| CsvParser csv_parser(0, jagged_buffer_connector_, rows_per_buffer_, field_delim_, column_default_list_, file); | |||
| std::ifstream ifs; | |||
| ifs.open(file, std::ifstream::in); | |||
| if (!ifs.is_open()) { | |||
| @@ -64,7 +64,7 @@ class CsvOp : public ParallelOp { | |||
| CsvParser() = delete; | |||
| CsvParser(int32_t worker_id, std::shared_ptr<JaggedConnector> connector, int64_t rows_per_buffer, char field_delim, | |||
| std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default); | |||
| std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default, std::string file_path); | |||
| ~CsvParser() = default; | |||
| @@ -142,6 +142,7 @@ class CsvOp : public ParallelOp { | |||
| std::unique_ptr<TensorQTable> tensor_table_; | |||
| std::unique_ptr<DataBuffer> cur_buffer_; | |||
| std::string err_message_; | |||
| std::string file_path_; | |||
| }; | |||
| class Builder { | |||
| @@ -230,6 +230,7 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, ImageLabelPair pairPtr, | |||
| } | |||
| } | |||
| (*trow) = TensorRow(row_id, {std::move(image), std::move(label)}); | |||
| trow->setPath({folder_path_ + (pairPtr->first), std::string("")}); | |||
| return Status::OK(); | |||
| } | |||
| @@ -219,6 +219,7 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string | |||
| } | |||
| } | |||
| (*trow) = TensorRow(row_id, {std::move(image), std::move(label)}); | |||
| trow->setPath({data.first, file_}); | |||
| return Status::OK(); | |||
| } | |||
| @@ -284,6 +284,8 @@ Status MindRecordOp::GetBufferFromReader(std::unique_ptr<DataBuffer> *fetched_bu | |||
| if (task_type == mindrecord::TaskType::kPaddedTask) { | |||
| TensorRow tensor_row; | |||
| RETURN_IF_NOT_OK(LoadTensorRow(&tensor_row, {}, mindrecord::json(), task_type)); | |||
| std::vector<std::string> file_path(tensor_row.size(), dataset_file_[0]); | |||
| tensor_row.setPath(file_path); | |||
| tensor_table->push_back(std::move(tensor_row)); | |||
| } | |||
| if (tupled_buffer.empty()) break; | |||
| @@ -293,6 +295,8 @@ Status MindRecordOp::GetBufferFromReader(std::unique_ptr<DataBuffer> *fetched_bu | |||
| mindrecord::json columns_json = std::get<1>(tupled_row); | |||
| TensorRow tensor_row; | |||
| RETURN_IF_NOT_OK(LoadTensorRow(&tensor_row, columns_blob, columns_json, task_type)); | |||
| std::vector<std::string> file_path(tensor_row.size(), dataset_file_[0]); | |||
| tensor_row.setPath(file_path); | |||
| tensor_table->push_back(std::move(tensor_row)); | |||
| } | |||
| } | |||
| @@ -82,6 +82,8 @@ MnistOp::MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per | |||
| row_cnt_(0), | |||
| folder_path_(folder_path), | |||
| rows_per_buffer_(rows_per_buffer), | |||
| image_path_({}), | |||
| label_path_({}), | |||
| data_schema_(std::move(data_schema)) { | |||
| io_block_queues_.Init(num_workers, queue_size); | |||
| } | |||
| @@ -191,6 +193,7 @@ Status MnistOp::LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pa | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar(mnist_pair.second, &label)); | |||
| (*trow) = TensorRow(row_id, {std::move(image), std::move(label)}); | |||
| trow->setPath({image_path_[row_id], label_path_[row_id]}); | |||
| return Status::OK(); | |||
| } | |||
| @@ -346,6 +349,8 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->column(0).type(), | |||
| reinterpret_cast<unsigned char *>(pixels), &image)); | |||
| image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j])); | |||
| image_path_.push_back(image_names_[index]); | |||
| label_path_.push_back(label_names_[index]); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -251,6 +251,8 @@ class MnistOp : public ParallelOp, public RandomAccessOp { | |||
| std::vector<MnistLabelPair> image_label_pairs_; | |||
| std::vector<std::string> image_names_; | |||
| std::vector<std::string> label_names_; | |||
| std::vector<std::string> image_path_; | |||
| std::vector<std::string> label_path_; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -147,9 +147,6 @@ Status TextFileOp::Reset() { | |||
| } | |||
| Status TextFileOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTable> *tensor_table, int64_t row) { | |||
| TensorRow tRow(1, nullptr); | |||
| (*tensor_table)->push_back(std::move(tRow)); | |||
| std::shared_ptr<Tensor> tensor; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar(line, &tensor)); | |||
| (**tensor_table)[row][0] = std::move(tensor); | |||
| @@ -183,6 +180,9 @@ Status TextFileOp::LoadFile(const std::string &file, const int64_t start_offset, | |||
| continue; | |||
| } | |||
| TensorRow tRow(1, nullptr); | |||
| tRow.setPath({file}); | |||
| tensor_table->push_back(std::move(tRow)); | |||
| RETURN_IF_NOT_OK(LoadTensor(line, &tensor_table, rows_each_buffer)); | |||
| rows_each_buffer++; | |||
| rows_total++; | |||
| @@ -599,6 +599,11 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off | |||
| std::string errMsg = "Invalid file, failed to parse tfrecord file : " + serialized_example; | |||
| RETURN_STATUS_UNEXPECTED(errMsg); | |||
| } | |||
| int32_t num_columns = data_schema_->NumColumns(); | |||
| TensorRow newRow(num_columns, nullptr); | |||
| std::vector<std::string> file_path(num_columns, filename); | |||
| newRow.setPath(file_path); | |||
| new_tensor_table->push_back(std::move(newRow)); | |||
| RETURN_IF_NOT_OK(LoadExample(&tf_file, &new_tensor_table, rows_read)); | |||
| rows_read++; | |||
| } | |||
| @@ -629,9 +634,6 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off | |||
| Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, std::unique_ptr<TensorQTable> *tensor_table, | |||
| int64_t row) { | |||
| int32_t num_columns = data_schema_->NumColumns(); | |||
| TensorRow newRow(num_columns, nullptr); | |||
| (*tensor_table)->push_back(std::move(newRow)); | |||
| for (int32_t col = 0; col < num_columns; ++col) { | |||
| const ColDescriptor current_col = data_schema_->column(col); | |||
| const dataengine::Features &example_features = tf_file->features(); | |||
| @@ -213,6 +213,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Ten | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target)); | |||
| (*trow) = TensorRow(row_id, {std::move(image), std::move(target)}); | |||
| trow->setPath({kImageFile, kTargetFile}); | |||
| } else if (task_type_ == TaskType::Detection) { | |||
| std::shared_ptr<Tensor> image; | |||
| TensorRow annotation; | |||
| @@ -223,6 +224,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Ten | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); | |||
| RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation)); | |||
| trow->setId(row_id); | |||
| trow->setPath({kImageFile, kAnnotationFile, kAnnotationFile, kAnnotationFile, kAnnotationFile}); | |||
| trow->push_back(std::move(image)); | |||
| trow->insert(trow->end(), annotation.begin(), annotation.end()); | |||
| } | |||
| @@ -159,6 +159,8 @@ Status ZipOp::fillBuffer(TensorQTable *const table) { | |||
| return Status::OK(); | |||
| } | |||
| // else we got a row so pack it into the tensor table. | |||
| // Currently we don't support printing error info after zip | |||
| new_row.setPath({}); | |||
| table->push_back(std::move(new_row)); | |||
| } | |||
| return Status::OK(); | |||
| @@ -141,6 +141,12 @@ class Status { | |||
| StatusCode get_code() const; | |||
| int GetLineOfCode() const { return line_of_code_; } | |||
| std::string SetErrDescription(const std::string &err_description); | |||
| std::string GetErrDescription() const { return err_description_; } | |||
| friend std::ostream &operator<<(std::ostream &os, const Status &s); | |||
| explicit operator bool() const { return (get_code() == StatusCode::kOK); } | |||
| @@ -165,6 +171,9 @@ class Status { | |||
| private: | |||
| StatusCode code_; | |||
| int line_of_code_; | |||
| std::string file_name_; | |||
| std::string err_description_; | |||
| std::string err_msg_; | |||
| }; | |||
| @@ -25,11 +25,10 @@ namespace mindspore { | |||
| namespace dataset { | |||
| Status CFuncOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| Status ret = Status(StatusCode::kOK, "CFunc Call Succeed"); | |||
| try { | |||
| *output = c_func_ptr_(input); | |||
| } catch (const std::exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in CFuncOp"); | |||
| RETURN_STATUS_UNEXPECTED("Error raised, " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -55,9 +55,9 @@ Status ComposeOp::Compute(const TensorRow &inputs, TensorRow *outputs) { | |||
| ComposeOp::ComposeOp(const std::vector<std::shared_ptr<TensorOp>> &ops) : ops_(ops) { | |||
| if (ops_.empty()) { | |||
| MS_LOG(ERROR) << "op_list is empty this might lead to Segmentation Fault."; | |||
| MS_LOG(ERROR) << "Compose: op_list is empty, this might lead to Segmentation Fault."; | |||
| } else if (ops_.size() == 1) { | |||
| MS_LOG(WARNING) << "op_list has only 1 op. Compose is probably not needed."; | |||
| MS_LOG(WARNING) << "Compose: op_list has only 1 op. Compose is probably not needed."; | |||
| } | |||
| } | |||
| @@ -34,17 +34,17 @@ Status ConcatenateOp::OutputShape(const std::vector<TensorShape> &inputs, std::v | |||
| std::vector<TensorShape> inputs_copy; | |||
| inputs_copy.push_back(inputs[0].Squeeze()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(inputs.at(0).Rank() == 1, "Only 1D input tensors supported"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(inputs.at(0).Rank() == 1, "Concatenate: only 1D input supported"); | |||
| outputs.clear(); | |||
| dsize_t output_shape = 0; | |||
| output_shape = output_shape + inputs.at(0).NumOfElements(); | |||
| if (prepend_ != nullptr) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(prepend_->shape().Rank() == 1, "Only 1D prepend tensors supported"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(prepend_->shape().Rank() == 1, "Concatenate: only 1D prepend supported"); | |||
| output_shape = output_shape + prepend_->shape().NumOfElements(); | |||
| } | |||
| if (append_ != nullptr) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(append_->shape().Rank() == 1, "Only 1D append tensors supported"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(append_->shape().Rank() == 1, "Concatenate: only 1D append supported"); | |||
| output_shape = output_shape + append_->shape().NumOfElements(); | |||
| } | |||
| @@ -43,7 +43,7 @@ Status OneHotEncodingUnsigned(const std::shared_ptr<Tensor> &input, std::shared_ | |||
| RETURN_IF_NOT_OK(input->GetItemAt<uint64_t>(&class_idx, {index})); | |||
| } | |||
| if (class_idx >= static_cast<uint64_t>(num_classes)) { | |||
| RETURN_STATUS_UNEXPECTED("One_hot index values are not in range"); | |||
| RETURN_STATUS_UNEXPECTED("OneHot: OneHot index values are not in range"); | |||
| } | |||
| if (input->type() == DataType::DE_UINT64) { | |||
| RETURN_IF_NOT_OK((*output)->SetItemAt<uint64_t>({index, static_cast<dsize_t>(class_idx)}, 1)); | |||
| @@ -54,7 +54,7 @@ Status OneHotEncodingUnsigned(const std::shared_ptr<Tensor> &input, std::shared_ | |||
| } else if (input->type() == DataType::DE_UINT8) { | |||
| RETURN_IF_NOT_OK((*output)->SetItemAt<uint8_t>({index, static_cast<dsize_t>(class_idx)}, 1)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("One hot unsigned only supports unsigned int as input."); | |||
| RETURN_STATUS_UNEXPECTED("OneHot: OneHot unsigned only supports unsigned int as input."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -68,7 +68,7 @@ Status OneHotEncodingSigned(const std::shared_ptr<Tensor> &input, std::shared_pt | |||
| RETURN_IF_NOT_OK(input->GetItemAt<int64_t>(&class_idx, {index})); | |||
| } | |||
| if (class_idx >= static_cast<int64_t>(num_classes)) { | |||
| RETURN_STATUS_UNEXPECTED("One_hot index values are not in range"); | |||
| RETURN_STATUS_UNEXPECTED("OneHot: OneHot index values are not in range"); | |||
| } | |||
| if (input->type() == DataType::DE_INT64) { | |||
| RETURN_IF_NOT_OK((*output)->SetItemAt<int64_t>({index, static_cast<dsize_t>(class_idx)}, 1)); | |||
| @@ -79,7 +79,7 @@ Status OneHotEncodingSigned(const std::shared_ptr<Tensor> &input, std::shared_pt | |||
| } else if (input->type() == DataType::DE_INT8) { | |||
| RETURN_IF_NOT_OK((*output)->SetItemAt<int8_t>({index, static_cast<dsize_t>(class_idx)}, 1)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("One hot signed only supports signed int as input."); | |||
| RETURN_STATUS_UNEXPECTED("OneHot: OneHot signed only supports signed int as input."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -88,10 +88,10 @@ Status OneHotEncoding(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *ou | |||
| input->Squeeze(); | |||
| if (input->Rank() > 1) {  // We expect the input to be in the first dimension | |||
| RETURN_STATUS_UNEXPECTED("One hot only supports scalars or 1D shape Tensors."); | |||
| RETURN_STATUS_UNEXPECTED("OneHot: OneHot only supports scalars or 1D input."); | |||
| } | |||
| if (!input->type().IsInt()) { | |||
| RETURN_STATUS_UNEXPECTED("One hot does not support input of this type."); | |||
| RETURN_STATUS_UNEXPECTED("OneHot: OneHot does not support input of this type."); | |||
| } | |||
| try { | |||
| dsize_t num_elements = 1; | |||
| @@ -111,7 +111,7 @@ Status OneHotEncoding(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *ou | |||
| *output = out; | |||
| return Status::OK(); | |||
| } catch (const std::exception &e) { | |||
| std::string err_msg = "Unexpected error in OneHotOp: "; | |||
| std::string err_msg = "Error raised in OneHot operation: "; | |||
| err_msg += e.what(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -123,9 +123,10 @@ Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output | |||
| const TensorShape &input_shape = input->shape(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!((fill_type == DataType::DE_STRING) && (input_type != DataType::DE_STRING)), | |||
| "Types do not match"); | |||
| "Fill: fill datatype does not match the input datatype."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(fill_value->shape() == TensorShape({}), "fill_value is not a scalar"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(fill_value->shape() == TensorShape({}), | |||
| "Fill: the shape of fill_value is not a scalar."); | |||
| std::shared_ptr<Tensor> out, fill_output; | |||
| @@ -225,7 +226,7 @@ Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output | |||
| break; | |||
| } | |||
| case DataType::DE_UNKNOWN: { | |||
| RETURN_STATUS_UNEXPECTED("FillOp does not support input of this type."); | |||
| RETURN_STATUS_UNEXPECTED("Fill: unknown input datatype."); | |||
| break; | |||
| } | |||
| } | |||
| @@ -283,7 +284,7 @@ void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| Cast<T, double>(input, output); | |||
| break; | |||
| case DataType::DE_UNKNOWN: | |||
| MS_LOG(ERROR) << "Unknown data type."; | |||
| MS_LOG(ERROR) << "TypeCast: unknown datatype."; | |||
| break; | |||
| } | |||
| } | |||
| @@ -331,7 +332,7 @@ Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o | |||
| break; | |||
| case DataType::DE_UNKNOWN: | |||
| // sanity check, unreachable code. | |||
| RETURN_STATUS_UNEXPECTED("TypeCast does not support input of this type."); | |||
| RETURN_STATUS_UNEXPECTED("TypeCast: TypeCast does not support input of this type."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -350,7 +351,7 @@ Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| float float16_max = static_cast<float>(std::numeric_limits<float16>::max()); | |||
| float float16_min = static_cast<float>(std::numeric_limits<float16>::lowest()); | |||
| if (element > float16_max || element < float16_min) { | |||
| RETURN_STATUS_UNEXPECTED("Value " + std::to_string(element) + " is outside of valid float16 range [" + | |||
| RETURN_STATUS_UNEXPECTED("ToFloat16: value " + std::to_string(element) + " is outside of valid float16 range [" + | |||
| std::to_string(float16_min) + ", " + std::to_string(float16_max) + "]."); | |||
| } | |||
| @@ -370,7 +371,7 @@ Status PadEnd(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst, | |||
| } | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(src->type().IsNumeric() == pad_val->type().IsNumeric(), | |||
| "Source and pad_value tensors are not of the same type."); | |||
| "PadEnd: Source and pad_value are not of the same type."); | |||
| if (pad_val->type().IsNumeric()) { | |||
| std::shared_ptr<Tensor> float_pad_value; | |||
| RETURN_IF_NOT_OK(TypeCast(pad_val, &float_pad_value, DataType(DataType::DE_FLOAT32))); | |||
| @@ -385,11 +386,11 @@ Status PadEnd(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst, | |||
| Status PadEndNumeric(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst, | |||
| const std::vector<dsize_t> &pad_shape, float pad_val) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr && dst != nullptr, "tensor can't be nullptr"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr && dst != nullptr, "PadEnd: input or output can't be nullptr"); | |||
| if (src->Rank() == 0 || src->shape().AsVector() == pad_shape) { | |||
| (*dst) = src; // if no padding, copy the pointer | |||
| } else { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(src->Rank() == pad_shape.size(), "Pad to diff rank not allowed"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(src->Rank() == pad_shape.size(), "PadEnd: invalid pad shape."); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(pad_shape), src->type(), dst)); | |||
| auto tensor_type = src->type().value(); | |||
| if (pad_val == 0) { // if pad with zero, don't care what type it is | |||
| @@ -419,7 +420,7 @@ Status PadEndNumeric(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> | |||
| } else if (tensor_type == DataType::DE_FLOAT64) { | |||
| RETURN_IF_NOT_OK((*dst)->Fill<double>(pad_val)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Incorrect/Unknown tensor type"); | |||
| RETURN_STATUS_UNEXPECTED("PadEnd: Incorrect/Unknown datatype"); | |||
| } | |||
| std::vector<dsize_t> cur_ind(src->Rank(), 0); | |||
| RETURN_IF_NOT_OK(PadEndNumericHelper(src, *dst, cur_ind, 0)); | |||
| @@ -512,7 +513,7 @@ Status MaskHelper(const std::shared_ptr<Tensor> &input, const std::shared_ptr<Te | |||
| *out_itr = (*in_itr <= value); | |||
| break; | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Unknown relational operator."); | |||
| RETURN_STATUS_UNEXPECTED("Mask: unknown relational operator."); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| @@ -521,8 +522,8 @@ Status MaskHelper(const std::shared_ptr<Tensor> &input, const std::shared_ptr<Te | |||
| Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::shared_ptr<Tensor> &value, | |||
| RelationalOp op) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type().IsNumeric() == value->type().IsNumeric(), | |||
| "Cannot convert constant value to the type of the input tensor."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(value->shape() == TensorShape::CreateScalar(), "Value is not a scalar"); | |||
| "Mask: input datatype does not match the value datatype."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(value->shape() == TensorShape::CreateScalar(), "Mask: value shape is not a scalar"); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType(DataType::DE_BOOL), output)); | |||
| @@ -575,7 +576,7 @@ Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu | |||
| RETURN_IF_NOT_OK(MaskHelper<std::string_view>(input, *output, casted_value, op)); | |||
| break; | |||
| case DataType::DE_UNKNOWN: | |||
| RETURN_STATUS_UNEXPECTED("Unsupported input type."); | |||
| RETURN_STATUS_UNEXPECTED("Mask: unsupported input datatype."); | |||
| break; | |||
| } | |||
| return Status::OK(); | |||
| @@ -584,7 +585,7 @@ Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu | |||
| Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr<Tensor> prepend, | |||
| std::shared_ptr<Tensor> append) { | |||
| axis = Tensor::HandleNeg(axis, input[0]->shape().Rank()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(axis == 0, "Only axis=0 is supported"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(axis == 0, "Concatenate: only axis=0 is supported"); | |||
| TensorShape t = TensorShape::CreateScalar(); | |||
| @@ -593,20 +594,22 @@ Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std:: | |||
| TensorRow tensor_list; | |||
| if (prepend != nullptr) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == prepend->type(), "Tensor types do not match"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(prepend->shape().Rank() == 1, "Only 1D tensors supported"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == prepend->type(), | |||
| "Concatenate: input datatype does not match the prepend datatype."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(prepend->shape().Rank() == 1, "Concatenate: only 1D input supported"); | |||
| tensor_list.emplace_back(prepend); | |||
| } | |||
| for (dsize_t i = 0; i < input.size(); i++) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == input[i]->type(), "Tensor types do not match"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input[i]->shape().Rank() == 1, "Only 1D tensors supported"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == input[i]->type(), "Concatenate: inconsistent datatype of input."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input[i]->shape().Rank() == 1, "Concatenate: only 1D input supported"); | |||
| tensor_list.emplace_back(input[i]); | |||
| } | |||
| if (append != nullptr) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == append->type(), "Tensor types do not match"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(append->shape().Rank() == 1, "Only 1D tensors supported"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == append->type(), | |||
| "Concatenate: input datatype does not match the append datatype."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(append->shape().Rank() == 1, "Concatenate: only 1D append supported"); | |||
| tensor_list.emplace_back(append); | |||
| } | |||
| @@ -658,7 +661,7 @@ Status BatchTensorToCVTensorVector(const std::shared_ptr<Tensor> &input, | |||
| TensorShape remaining({-1}); | |||
| std::vector<int64_t> index(tensor_shape.size(), 0); | |||
| if (tensor_shape.size() <= 1) { | |||
| RETURN_STATUS_UNEXPECTED("Tensor must be at least 2-D in order to unpack."); | |||
| RETURN_STATUS_UNEXPECTED("MixUpBatch: input must be at least 2-D in order to unpack."); | |||
| } | |||
| TensorShape element_shape(std::vector<int64_t>(tensor_shape.begin() + 1, tensor_shape.end())); | |||
| @@ -670,7 +673,7 @@ Status BatchTensorToCVTensorVector(const std::shared_ptr<Tensor> &input, | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(element_shape, input->type(), start_addr_of_index, &out)); | |||
| std::shared_ptr<CVTensor> cv_out = CVTensor::AsCVTensor(std::move(out)); | |||
| if (!cv_out->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor."); | |||
| RETURN_STATUS_UNEXPECTED("MixUpBatch: allocate memory failed."); | |||
| } | |||
| output->push_back(cv_out); | |||
| } | |||
| @@ -683,7 +686,7 @@ Status BatchTensorToTensorVector(const std::shared_ptr<Tensor> &input, std::vect | |||
| TensorShape remaining({-1}); | |||
| std::vector<int64_t> index(tensor_shape.size(), 0); | |||
| if (tensor_shape.size() <= 1) { | |||
| RETURN_STATUS_UNEXPECTED("Tensor must be at least 2-D in order to unpack."); | |||
| RETURN_STATUS_UNEXPECTED("CutMixBatch: input must be at least 2-D in order to unpack."); | |||
| } | |||
| TensorShape element_shape(std::vector<int64_t>(tensor_shape.begin() + 1, tensor_shape.end())); | |||
| @@ -700,7 +703,7 @@ Status BatchTensorToTensorVector(const std::shared_ptr<Tensor> &input, std::vect | |||
| Status TensorVectorToBatchTensor(const std::vector<std::shared_ptr<Tensor>> &input, std::shared_ptr<Tensor> *output) { | |||
| if (input.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("TensorVectorToBatchTensor: Received an empty vector."); | |||
| RETURN_STATUS_UNEXPECTED("CutMixBatch: the input is empty."); | |||
| } | |||
| std::vector<int64_t> tensor_shape = input.front()->shape().AsVector(); | |||
| tensor_shape.insert(tensor_shape.begin(), input.size()); | |||
| @@ -782,7 +785,7 @@ Status UniqueHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor | |||
| Status Unique(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, | |||
| std::shared_ptr<Tensor> *output_idx, std::shared_ptr<Tensor> *output_cnt) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() == 1, "Only 1D tensors supported."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() == 1, "Unique: only 1D input supported."); | |||
| if (input->type() == DataType::DE_INT64) { | |||
| RETURN_IF_NOT_OK(UniqueHelper<int64_t>(input, output, output_idx, output_cnt)); | |||
| } else if (input->type() == DataType::DE_INT32) { | |||
| @@ -806,7 +809,7 @@ Status Unique(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| } else if (input->type() == DataType::DE_FLOAT64) { | |||
| RETURN_IF_NOT_OK(UniqueHelper<double>(input, output, output_idx, output_cnt)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Unique op only supports numeric input."); | |||
| RETURN_STATUS_UNEXPECTED("Unique: Unique op only supports numeric input."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -24,7 +24,7 @@ namespace dataset { | |||
| Status DuplicateOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Duplicate: only support one input."); | |||
| std::shared_ptr<Tensor> out; | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input[0], &out)); | |||
| output->push_back(input[0]); | |||
| @@ -25,7 +25,7 @@ namespace dataset { | |||
| Status MaskOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| std::shared_ptr<Tensor> temp_output; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(type_.IsNumeric(), "Cannot generate a string mask. Type should be numeric."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(type_.IsNumeric(), "Mask: only support numeric datatype of input."); | |||
| RETURN_IF_NOT_OK(Mask(input, &temp_output, value_, op_)); | |||
| @@ -35,7 +35,7 @@ Status OneHotOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector | |||
| if (inputs_copy[0].Rank() == 0) outputs.emplace_back(std::vector<dsize_t>{num_classes_}); | |||
| if (inputs_copy[0].Rank() == 1) outputs.emplace_back(std::vector<dsize_t>{inputs_copy[0][0], num_classes_}); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "OneHot: invalid input shape."); | |||
| } | |||
| Status OneHotOp::to_json(nlohmann::json *out_json) { | |||
| @@ -33,7 +33,7 @@ Status PadEndOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector | |||
| for (auto s : inputs) { | |||
| outputs.emplace_back(TensorShape(output_shape_.AsVector())); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!outputs.empty(), "Input has a wrong shape"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!outputs.empty(), "PadEnd: invalid input shape."); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| @@ -24,17 +24,18 @@ namespace dataset { | |||
| Status UniqueOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Unique: only support one input tensor."); | |||
| auto in_tensor = input[0]; | |||
| auto in_tensor_shape = in_tensor->shape(); | |||
| auto in_tensor_type = in_tensor->type(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in_tensor_type.IsNumeric(), "Tensor type must be numeric."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in_tensor_shape.Rank() >= 2, "Tensor must be at least 2-D in order to do unique op."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in_tensor_type.IsNumeric(), "Unique: Tensor type must be numeric."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in_tensor_shape.Rank() >= 2, | |||
| "Unique: input must be at least 2-D in order to do unique op."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| in_tensor->Size() <= std::numeric_limits<int32_t>::max(), | |||
| "UniqueOp does not support input tensor large than " + std::to_string(std::numeric_limits<int32_t>::max())); | |||
| "Unique: Unique does not support input tensor larger than " + std::to_string(std::numeric_limits<int32_t>::max())); | |||
| RETURN_IF_NOT_OK(in_tensor->Reshape(TensorShape({in_tensor->Size()}))); | |||
| @@ -44,16 +44,16 @@ Status BoundingBox::ReadFromTensor(const TensorPtr &bbox_tensor, dsize_t index_o | |||
| Status BoundingBox::ValidateBoundingBoxes(const TensorRow &image_and_bbox) { | |||
| if (image_and_bbox.size() != 2) { | |||
| return Status(StatusCode::kBoundingBoxInvalidShape, __LINE__, __FILE__, | |||
| "Requires Image and Bounding Boxes, likely missed bounding boxes."); | |||
| "BoundingBox: invalid input, likely missed bounding boxes."); | |||
| } | |||
| if (image_and_bbox[1]->shape().Size() < 2) { | |||
| return Status(StatusCode::kBoundingBoxInvalidShape, __LINE__, __FILE__, | |||
| "Bounding boxes shape should have at least two dimensions."); | |||
| "BoundingBox: bounding boxes shape should have at least two dimensions."); | |||
| } | |||
| uint32_t num_of_features = image_and_bbox[1]->shape()[1]; | |||
| if (num_of_features < 4) { | |||
| return Status(StatusCode::kBoundingBoxInvalidShape, __LINE__, __FILE__, | |||
| "Bounding boxes should be have at least 4 features."); | |||
| "BoundingBox: bounding boxes should have at least 4 features."); | |||
| } | |||
| std::vector<std::shared_ptr<BoundingBox>> bbox_list; | |||
| RETURN_IF_NOT_OK(GetListOfBoundingBoxes(image_and_bbox[1], &bbox_list)); | |||
| @@ -62,11 +62,11 @@ Status BoundingBox::ValidateBoundingBoxes(const TensorRow &image_and_bbox) { | |||
| for (auto &bbox : bbox_list) { | |||
| if ((bbox->x() + bbox->width() > img_w) || (bbox->y() + bbox->height() > img_h)) { | |||
| return Status(StatusCode::kBoundingBoxOutOfBounds, __LINE__, __FILE__, | |||
| "At least one of the bounding boxes is out of bounds of the image."); | |||
| "BoundingBox: at least one of the bounding boxes is out of bounds of the image."); | |||
| } | |||
| if (static_cast<int>(bbox->x()) < 0 || static_cast<int>(bbox->y()) < 0) { | |||
| return Status(StatusCode::kBoundingBoxOutOfBounds, __LINE__, __FILE__, | |||
| "At least one of the bounding boxes has negative min_x or min_y."); | |||
| "BoundingBox: the coordinates of the bounding boxes have negative values."); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| @@ -133,7 +133,7 @@ Status BoundingBox::UpdateBBoxesForCrop(TensorPtr *bbox_list, size_t *bbox_count | |||
| // Update this bbox and select it to move to the final output tensor | |||
| correct_ind.push_back(i); | |||
| // adjust BBox corners by bringing into new CropBox if beyond | |||
| // Also reseting/adjusting for boxes to lie within CropBox instead of Image - subtract CropBox Xmin/YMin | |||
| // Also resetting/adjusting for boxes to lie within CropBox instead of Image - subtract CropBox Xmin/YMin | |||
| bbox_float bb_Xmin = bbox->x() - std::min(static_cast<bbox_float>(0.0), (bbox->x() - CB_Xmin)) - CB_Xmin; | |||
| bbox_float bb_Ymin = bbox->y() - std::min(static_cast<bbox_float>(0.0), (bbox->y() - CB_Ymin)) - CB_Ymin; | |||
| @@ -32,18 +32,19 @@ const int32_t CenterCropOp::kDefWidth = 0; | |||
| Status CenterCropOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| std::string err_msg; | |||
| std::string err_head = "CenterCrop: "; | |||
| dsize_t rank = input->shape().Rank(); | |||
| err_msg += (rank < 2 || rank > 3) ? "Rank received::" + std::to_string(rank) + " Expected: 2 or 3 \t" : ""; | |||
| err_msg += (rank < 2 || rank > 3) ? "rank received: " + std::to_string(rank) + " Expected: 2 or 3 \t" : ""; | |||
| err_msg += (crop_het_ <= 0 || crop_wid_ <= 0) ? "crop size needs to be positive integers\t" : ""; | |||
| if (err_msg.length() != 0) RETURN_STATUS_UNEXPECTED(common::SafeCStr(err_msg)); | |||
| if (err_msg.length() != 0) RETURN_STATUS_UNEXPECTED(err_head + err_msg); | |||
| int32_t top = crop_het_ - input->shape()[0]; // number of pixels to pad (top and bottom) | |||
| int32_t left = crop_wid_ - input->shape()[1]; | |||
| std::shared_ptr<Tensor> pad_image; | |||
| CHECK_FAIL_RETURN_UNEXPECTED((top < input->shape()[0] * 3 && left < input->shape()[1] * 3), | |||
| "CenterCropOp padding size is too big, it's more than 3 times the original size."); | |||
| "CenterCrop: CenterCropOp padding size is more than 3 times the original size."); | |||
| if (top > 0 && left > 0) { // padding only | |||
| return Pad(input, output, top / 2 + top % 2, top / 2, left / 2 + left % 2, left / 2, BorderType::kConstant); | |||
| @@ -71,7 +72,7 @@ Status CenterCropOp::OutputShape(const std::vector<TensorShape> &inputs, std::ve | |||
| if (inputs[0].Rank() == 2) outputs.emplace_back(out); | |||
| if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "CenterCrop: invalid input shape."); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -27,12 +27,13 @@ namespace dataset { | |||
| Status CropOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 2, "The shape size " + std::to_string(input->shape().Size()) + | |||
| " of input tensor is invalid"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| input->shape().Size() >= 2, | |||
| "Crop: the shape size " + std::to_string(input->shape().Size()) + " of input is invalid."); | |||
| int32_t input_h = static_cast<int>(input->shape()[0]); | |||
| int32_t input_w = static_cast<int>(input->shape()[1]); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(y_ + height_ <= input_h, "Crop height dimensions exceed image dimensions"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(x_ + width_ <= input_w, "Crop width dimensions exceed image dimensions"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(y_ + height_ <= input_h, "Crop: Crop height dimension exceeds image dimensions."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(x_ + width_ <= input_w, "Crop: Crop width dimension exceeds image dimensions."); | |||
| return Crop(input, output, x_, y_, height_, width_); | |||
| } | |||
| @@ -43,7 +44,7 @@ Status CropOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<T | |||
| if (inputs[0].Rank() == 2) outputs.emplace_back(out); | |||
| if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "Crop: invalid input shape."); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -50,7 +50,7 @@ void CutMixBatchOp::GetCropBox(int height, int width, float lam, int *x, int *y, | |||
| Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| if (input.size() < 2) { | |||
| RETURN_STATUS_UNEXPECTED("Both images and labels columns are required for this operation."); | |||
| RETURN_STATUS_UNEXPECTED("CutMixBatch: both image and label columns are required."); | |||
| } | |||
| std::vector<std::shared_ptr<Tensor>> images; | |||
| @@ -60,22 +60,24 @@ Status CutMixBatchOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| // Check inputs | |||
| if (image_shape.size() != 4 || image_shape[0] != label_shape[0]) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: You must make sure images are HWC or CHW and batched before calling CutMixBatch."); | |||
| "CutMixBatch: please make sure images are HWC or CHW " | |||
| "and batched before calling CutMixBatch."); | |||
| } | |||
| if (!input.at(1)->type().IsInt()) { | |||
| RETURN_STATUS_UNEXPECTED("CutMixBatch: Wrong labels type. The second column (labels) must only include int types."); | |||
| } | |||
| if (label_shape.size() != 2 && label_shape.size() != 3) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC where N is the batch " | |||
| "size, L is the number of labels in each row, " | |||
| "and C is the number of classes. labels must be in one-hot format and in a batch."); | |||
| "CutMixBatch: wrong labels shape. " | |||
| "The second column (labels) must have a shape of NC or NLC where N is the batch size, " | |||
| "L is the number of labels in each row, and C is the number of classes. " | |||
| "labels must be in one-hot format and in a batch."); | |||
| } | |||
| if ((image_shape[1] != 1 && image_shape[1] != 3) && image_batch_format_ == ImageBatchFormat::kNCHW) { | |||
| RETURN_STATUS_UNEXPECTED("CutMixBatch: Image doesn't match the given image format."); | |||
| RETURN_STATUS_UNEXPECTED("CutMixBatch: image doesn't match the NCHW format."); | |||
| } | |||
| if ((image_shape[3] != 1 && image_shape[3] != 3) && image_batch_format_ == ImageBatchFormat::kNHWC) { | |||
| RETURN_STATUS_UNEXPECTED("CutMixBatch: Image doesn't match the given image format."); | |||
| RETURN_STATUS_UNEXPECTED("CutMixBatch: image doesn't match the NHWC format."); | |||
| } | |||
| // Move images into a vector of Tensors | |||
| @@ -38,12 +38,12 @@ Status DecodeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||
| IO_CHECK(input, output); | |||
| // check the input tensor shape | |||
| if (input->Rank() != 1) { | |||
| RETURN_STATUS_UNEXPECTED("DecodeOp error: invalid input shape, only support 1D input."); | |||
| RETURN_STATUS_UNEXPECTED("Decode: invalid input shape, only support 1D input."); | |||
| } | |||
| if (is_rgb_format_) { // RGB colour mode | |||
| return Decode(input, output); | |||
| } else { // BGR colour mode | |||
| RETURN_STATUS_UNEXPECTED("Decode BGR is deprecated"); | |||
| RETURN_STATUS_UNEXPECTED("Decode: only support RGB image."); | |||
| } | |||
| } | |||
| Status DecodeOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) { | |||
| @@ -52,7 +52,7 @@ Status DecodeOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector | |||
| TensorShape out({-1, -1, 3}); // we don't know what is output image size, but we know it should be 3 channels | |||
| if (inputs[0].Rank() == 1) outputs.emplace_back(out); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "Decode: invalid input shape."); | |||
| } | |||
| Status DecodeOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) { | |||
| @@ -33,7 +33,7 @@ Status HwcToChwOp::OutputShape(const std::vector<TensorShape> &inputs, std::vect | |||
| TensorShape out = TensorShape{in[2], in[0], in[1]}; | |||
| if (inputs[0].Rank() == 3) outputs.emplace_back(out); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "HwcToChw: invalid input shape."); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -73,10 +73,10 @@ Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in flip op: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Flip: " + std::string(e.what())); | |||
| } | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor, the input data is null"); | |||
| RETURN_STATUS_UNEXPECTED("Flip: allocate memory failed."); | |||
| } | |||
| } | |||
| @@ -92,17 +92,17 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| int32_t output_width, double fx, double fy, InterpolationMode mode) { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Resize: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3 && input_cv->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("Input Tensor is not in shape of <H,W,C> or <H,W>"); | |||
| RETURN_STATUS_UNEXPECTED("Resize: input is not in shape of <H,W,C> or <H,W>"); | |||
| } | |||
| cv::Mat in_image = input_cv->mat(); | |||
| // resize image too large or too small | |||
| if (output_height == 0 || output_height > in_image.rows * 1000 || output_width == 0 || | |||
| output_width > in_image.cols * 1000) { | |||
| std::string err_msg = | |||
| "The resizing width or height 1) is too big, it's up to " | |||
| "Resize: the resizing width or height 1) is too big, it's up to " | |||
| "1000 times the original image; 2) can not be 0."; | |||
| return Status(StatusCode::kShapeMisMatch, err_msg); | |||
| } | |||
| @@ -118,7 +118,7 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in image resize: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Resize: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -139,12 +139,12 @@ Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Decode: load image failed."); | |||
| } | |||
| try { | |||
| cv::Mat img_mat = cv::imdecode(input_cv->mat(), cv::IMREAD_COLOR | cv::IMREAD_IGNORE_ORIENTATION); | |||
| if (img_mat.data == nullptr) { | |||
| std::string err = "Error in decoding\t"; | |||
| std::string err = "Decode: image decode failed."; | |||
| RETURN_STATUS_UNEXPECTED(err); | |||
| } | |||
| cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB)); | |||
| @@ -153,7 +153,7 @@ Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in image Decode: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Decode: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -216,7 +216,7 @@ static Status JpegReadScanlines(jpeg_decompress_struct *const cinfo, int max_sca | |||
| try { | |||
| num_lines_read = jpeg_read_scanlines(cinfo, &scanline_ptr, 1); | |||
| } catch (std::runtime_error &e) { | |||
| RETURN_STATUS_UNEXPECTED("jpeg_read_scanlines error."); | |||
| RETURN_STATUS_UNEXPECTED("Decode: jpeg_read_scanlines error."); | |||
| } | |||
| if (cinfo->out_color_space == JCS_CMYK && num_lines_read > 0) { | |||
| for (int i = 0; i < crop_w; ++i) { | |||
| @@ -243,11 +243,11 @@ static Status JpegReadScanlines(jpeg_decompress_struct *const cinfo, int max_sca | |||
| int copy_status = memcpy_s(buffer, buffer_size, scanline_ptr + offset, stride); | |||
| if (copy_status != 0) { | |||
| jpeg_destroy_decompress(cinfo); | |||
| RETURN_STATUS_UNEXPECTED("memcpy failed"); | |||
| RETURN_STATUS_UNEXPECTED("Decode: memcpy failed"); | |||
| } | |||
| } else { | |||
| jpeg_destroy_decompress(cinfo); | |||
| std::string err_msg = "failed to read scanline"; | |||
| std::string err_msg = "Decode: failed to read scanline"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| buffer += stride; | |||
| @@ -271,7 +271,7 @@ static Status JpegSetColorSpace(jpeg_decompress_struct *cinfo) { | |||
| return Status::OK(); | |||
| default: | |||
| jpeg_destroy_decompress(cinfo); | |||
| std::string err_msg = "wrong number of components"; | |||
| std::string err_msg = "Decode: image decompress failed."; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| } | |||
| @@ -306,7 +306,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||
| crop_h = cinfo.output_height; | |||
| } else if (crop_w == 0 || static_cast<unsigned int>(crop_w + crop_x) > cinfo.output_width || crop_h == 0 || | |||
| static_cast<unsigned int>(crop_h + crop_y) > cinfo.output_height) { | |||
| return DestroyDecompressAndReturnError("Crop window is not valid"); | |||
| return DestroyDecompressAndReturnError("Crop: invalid crop size."); | |||
| } | |||
| const int mcu_size = cinfo.min_DCT_scaled_size; | |||
| unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size; | |||
| @@ -341,7 +341,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||
| Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float rescale, float shift) { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Rescale: load image failed."); | |||
| } | |||
| cv::Mat input_image = input_cv->mat(); | |||
| std::shared_ptr<CVTensor> output_cv; | |||
| @@ -350,7 +350,7 @@ Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *ou | |||
| input_image.convertTo(output_cv->mat(), CV_32F, rescale, shift); | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in image rescale: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Rescale: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -358,18 +358,18 @@ Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *ou | |||
| Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h) { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Crop: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3 && input_cv->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("Shape not <H,W,C> or <H,W>"); | |||
| RETURN_STATUS_UNEXPECTED("Crop: invalid image Shape, only support <H,W,C> or <H,W>"); | |||
| } | |||
| // account for integer overflow | |||
| if (y < 0 || (y + h) > input_cv->shape()[0] || (y + h) < 0) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid y coordinate value for crop"); | |||
| RETURN_STATUS_UNEXPECTED("Crop: invalid y coordinate value for crop"); | |||
| } | |||
| // account for integer overflow | |||
| if (x < 0 || (x + w) > input_cv->shape()[1] || (x + w) < 0) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid x coordinate value for crop"); | |||
| RETURN_STATUS_UNEXPECTED("Crop: invalid x coordinate value for crop"); | |||
| } | |||
| try { | |||
| TensorShape shape{h, w}; | |||
| @@ -382,7 +382,7 @@ Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in crop: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Crop: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -390,7 +390,7 @@ Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) | |||
| try { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("HwcToChw: load image failed."); | |||
| } | |||
| if (input_cv->Rank() == 2) { | |||
| // If input tensor is 2D, we assume we have hw dimensions | |||
| @@ -400,7 +400,7 @@ Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->shape().Size() < 2 || input_cv->shape().Size() > 3 || | |||
| (input_cv->shape().Size() == 3 && num_channels != 3 && num_channels != 1)) { | |||
| RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3 nor 1"); | |||
| RETURN_STATUS_UNEXPECTED("HwcToChw: invalid image shape: number of channels does not equal 3 nor 1"); | |||
| } | |||
| cv::Mat output_img; | |||
| @@ -417,7 +417,7 @@ Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) | |||
| *output = std::move(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in ChannelSwap: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("HwcToChw: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -425,10 +425,14 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te | |||
| int crop_width, int crop_height, ImageFormat image_format) { | |||
| if (image_format == ImageFormat::HWC) { | |||
| if ((*input)->Rank() != 3 || ((*input)->shape()[2] != 1 && (*input)->shape()[2] != 3)) { | |||
| RETURN_STATUS_UNEXPECTED("MaskWithTensor: Image shape doesn't match the given image_format."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: MaskWithTensor failed: " | |||
| "input shape doesn't match <H,W,C> format."); | |||
| } | |||
| if (sub_mat->Rank() != 3 || (sub_mat->shape()[2] != 1 && sub_mat->shape()[2] != 3)) { | |||
| RETURN_STATUS_UNEXPECTED("MaskWithTensor: sub_mat shape doesn't match the given image_format."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: MaskWithTensor failed: " | |||
| "sub_mat shape doesn't match <H,W,C> format."); | |||
| } | |||
| int number_of_channels = (*input)->shape()[2]; | |||
| for (int i = 0; i < crop_width; i++) { | |||
| @@ -440,10 +444,14 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te | |||
| } | |||
| } else if (image_format == ImageFormat::CHW) { | |||
| if ((*input)->Rank() != 3 || ((*input)->shape()[0] != 1 && (*input)->shape()[0] != 3)) { | |||
| RETURN_STATUS_UNEXPECTED("MaskWithTensor: Image shape doesn't match the given image_format."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: MaskWithTensor failed: " | |||
| "input shape doesn't match <C,H,W> format."); | |||
| } | |||
| if (sub_mat->Rank() != 3 || (sub_mat->shape()[0] != 1 && sub_mat->shape()[0] != 3)) { | |||
| RETURN_STATUS_UNEXPECTED("MaskWithTensor: sub_mat shape doesn't match the given image_format."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: MaskWithTensor failed: " | |||
| "sub_mat shape doesn't match <C,H,W> format."); | |||
| } | |||
| int number_of_channels = (*input)->shape()[0]; | |||
| for (int i = 0; i < crop_width; i++) { | |||
| @@ -455,10 +463,14 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te | |||
| } | |||
| } else if (image_format == ImageFormat::HW) { | |||
| if ((*input)->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("MaskWithTensor: Image shape doesn't match the given image_format."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: MaskWithTensor failed: " | |||
| "input shape doesn't match <H,W> format."); | |||
| } | |||
| if (sub_mat->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("MaskWithTensor: sub_mat shape doesn't match the given image_format."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: MaskWithTensor failed: " | |||
| "sub_mat shape doesn't match <H,W> format."); | |||
| } | |||
| for (int i = 0; i < crop_width; i++) { | |||
| for (int j = 0; j < crop_height; j++) { | |||
| @@ -466,7 +478,9 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te | |||
| } | |||
| } | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("MaskWithTensor: Image format must be CHW, HWC, or HW."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: MaskWithTensor failed: " | |||
| "image format must be CHW, HWC, or HW."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -474,7 +488,9 @@ Status MaskWithTensor(const std::shared_ptr<Tensor> &sub_mat, std::shared_ptr<Te | |||
| Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared_ptr<Tensor> *dest_tensor, | |||
| const std::vector<int64_t> &source_indx, const std::vector<int64_t> &dest_indx) { | |||
| if (source_tensor->type() != (*dest_tensor)->type()) | |||
| RETURN_STATUS_UNEXPECTED("CopyTensorValue: source and destination tensor must have the same type."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: CopyTensorValue failed: " | |||
| "source and destination tensor must have the same type."); | |||
| if (source_tensor->type() == DataType::DE_UINT8) { | |||
| uint8_t pixel_value; | |||
| RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx)); | |||
| @@ -484,7 +500,9 @@ Status CopyTensorValue(const std::shared_ptr<Tensor> &source_tensor, std::shared | |||
| RETURN_IF_NOT_OK(source_tensor->GetItemAt(&pixel_value, source_indx)); | |||
| RETURN_IF_NOT_OK((*dest_tensor)->SetItemAt(dest_indx, pixel_value)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("CopyTensorValue: Tensor type is not supported. Tensor type must be float32 or uint8."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "CutMixBatch: CopyTensorValue failed: " | |||
| "Tensor type is not supported. Tensor type must be float32 or uint8."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -494,7 +512,9 @@ Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *ou | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input)); | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->shape().Size() != 3 || num_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "SwapRedBlue: invalid input shape, " | |||
| "number of channels does not equal 3"); | |||
| } | |||
| std::shared_ptr<CVTensor> output_cv; | |||
| RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv)); | |||
| @@ -503,7 +523,7 @@ Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *ou | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in ChangeMode: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("SwapRedBlue: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -512,16 +532,16 @@ Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tenso | |||
| try { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("CropAndResize: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3 && input_cv->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("Shape not <H,W,C> or <H,W>"); | |||
| RETURN_STATUS_UNEXPECTED("CropAndResize: image shape is not <H,W,C> or <H,W>"); | |||
| } | |||
| // image too large or too small | |||
| if (crop_height == 0 || crop_width == 0 || target_height == 0 || target_height > crop_height * 1000 || | |||
| target_width == 0 || target_height > crop_width * 1000) { | |||
| std::string err_msg = | |||
| "The resizing width or height 1) is too big, it's up to " | |||
| "CropAndResize: the resizing width or height 1) is too big, it's up to " | |||
| "1000 times the original image; 2) can not be 0."; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -537,7 +557,7 @@ Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tenso | |||
| *output = std::static_pointer_cast<Tensor>(cvt_out); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in CropAndResize: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("CropAndResize: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -546,11 +566,11 @@ Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| try { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Rotate: load image failed."); | |||
| } | |||
| cv::Mat input_img = input_cv->mat(); | |||
| if (input_img.cols > (MAX_INT_PRECISION * 2) || input_img.rows > (MAX_INT_PRECISION * 2)) { | |||
| RETURN_STATUS_UNEXPECTED("Image too large center not precise"); | |||
| RETURN_STATUS_UNEXPECTED("Rotate: image is too large and center not precise"); | |||
| } | |||
| // default to center of image | |||
| if (fx == -1 && fy == -1) { | |||
| @@ -584,7 +604,7 @@ Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| } | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in image rotation: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Rotate: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -592,20 +612,23 @@ Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, | |||
| const std::shared_ptr<Tensor> &mean, const std::shared_ptr<Tensor> &std) { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!(input_cv->mat().data && input_cv->Rank() == 3)) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Normalize: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Normalize: only support 3 channels image."); | |||
| } | |||
| cv::Mat in_image = input_cv->mat(); | |||
| std::shared_ptr<CVTensor> output_cv; | |||
| RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv)); | |||
| mean->Squeeze(); | |||
| if (mean->type() != DataType::DE_FLOAT32 || mean->Rank() != 1 || mean->shape()[0] != 3) { | |||
| std::string err_msg = "Mean tensor should be of size 3 and type float."; | |||
| std::string err_msg = "Normalize: mean should be of size 3 and type float."; | |||
| return Status(StatusCode::kShapeMisMatch, err_msg); | |||
| } | |||
| std->Squeeze(); | |||
| if (std->type() != DataType::DE_FLOAT32 || std->Rank() != 1 || std->shape()[0] != 3) { | |||
| std::string err_msg = "Std tensor should be of size 3 and type float."; | |||
| std::string err_msg = "Normalize: std tensor should be of size 3 and type float."; | |||
| return Status(StatusCode::kShapeMisMatch, err_msg); | |||
| } | |||
| try { | |||
| @@ -614,7 +637,7 @@ Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| std::vector<cv::Mat> rgb; | |||
| cv::split(in_image, rgb); | |||
| if (rgb.size() != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Input image is not in RGB."); | |||
| RETURN_STATUS_UNEXPECTED("Normalize: input image is not in RGB."); | |||
| } | |||
| for (uint8_t i = 0; i < 3; i++) { | |||
| float mean_c, std_c; | |||
| @@ -626,7 +649,7 @@ Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in Normalize: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Normalize: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -634,7 +657,7 @@ Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor | |||
| const std::shared_ptr<Tensor> &mean, const std::shared_ptr<Tensor> &std, const std::string &dtype) { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!(input_cv->mat().data && input_cv->Rank() == 3)) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("NormalizePad: load image failed."); | |||
| } | |||
| DataType tensor_type = DataType(DataType::DE_FLOAT32); | |||
| int compute_type = CV_32F; | |||
| @@ -650,12 +673,12 @@ Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor | |||
| RETURN_IF_NOT_OK(CVTensor::CreateEmpty(new_shape, tensor_type, &output_cv)); | |||
| mean->Squeeze(); | |||
| if (mean->type() != DataType::DE_FLOAT32 || mean->Rank() != 1 || mean->shape()[0] != 3) { | |||
| std::string err_msg = "Mean tensor should be of size 3 and type float."; | |||
| std::string err_msg = "NormalizePad: mean tensor should be of size 3 and type float."; | |||
| return Status(StatusCode::kShapeMisMatch, err_msg); | |||
| } | |||
| std->Squeeze(); | |||
| if (std->type() != DataType::DE_FLOAT32 || std->Rank() != 1 || std->shape()[0] != 3) { | |||
| std::string err_msg = "Std tensor should be of size 3 and type float."; | |||
| std::string err_msg = "NormalizePad: std tensor should be of size 3 and type float."; | |||
| return Status(StatusCode::kShapeMisMatch, err_msg); | |||
| } | |||
| try { | |||
| @@ -664,7 +687,7 @@ Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor | |||
| std::vector<cv::Mat> rgb; | |||
| cv::split(in_image, rgb); | |||
| if (rgb.size() != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Input image is not in RGB."); | |||
| RETURN_STATUS_UNEXPECTED("NormalizePad: input image is not in RGB."); | |||
| } | |||
| for (uint8_t i = 0; i < 3; i++) { | |||
| float mean_c, std_c; | |||
| @@ -677,7 +700,7 @@ Status NormalizePad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in NormalizePad"); | |||
| RETURN_STATUS_UNEXPECTED("NormalizePad: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -686,18 +709,20 @@ Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| cv::Mat input_img = input_cv->mat(); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("AdjustBrightness: load image failed."); | |||
| } | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->Rank() != 3 || num_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "AdjustBrightness: image shape is incorrect: " | |||
| "number of channels does not equal 3"); | |||
| } | |||
| std::shared_ptr<CVTensor> output_cv; | |||
| RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv)); | |||
| output_cv->mat() = input_img * alpha; | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in adjust brightness: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("AdjustBrightness: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -707,11 +732,13 @@ Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tens | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| cv::Mat input_img = input_cv->mat(); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("AdjustContrast: "); | |||
| } | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->Rank() != 3 || num_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "AdjustContrast: image shape is incorrect: " | |||
| "number of channels does not equal 3"); | |||
| } | |||
| cv::Mat gray, output_img; | |||
| cv::cvtColor(input_img, gray, CV_RGB2GRAY); | |||
| @@ -724,7 +751,7 @@ Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tens | |||
| output_cv->mat() = output_img * (1.0 - alpha) + input_img * alpha; | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in adjust contrast: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("AdjustContrast: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -734,10 +761,10 @@ Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor | |||
| try { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("AutoContrast: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3 && input_cv->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("Shape not <H,W,C> or <H,W>"); | |||
| RETURN_STATUS_UNEXPECTED("AutoContrast: image shape is not <H,W,C> or <H,W>"); | |||
| } | |||
| // Reshape to extend dimension if rank is 2 for algorithm to work. then reshape output to be of rank 2 like input | |||
| if (input_cv->Rank() == 2) { | |||
| @@ -746,7 +773,7 @@ Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor | |||
| // Get number of channels and image matrix | |||
| std::size_t num_of_channels = input_cv->shape()[2]; | |||
| if (num_of_channels != 1 && num_of_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Number of channels is not 1 or 3."); | |||
| RETURN_STATUS_UNEXPECTED("AutoContrast: the number of channels is not 1 or 3."); | |||
| } | |||
| cv::Mat image = input_cv->mat(); | |||
| // Separate the image to channels | |||
| @@ -802,7 +829,7 @@ Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor | |||
| (*output) = std::static_pointer_cast<Tensor>(output_cv); | |||
| (*output)->Reshape(input->shape()); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in auto contrast: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("AutoContrast: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -812,11 +839,13 @@ Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| cv::Mat input_img = input_cv->mat(); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("AdjustSaturation: load image failed."); | |||
| } | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->Rank() != 3 || num_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "AdjustSaturation: image shape is incorrect: " | |||
| "number of channels does not equal 3"); | |||
| } | |||
| std::shared_ptr<CVTensor> output_cv; | |||
| RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv)); | |||
| @@ -827,25 +856,24 @@ Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te | |||
| output_cv->mat() = output_img * (1.0 - alpha) + input_img * alpha; | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in adjust saturation: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("AdjustSaturation: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &hue) { | |||
| if (hue > 0.5 || hue < -0.5) { | |||
| MS_LOG(ERROR) << "Hue factor is not in [-0.5, 0.5]."; | |||
| RETURN_STATUS_UNEXPECTED("hue_factor is not in [-0.5, 0.5]."); | |||
| RETURN_STATUS_UNEXPECTED("AdjustHue: hue value is not in [-0.5, 0.5]."); | |||
| } | |||
| try { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| cv::Mat input_img = input_cv->mat(); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("AdjustHue: load image failed."); | |||
| } | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->Rank() != 3 || num_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3"); | |||
| RETURN_STATUS_UNEXPECTED("AdjustHue: number of channels does not equal 3"); | |||
| } | |||
| std::shared_ptr<CVTensor> output_cv; | |||
| RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv)); | |||
| @@ -863,7 +891,7 @@ Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| cv::cvtColor(output_img, output_cv->mat(), CV_HSV2RGB_FULL); | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in adjust hue: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("AdjustHue: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -872,10 +900,10 @@ Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o | |||
| try { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Equalize: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3 && input_cv->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("Shape not <H,W,C> or <H,W>"); | |||
| RETURN_STATUS_UNEXPECTED("Equalize: image shape is not <H,W,C> or <H,W>"); | |||
| } | |||
| // For greyscale images, extend dimension if rank is 2 and reshape output to be of rank 2. | |||
| if (input_cv->Rank() == 2) { | |||
| @@ -884,7 +912,7 @@ Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o | |||
| // Get number of channels and image matrix | |||
| std::size_t num_of_channels = input_cv->shape()[2]; | |||
| if (num_of_channels != 1 && num_of_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Number of channels is not 1 or 3."); | |||
| RETURN_STATUS_UNEXPECTED("Equalize: number of channels is not 1 or 3."); | |||
| } | |||
| cv::Mat image = input_cv->mat(); | |||
| // Separate the image to channels | |||
| @@ -904,7 +932,7 @@ Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o | |||
| (*output) = std::static_pointer_cast<Tensor>(output_cv); | |||
| (*output)->Reshape(input->shape()); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in equalize: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Equalize: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -915,15 +943,18 @@ Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outp | |||
| try { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->mat().data == nullptr || input_cv->Rank() != 3 || num_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("bad CV Tensor input for erase"); | |||
| if (input_cv->mat().data == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Erase: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3 || num_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Erase: number of channels is not 1 or 3."); | |||
| } | |||
| cv::Mat input_img = input_cv->mat(); | |||
| int32_t image_h = input_cv->shape()[0]; | |||
| int32_t image_w = input_cv->shape()[1]; | |||
| // check if erase size is bigger than image itself | |||
| if (box_height > image_h || box_width > image_w) { | |||
| RETURN_STATUS_UNEXPECTED("input box size too large for image erase"); | |||
| RETURN_STATUS_UNEXPECTED("Erase: box size is too large for image erase"); | |||
| } | |||
| // for random color | |||
| @@ -966,7 +997,7 @@ Status Erase(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outp | |||
| *output = std::static_pointer_cast<Tensor>(input); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in erasing: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Erase: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -994,7 +1025,7 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in pad: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("Pad: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -1003,7 +1034,10 @@ Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input)); | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->shape().Size() != 3 || num_channels != 4) { | |||
| std::string err_msg = "Number of channels does not equal 4, got : " + std::to_string(num_channels); | |||
| std::string err_msg = | |||
| "RgbaToRgb: number of channels does not equal 4, " | |||
| "got : " + | |||
| std::to_string(num_channels); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3}); | |||
| @@ -1013,7 +1047,7 @@ Status RgbaToRgb(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in RgbaToRgb: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("RgbaToRgb: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -1022,7 +1056,10 @@ Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input)); | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->shape().Size() != 3 || num_channels != 4) { | |||
| std::string err_msg = "Number of channels does not equal 4, got : " + std::to_string(num_channels); | |||
| std::string err_msg = | |||
| "RgbaToBgr: number of channels does not equal 4, " | |||
| "got : " + | |||
| std::to_string(num_channels); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| TensorShape out_shape = TensorShape({input_cv->shape()[0], input_cv->shape()[1], 3}); | |||
| @@ -1032,7 +1069,7 @@ Status RgbaToBgr(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in RgbaToBgr: " + std::string(e.what())); | |||
| RETURN_STATUS_UNEXPECTED("RgbaToBgr: " + std::string(e.what())); | |||
| } | |||
| } | |||
| @@ -30,15 +30,15 @@ Status InvertOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| cv::Mat input_img = input_cv->mat(); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Invert: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Shape not <H,W,C>"); | |||
| RETURN_STATUS_UNEXPECTED("Invert: image shape is not <H,W,C>"); | |||
| } | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (num_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("The shape is incorrect: num of channels != 3"); | |||
| RETURN_STATUS_UNEXPECTED("Invert: image shape is incorrect: num of channels != 3"); | |||
| } | |||
| std::shared_ptr<CVTensor> output_cv; | |||
| RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv)); | |||
| @@ -49,7 +49,7 @@ Status InvertOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||
| } | |||
| catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in invert"); | |||
| RETURN_STATUS_UNEXPECTED("Invert: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -95,7 +95,7 @@ static Status JpegReadScanlines(jpeg_decompress_struct *const cinfo, int max_sca | |||
| try { | |||
| num_lines_read = jpeg_read_scanlines(cinfo, &scanline_ptr, 1); | |||
| } catch (std::runtime_error &e) { | |||
| RETURN_STATUS_UNEXPECTED("jpeg_read_scanlines error."); | |||
| RETURN_STATUS_UNEXPECTED("Decode: jpeg_read_scanlines error."); | |||
| } | |||
| if (cinfo->out_color_space == JCS_CMYK && num_lines_read > 0) { | |||
| for (int i = 0; i < crop_w; ++i) { | |||
| @@ -122,11 +122,11 @@ static Status JpegReadScanlines(jpeg_decompress_struct *const cinfo, int max_sca | |||
| auto copy_status = memcpy_s(buffer, buffer_size, scanline_ptr + offset, stride); | |||
| if (copy_status != 0) { | |||
| jpeg_destroy_decompress(cinfo); | |||
| RETURN_STATUS_UNEXPECTED("memcpy failed"); | |||
| RETURN_STATUS_UNEXPECTED("Decode: memcpy_s failed"); | |||
| } | |||
| } else { | |||
| jpeg_destroy_decompress(cinfo); | |||
| std::string err_msg = "failed to read scanline"; | |||
| std::string err_msg = "Decode: failed to decompress image."; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| buffer += stride; | |||
| @@ -150,7 +150,7 @@ static Status JpegSetColorSpace(jpeg_decompress_struct *cinfo) { | |||
| return Status::OK(); | |||
| default: | |||
| jpeg_destroy_decompress(cinfo); | |||
| std::string err_msg = "wrong number of components"; | |||
| std::string err_msg = "Decode: failed to decompress image."; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| } | |||
| @@ -185,7 +185,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||
| crop_h = cinfo.output_height; | |||
| } else if (crop_w == 0 || static_cast<unsigned int>(crop_w + crop_x) > cinfo.output_width || crop_h == 0 || | |||
| static_cast<unsigned int>(crop_h + crop_y) > cinfo.output_height) { | |||
| return DestroyDecompressAndReturnError("Crop window is not valid"); | |||
| return DestroyDecompressAndReturnError("Decode: invalid crop size"); | |||
| } | |||
| const int mcu_size = cinfo.min_DCT_scaled_size; | |||
| unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size; | |||
| @@ -231,26 +231,26 @@ Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| if (IsNonEmptyJPEG(input)) { | |||
| return JpegCropAndDecode(input, output); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Decode only supports jpeg for android"); | |||
| RETURN_STATUS_UNEXPECTED("Decode: only jpeg images are supported on Android"); | |||
| } | |||
| } | |||
| Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int x, int y, int w, int h) { | |||
| if (input->Rank() != 3 && input->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("Shape not <H,W,C> or <H,W>"); | |||
| RETURN_STATUS_UNEXPECTED("Crop: image shape is not <H,W,C> or <H,W>"); | |||
| } | |||
| if (input->type() != DataType::DE_FLOAT32 && input->type() != DataType::DE_UINT8) { | |||
| RETURN_STATUS_UNEXPECTED("Only float32, uint8 support in Crop"); | |||
| RETURN_STATUS_UNEXPECTED("Crop: image datatype is not float32 or uint8"); | |||
| } | |||
| // account for integer overflow | |||
| if (y < 0 || (y + h) > input->shape()[0] || (y + h) < 0) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid y coordinate value for crop"); | |||
| RETURN_STATUS_UNEXPECTED("Crop: invalid y coordinate value for crop"); | |||
| } | |||
| // account for integer overflow | |||
| if (x < 0 || (x + w) > input->shape()[1] || (x + w) < 0) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid x coordinate value for crop"); | |||
| RETURN_STATUS_UNEXPECTED("Crop: invalid x coordinate value for crop"); | |||
| } | |||
| try { | |||
| @@ -277,12 +277,12 @@ Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu | |||
| lite_mat_cut.Init(w, h, lite_mat_rgb.channel_, reinterpret_cast<void *>(buffer), GetLiteCVDataType(input->type())); | |||
| bool ret = Crop(lite_mat_rgb, lite_mat_cut, x, y, w, h); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Crop failed in lite cv"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Crop: image crop failed."); | |||
| *output = output_tensor; | |||
| return Status::OK(); | |||
| } catch (std::runtime_error &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in crop."); | |||
| RETURN_STATUS_UNEXPECTED("Crop: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -310,21 +310,21 @@ Status GetJpegImageInfo(const std::shared_ptr<Tensor> &input, int *img_width, in | |||
| Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, | |||
| const std::shared_ptr<Tensor> &mean, const std::shared_ptr<Tensor> &std) { | |||
| if (input->Rank() != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Input tensor rank isn't 3"); | |||
| RETURN_STATUS_UNEXPECTED("Normalize: image shape is not <H,W,C>."); | |||
| } | |||
| if (input->type() != DataType::DE_UINT8 && input->type() != DataType::DE_FLOAT32) { | |||
| RETURN_STATUS_UNEXPECTED("Only uint8, float32 support in Normalize"); | |||
| RETURN_STATUS_UNEXPECTED("Normalize: image datatype is not uint8 or float32."); | |||
| } | |||
| mean->Squeeze(); | |||
| if (mean->type() != DataType::DE_FLOAT32 || mean->Rank() != 1 || mean->shape()[0] != 3) { | |||
| std::string err_msg = "Mean tensor should be of size 3 and type float."; | |||
| std::string err_msg = "Normalize: mean should be of size 3 and type float."; | |||
| return Status(StatusCode::kShapeMisMatch, err_msg); | |||
| } | |||
| std->Squeeze(); | |||
| if (std->type() != DataType::DE_FLOAT32 || std->Rank() != 1 || std->shape()[0] != 3) { | |||
| std::string err_msg = "Std tensor should be of size 3 and type float."; | |||
| std::string err_msg = "Normalize: std should be of size 3 and type float."; | |||
| return Status(StatusCode::kShapeMisMatch, err_msg); | |||
| } | |||
| // convert mean, std back to vector | |||
| @@ -357,16 +357,16 @@ Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| LiteMat lite_mat_float; | |||
| // change input to float | |||
| ret = ConvertTo(lite_mat_rgb, lite_mat_float, 1.0); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Conversion of lite cv to float failed"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Normalize: convert to float datatype failed."); | |||
| ret = SubStractMeanNormalize(lite_mat_float, lite_mat_norm, vec_mean, vec_std); | |||
| } else { // float32 | |||
| ret = SubStractMeanNormalize(lite_mat_rgb, lite_mat_norm, vec_mean, vec_std); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Normalize in lite cv failed"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Normalize: normalize failed."); | |||
| *output = output_tensor; | |||
| } catch (std::runtime_error &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in normalize."); | |||
| RETURN_STATUS_UNEXPECTED("Normalize: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -374,16 +374,16 @@ Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||
| Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t output_height, | |||
| int32_t output_width, double fx, double fy, InterpolationMode mode) { | |||
| if (input->Rank() != 3 && input->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("Input Tensor is not in shape of <H,W,C> or <H,W>"); | |||
| RETURN_STATUS_UNEXPECTED("Resize: input image is not in shape of <H,W,C> or <H,W>"); | |||
| } | |||
| if (input->type() != DataType::DE_UINT8) { | |||
| RETURN_STATUS_UNEXPECTED("Only uint8 support in Resize"); | |||
| RETURN_STATUS_UNEXPECTED("Resize: image datatype is not uint8."); | |||
| } | |||
| // resize image too large or too small | |||
| if (output_height == 0 || output_height > input->shape()[0] * 1000 || output_width == 0 || | |||
| output_width > input->shape()[1] * 1000) { | |||
| std::string err_msg = | |||
| "The resizing width or height 1) is too big, it's up to " | |||
| "Resize: the resizing width or height 1) is too big, it's up to " | |||
| "1000 times the original image; 2) can not be 0."; | |||
| return Status(StatusCode::kShapeMisMatch, err_msg); | |||
| } | |||
| @@ -412,11 +412,11 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| GetLiteCVDataType(input->type())); | |||
| bool ret = ResizeBilinear(lite_mat_rgb, lite_mat_resize, output_width, output_height); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Resize failed in lite cv"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Resize: bilinear resize failed."); | |||
| *output = output_tensor; | |||
| } catch (std::runtime_error &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in image resize."); | |||
| RETURN_STATUS_UNEXPECTED("Resize: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -425,15 +425,17 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output | |||
| const int32_t &pad_bottom, const int32_t &pad_left, const int32_t &pad_right, const BorderType &border_types, | |||
| uint8_t fill_r, uint8_t fill_g, uint8_t fill_b) { | |||
| if (input->Rank() != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Input Tensor is not in shape of <H,W,C>"); | |||
| RETURN_STATUS_UNEXPECTED("Pad: input image is not in shape of <H,W,C>"); | |||
| } | |||
| if (input->type() != DataType::DE_FLOAT32 && input->type() != DataType::DE_UINT8) { | |||
| RETURN_STATUS_UNEXPECTED("Only float32, uint8 support in Pad"); | |||
| RETURN_STATUS_UNEXPECTED("Pad: image datatype is not uint8 or float32."); | |||
| } | |||
| if (pad_top < 0 || pad_bottom < 0 || pad_left < 0 || pad_right < 0) { | |||
| RETURN_STATUS_UNEXPECTED("The pad, top, bottom, left, right must be greater than 0"); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Pad: " | |||
| "the top, bottom, left, right of pad must be greater than or equal to 0."); | |||
| } | |||
| try { | |||
| @@ -456,11 +458,11 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output | |||
| bool ret = Pad(lite_mat_rgb, lite_mat_pad, pad_top, pad_bottom, pad_left, pad_right, | |||
| PaddBorderType::PADD_BORDER_CONSTANT, fill_r, fill_g, fill_b); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Pad failed in lite cv"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Pad: pad failed."); | |||
| *output = output_tensor; | |||
| } catch (std::runtime_error &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in image Pad."); | |||
| RETURN_STATUS_UNEXPECTED("Pad: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -518,11 +520,11 @@ static Status RotateAngleWithOutMirror(const std::shared_ptr<Tensor> &input, std | |||
| GetLiteCVDataType(input->type())); | |||
| bool ret = Affine(lite_mat_rgb, lite_mat_affine, M, dsize, UINT8_C3(0, 0, 0)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Rotate failed in lite cv"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Rotate: rotate failed."); | |||
| *output = output_tensor; | |||
| } catch (std::runtime_error &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in image Rotate."); | |||
| RETURN_STATUS_UNEXPECTED("Rotate: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -588,11 +590,11 @@ static Status RotateAngleWithMirror(const std::shared_ptr<Tensor> &input, std::s | |||
| GetLiteCVDataType(input->type())); | |||
| bool ret = Affine(lite_mat_rgb, lite_mat_affine, M, dsize, UINT8_C3(0, 0, 0)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Rotate failed in lite cv"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(ret, "Rotate: rotate failed."); | |||
| *output = output_tensor; | |||
| } catch (std::runtime_error &e) { | |||
| RETURN_STATUS_UNEXPECTED("Error in image Rotate."); | |||
| RETURN_STATUS_UNEXPECTED("Rotate: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -606,11 +608,11 @@ static bool IsMirror(int orientation) { | |||
| // rotate the image by EXIF orientation | |||
| Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const uint64_t orientation) { | |||
| if (input->Rank() != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Input Tensor is not in shape of <H,W,C>"); | |||
| RETURN_STATUS_UNEXPECTED("Rotate: input image is not in shape of <H,W,C>"); | |||
| } | |||
| if (input->type() != DataType::DE_FLOAT32 && input->type() != DataType::DE_UINT8) { | |||
| RETURN_STATUS_UNEXPECTED("Only float32, uint8 support in Pad"); | |||
| RETURN_STATUS_UNEXPECTED("Rotate: image datatype is not float32 or uint8."); | |||
| } | |||
| if (!IsMirror(orientation)) { | |||
| @@ -55,7 +55,7 @@ Status ComputeUpperAndLowerPercentiles(std::vector<int32_t> *hist, int32_t hi_p, | |||
| } | |||
| } catch (const std::exception &e) { | |||
| const char *err_msg = e.what(); | |||
| std::string err_message = "Error in ComputeUpperAndLowerPercentiles: "; | |||
| std::string err_message = "AutoContrast: ComputeUpperAndLowerPercentiles failed: "; | |||
| err_message += err_msg; | |||
| RETURN_STATUS_UNEXPECTED(err_message); | |||
| } | |||
| @@ -73,7 +73,7 @@ Status GenerateRealNumber(float_t a, float_t b, std::mt19937 *rnd, float_t *resu | |||
| *result = distribution(*rnd); | |||
| } catch (const std::exception &e) { | |||
| const char *err_msg = e.what(); | |||
| std::string err_message = "Error in GenerateRealNumber: "; | |||
| std::string err_message = "RandomAffine: GenerateRealNumber failed: "; | |||
| err_message += err_msg; | |||
| RETURN_STATUS_UNEXPECTED(err_message); | |||
| } | |||
| @@ -29,7 +29,7 @@ MixUpBatchOp::MixUpBatchOp(float alpha) : alpha_(alpha) { rnd_.seed(GetSeed()); | |||
| Status MixUpBatchOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| if (input.size() < 2) { | |||
| RETURN_STATUS_UNEXPECTED("Both images and labels columns are required for this operation."); | |||
| RETURN_STATUS_UNEXPECTED("MixUpBatch: both images and labels columns are required"); | |||
| } | |||
| std::vector<std::shared_ptr<CVTensor>> images; | |||
| @@ -39,19 +39,23 @@ Status MixUpBatchOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| // Check inputs | |||
| if (image_shape.size() != 4 || image_shape[0] != label_shape[0]) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "MixUpBatch:You must make sure images are HWC or CHW and batched before calling MixUpBatch."); | |||
| "MixUpBatch: " | |||
| "please make sure images are HWC or CHW and batched before calling MixUpBatch."); | |||
| } | |||
| if (!input.at(1)->type().IsInt()) { | |||
| RETURN_STATUS_UNEXPECTED("MixUpBatch: Wrong labels type. The second column (labels) must only include int types."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "MixUpBatch: wrong labels type. " | |||
| "The second column (labels) must only include int types."); | |||
| } | |||
| if (label_shape.size() != 2 && label_shape.size() != 3) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "MixUpBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC where N is the batch " | |||
| "size, L is the number of labels in each row, " | |||
| "and C is the number of classes. labels must be in one-hot format and in a batch."); | |||
| "MixUpBatch: wrong labels shape. " | |||
| "The second column (labels) must have a shape of NC or NLC where N is the batch size, " | |||
| "L is the number of labels in each row, and C is the number of classes. " | |||
| "labels must be in one-hot format and in a batch."); | |||
| } | |||
| if ((image_shape[1] != 1 && image_shape[1] != 3) && (image_shape[3] != 1 && image_shape[3] != 3)) { | |||
| RETURN_STATUS_UNEXPECTED("MixUpBatch: Images must be in the shape of HWC or CHW."); | |||
| RETURN_STATUS_UNEXPECTED("MixUpBatch: images must be in the shape of HWC or CHW."); | |||
| } | |||
| // Move images into a vector of CVTensors | |||
| @@ -107,7 +111,7 @@ Status MixUpBatchOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| input.at(0)->type(), start_addr_of_index, &out)); | |||
| std::shared_ptr<CVTensor> rand_image = CVTensor::AsCVTensor(std::move(out)); | |||
| if (!rand_image->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("MixUpBatch: allocate memory failed."); | |||
| } | |||
| images[i]->mat() = lam * images[i]->mat() + (1 - lam) * rand_image->mat(); | |||
| } | |||
| @@ -29,11 +29,11 @@ namespace dataset { | |||
| NormalizeOp::NormalizeOp(float mean_r, float mean_g, float mean_b, float std_r, float std_g, float std_b) { | |||
| Status s = Tensor::CreateFromVector<float>({mean_r, mean_g, mean_b}, &mean_); | |||
| if (s.IsError()) { | |||
| MS_LOG(ERROR) << "Could not create mean tensor."; | |||
| MS_LOG(ERROR) << "Normalize: invalid mean value."; | |||
| } | |||
| s = Tensor::CreateFromVector<float>({std_r, std_g, std_b}, &std_); | |||
| if (s.IsError()) { | |||
| MS_LOG(ERROR) << "Could not create std tensor."; | |||
| MS_LOG(ERROR) << "Normalize: invalid std value."; | |||
| } | |||
| } | |||
| @@ -26,11 +26,11 @@ NormalizePadOp::NormalizePadOp(float mean_r, float mean_g, float mean_b, float s | |||
| std::string dtype) { | |||
| Status s = Tensor::CreateFromVector<float>({mean_r, mean_g, mean_b}, &mean_); | |||
| if (s.IsError()) { | |||
| MS_LOG(ERROR) << "Could not create mean tensor."; | |||
| MS_LOG(ERROR) << "NormalizePad: invalid mean value."; | |||
| } | |||
| s = Tensor::CreateFromVector<float>({std_r, std_g, std_b}, &std_); | |||
| if (s.IsError()) { | |||
| MS_LOG(ERROR) << "Could not create std tensor."; | |||
| MS_LOG(ERROR) << "NormalizePad: invalid std value."; | |||
| } | |||
| dtype_ = dtype; | |||
| } | |||
| @@ -48,7 +48,7 @@ Status PadOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<Te | |||
| TensorShape out({-1, -1, 3}); // we don't know what is output image size, but we know it should be 3 channels | |||
| if (inputs[0].Rank() == 1) outputs.emplace_back(out); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "Pad: invalid input shape"); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -29,10 +29,10 @@ Status PosterizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt | |||
| uint8_t mask_value = ~((uint8_t)(1 << (8 - bit_)) - 1); | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Posterize: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3 && input_cv->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("Input Tensor is not in shape of <H,W,C> or <H,W>"); | |||
| RETURN_STATUS_UNEXPECTED("Posterize: input image is not in shape of <H,W,C> or <H,W>"); | |||
| } | |||
| std::vector<uint8_t> lut_vector; | |||
| for (std::size_t i = 0; i < 256; i++) { | |||
| @@ -41,7 +41,9 @@ Status PosterizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt | |||
| cv::Mat in_image = input_cv->mat(); | |||
| cv::Mat output_img; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(in_image.depth() == CV_8U || in_image.depth() == CV_8S, | |||
| "Input image data type can not be float, but got " + input->type().ToString()); | |||
| "Posterize: input image data type can not be float, " | |||
| "but got " + | |||
| input->type().ToString()); | |||
| cv::LUT(in_image, lut_vector, output_img); | |||
| std::shared_ptr<CVTensor> result_tensor; | |||
| RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &result_tensor)); | |||
| @@ -26,7 +26,7 @@ RandomColorOp::RandomColorOp(float t_lb, float t_ub) : rnd_(GetSeed()), dist_(t_ | |||
| Status RandomColorOp::Compute(const std::shared_ptr<Tensor> &in, std::shared_ptr<Tensor> *out) { | |||
| IO_CHECK(in, out); | |||
| if (in->Rank() != 3) { | |||
| RETURN_STATUS_UNEXPECTED("image must have 3 channels"); | |||
| RETURN_STATUS_UNEXPECTED("RandomColor: image must have 3 channels"); | |||
| } | |||
| // 0.5 pixel precision assuming an 8 bit image | |||
| const auto eps = 0.00195; | |||
| @@ -46,7 +46,7 @@ RandomCropAndResizeOp::RandomCropAndResizeOp(int32_t target_height, int32_t targ | |||
| Status RandomCropAndResizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 2, "The shape of input is abnormal"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 2, "RandomCropAndResize: the image is not <H,W,C> or <H,W>"); | |||
| int h_in = input->shape()[0]; | |||
| int w_in = input->shape()[1]; | |||
| @@ -64,14 +64,14 @@ Status RandomCropAndResizeOp::OutputShape(const std::vector<TensorShape> &inputs | |||
| if (inputs[0].Rank() == 2) outputs.emplace_back(out); | |||
| if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "RandomCropAndResize: invalid input shape"); | |||
| } | |||
| Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) { | |||
| *crop_width = w_in; | |||
| *crop_height = h_in; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "Width is 0"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(h_in != 0, "Height is 0"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(aspect_lb_ > 0, "Aspect lower bound must be greater than zero"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(w_in != 0, "RandomCropAndResize: Width is 0"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(h_in != 0, "RandomCropAndResize: Height is 0"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(aspect_lb_ > 0, "RandomCropAndResize: aspect lower bound must be greater than zero"); | |||
| for (int32_t i = 0; i < max_iter_; i++) { | |||
| double const sample_scale = rnd_scale_(rnd_); | |||
| // In case of non-symmetrical aspect ratios, use uniform distribution on a logarithmic sample_scale. | |||
| @@ -29,7 +29,8 @@ namespace dataset { | |||
| Status RandomCropAndResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| RETURN_IF_NOT_OK(BoundingBox::ValidateBoundingBoxes(input)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input[0]->shape().Size() >= 2, "The shape of input is not >= 2"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input[0]->shape().Size() >= 2, | |||
| "RandomCropAndResizeWithBBox: image shape is not <H,W,C> or <H,W>."); | |||
| output->resize(2); | |||
| (*output)[1] = std::move(input[1]); // move boxes over to output | |||
| @@ -29,7 +29,7 @@ RandomCropDecodeResizeOp::RandomCropDecodeResizeOp(int32_t target_height, int32_ | |||
| Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| if (input == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("input tensor is null"); | |||
| RETURN_STATUS_UNEXPECTED("RandomCropDecodeResize: input image is empty."); | |||
| } | |||
| if (!IsNonEmptyJPEG(input)) { | |||
| DecodeOp op(true); | |||
| @@ -60,11 +60,11 @@ Status RandomCropOp::ImagePadding(const std::shared_ptr<Tensor> &input, std::sha | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pad_top_ < input->shape()[0] * 3 && pad_bottom_ < input->shape()[0] * 3 && | |||
| pad_left_ < input->shape()[1] * 3 && pad_right_ < input->shape()[1] * 3, | |||
| "RandomCropBBoxOp padding size is too big, it's more than 3 times the original size."); | |||
| "RandomCrop: padding size is too big, it's more than 3 times the original size."); | |||
| RETURN_IF_NOT_OK( | |||
| Pad(input, pad_image, pad_top_, pad_bottom_, pad_left_, pad_right_, border_type_, fill_r_, fill_g_, fill_b_)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((*pad_image)->shape().Size() >= 2, "Abnormal shape"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((*pad_image)->shape().Size() >= 2, "RandomCrop: invalid shape of image after pad."); | |||
| *padded_image_h = (*pad_image)->shape()[0]; | |||
| *padded_image_w = (*pad_image)->shape()[1]; | |||
| @@ -95,7 +95,7 @@ Status RandomCropOp::ImagePadding(const std::shared_ptr<Tensor> &input, std::sha | |||
| if (*padded_image_h < crop_height_ || *padded_image_w < crop_width_ || crop_height_ == 0 || crop_width_ == 0) { | |||
| return Status(StatusCode::kShapeMisMatch, __LINE__, __FILE__, | |||
| "Crop size is greater than the image dimensions or is zero."); | |||
| "RandomCrop: crop size is greater than the image dimensions or is zero."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -136,7 +136,7 @@ Status RandomCropOp::OutputShape(const std::vector<TensorShape> &inputs, std::ve | |||
| if (inputs[0].Rank() == 2) outputs.emplace_back(out); | |||
| if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "RandomCrop: invalid input shape."); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -77,7 +77,7 @@ Status RandomRotationOp::OutputShape(const std::vector<TensorShape> &inputs, std | |||
| if (inputs[0].Rank() == 2) outputs.emplace_back(out); | |||
| if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "RandomRotation: invalid input shape."); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -27,7 +27,10 @@ namespace dataset { | |||
| Status RandomSelectSubpolicyOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| TensorRow in_row = input; | |||
| size_t rand_num = rand_int_(gen_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(rand_num < policy_.size(), "invalid rand_num:" + std::to_string(rand_num)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(rand_num < policy_.size(), | |||
| "RandomSelectSubpolicy: " | |||
| "invalid rand_num, out of policy range: " + | |||
| std::to_string(rand_num)); | |||
| for (auto &sub : policy_[rand_num]) { | |||
| if (rand_double_(gen_) <= sub.second) { | |||
| RETURN_IF_NOT_OK(sub.first->Compute(in_row, output)); | |||
| @@ -88,7 +91,7 @@ Status RandomSelectSubpolicyOp::OutputType(const std::vector<DataType> &inputs, | |||
| RandomSelectSubpolicyOp::RandomSelectSubpolicyOp(const std::vector<Subpolicy> &policy) | |||
| : gen_(GetSeed()), policy_(policy), rand_int_(0, policy.size() - 1), rand_double_(0, 1) { | |||
| if (policy_.empty()) { | |||
| MS_LOG(ERROR) << "policy in RandomSelectSubpolicyOp is empty."; | |||
| MS_LOG(ERROR) << "RandomSelectSubpolicy: policy in RandomSelectSubpolicyOp is empty."; | |||
| } | |||
| is_deterministic_ = false; | |||
| } | |||
| @@ -30,7 +30,7 @@ Status RandomSolarizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shar | |||
| uint8_t threshold_min_ = threshold_[0], threshold_max_ = threshold_[1]; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(threshold_min_ <= threshold_max_, | |||
| "threshold_min must be smaller or equal to threshold_max."); | |||
| "RandomSolarize: min of threshold is greater than max of threshold."); | |||
| uint8_t threshold_min = std::uniform_int_distribution(threshold_min_, threshold_max_)(rnd_); | |||
| uint8_t threshold_max = std::uniform_int_distribution(threshold_min_, threshold_max_)(rnd_); | |||
| @@ -29,18 +29,19 @@ const InterpolationMode ResizeOp::kDefInterpolation = InterpolationMode::kLinear | |||
| Status ResizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 2, "The shape size " + std::to_string(input->shape().Size()) + | |||
| " of input tensor is invalid"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| input->shape().Size() >= 2, | |||
| "Resize: image shape " + std::to_string(input->shape().Size()) + " is not <H,W,C> or <H,W>."); | |||
| int32_t output_h, output_w = 0; | |||
| int32_t input_h = static_cast<int>(input->shape()[0]); | |||
| int32_t input_w = static_cast<int>(input->shape()[1]); | |||
| if (size2_ == 0) { | |||
| if (input_h < input_w) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input_h != 0, "The input height is 0"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input_h != 0, "Resize: the input height is 0."); | |||
| output_h = size1_; | |||
| output_w = static_cast<int>(std::lround(static_cast<float>(input_w) / input_h * output_h)); | |||
| } else { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input_w != 0, "The input width is 0"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input_w != 0, "Resize: the input width is 0."); | |||
| output_w = size1_; | |||
| output_h = static_cast<int>(std::lround(static_cast<float>(input_h) / input_w * output_w)); | |||
| } | |||
| @@ -65,7 +66,7 @@ Status ResizeOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector | |||
| if (inputs[0].Rank() == 2) outputs.emplace_back(out); | |||
| if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); | |||
| if (!outputs.empty()) return Status::OK(); | |||
| return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); | |||
| return Status(StatusCode::kUnexpectedError, "Resize: invalid input wrong shape."); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -26,8 +26,9 @@ RotateOp::RotateOp(int angle_id) : angle_id_(angle_id) {} | |||
| Status RotateOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 2, "The shape size " + std::to_string(input->shape().Size()) + | |||
| " of input tensor is invalid"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| input->shape().Size() >= 2, | |||
| "Rotate: image shape " + std::to_string(input->shape().Size()) + " is not <H,W,C> or <H,W>."); | |||
| #ifdef ENABLE_ANDROID | |||
| Rotate(input, output, angle_id_); | |||
| #endif | |||
| @@ -31,17 +31,17 @@ Status SharpnessOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| cv::Mat input_img = input_cv->mat(); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Sharpness: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 3 && input_cv->Rank() != 2) { | |||
| RETURN_STATUS_UNEXPECTED("Shape not <H,W,C> or <H,W>"); | |||
| RETURN_STATUS_UNEXPECTED("Sharpness: image shape is not <H,W,C> or <H,W>"); | |||
| } | |||
| /// Get number of channels and image matrix | |||
| std::size_t num_of_channels = input_cv->shape()[2]; | |||
| if (num_of_channels != 1 && num_of_channels != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Number of channels is not 1 or 3."); | |||
| RETURN_STATUS_UNEXPECTED("Sharpness: number of channels is not 1 or 3."); | |||
| } | |||
| /// creating a smoothing filter. 1, 1, 1, | |||
| @@ -76,7 +76,7 @@ Status SharpnessOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt | |||
| } | |||
| catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("OpenCV error in random sharpness"); | |||
| RETURN_STATUS_UNEXPECTED("Sharpness: " + std::string(e.what())); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -30,22 +30,22 @@ Status SolarizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr | |||
| uint8_t threshold_min_ = threshold_[0], threshold_max_ = threshold_[1]; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(threshold_min_ <= threshold_max_, | |||
| "threshold_min must be smaller or equal to threshold_max."); | |||
| "Solarize: threshold_min must be smaller or equal to threshold_max."); | |||
| try { | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input); | |||
| cv::Mat input_img = input_cv->mat(); | |||
| if (!input_cv->mat().data) { | |||
| RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); | |||
| RETURN_STATUS_UNEXPECTED("Solarize: load image failed."); | |||
| } | |||
| if (input_cv->Rank() != 2 && input_cv->Rank() != 3) { | |||
| RETURN_STATUS_UNEXPECTED("Shape not of either <H,W,C> or <H,W> format."); | |||
| RETURN_STATUS_UNEXPECTED("Solarize: image shape is not of either <H,W,C> or <H,W>."); | |||
| } | |||
| if (input_cv->Rank() == 3) { | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (num_channels != 3 && num_channels != 1) { | |||
| RETURN_STATUS_UNEXPECTED("Number of channels is not 1 or 3."); | |||
| RETURN_STATUS_UNEXPECTED("Solarize: number of channels is not 1 or 3."); | |||
| } | |||
| } | |||
| @@ -73,7 +73,7 @@ Status SolarizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr | |||
| catch (const cv::Exception &e) { | |||
| const char *cv_err_msg = e.what(); | |||
| std::string err_message = "Error in SolarizeOp: "; | |||
| std::string err_message = "Solarize: "; | |||
| err_message += cv_err_msg; | |||
| RETURN_STATUS_UNEXPECTED(err_message); | |||
| } | |||
| @@ -81,8 +81,6 @@ Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| } | |||
| } | |||
| } catch (const py::error_already_set &e) { | |||
| MS_LOG(ERROR) << "Pyfunc error, " << e.what() << ". Under sink mode, progress will late exit after 30s " | |||
| << "for resource release and thread safe"; | |||
| ret = Status(StatusCode::kPyFuncException, e.what()); | |||
| } | |||
| } | |||
| @@ -79,11 +79,11 @@ BasicTokenizerOp::BasicTokenizerOp(const bool &lower_case, const bool &keep_whit | |||
| Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text, | |||
| const std::unordered_set<std::string> &unused_words, | |||
| std::string *outupt) { | |||
| std::string *output) { | |||
| icu::ErrorCode error; | |||
| const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCCasefoldInstance failed."); | |||
| outupt->clear(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "BasicTokenizer: getNFKCCasefoldInstance failed."); | |||
| output->clear(); | |||
| // 1. get start and end offsets of not case fold strs | |||
| std::queue<std::pair<int, int>> offsets; // offsets of not used words | |||
| @@ -123,7 +123,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text | |||
| std::string temp; | |||
| icu::StringByteSink<std::string> sink(&temp); | |||
| nfkc_case_fold->normalizeUTF8(0, icu::StringPiece(process_text.data(), process_text.size()), sink, nullptr, error); | |||
| *outupt += temp + preserve_token; | |||
| *output += temp + preserve_token; | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -131,7 +131,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text | |||
| Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr<Tensor> &input, | |||
| std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "BasicTokenizer: input is not string datatype."); | |||
| std::vector<std::string> strs(input->Size()); | |||
| int i = 0; | |||
| for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) { | |||
| @@ -142,9 +142,9 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr<Tensor | |||
| Status BasicTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "BasicTokenizer: input only support one column data."); | |||
| if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { | |||
| RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); | |||
| RETURN_STATUS_UNEXPECTED("BasicTokenizer: the input should be scalar with string datatype"); | |||
| } | |||
| std::shared_ptr<Tensor> cur_input; | |||
| std::shared_ptr<Tensor> processed_tensor; | |||
| @@ -29,16 +29,16 @@ namespace dataset { | |||
| Status CaseFoldOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "CaseFold: input is not string datatype."); | |||
| icu::ErrorCode error; | |||
| const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCCasefoldInstance failed."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "CaseFold: getNFKCCasefoldInstance failed."); | |||
| std::vector<std::string> strs(input->Size()); | |||
| int i = 0; | |||
| for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) { | |||
| icu::StringByteSink<std::string> sink(&strs[i++]); | |||
| nfkc_case_fold->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "CaseFold: normalizeUTF8 failed."); | |||
| } | |||
| return Tensor::CreateFromVector(strs, input->shape(), output); | |||
| } | |||
| @@ -33,11 +33,11 @@ JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::strin | |||
| Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "JiebaTokenizer: input only support one column data."); | |||
| RETURN_UNEXPECTED_IF_NULL(jieba_parser_); | |||
| if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { | |||
| RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor."); | |||
| RETURN_STATUS_UNEXPECTED("JiebaTokenizer: the input should be scalar with string datatype."); | |||
| } | |||
| std::string_view sentence_v; | |||
| @@ -83,7 +83,7 @@ Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| Status JiebaTokenizerOp::AddWord(const std::string &word, int freq) { | |||
| RETURN_UNEXPECTED_IF_NULL(jieba_parser_); | |||
| if (jieba_parser_->InsertUserWord(word, freq, "") == false) { | |||
| return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "add word error"); | |||
| return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "AddWord: add word failed."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -27,23 +27,24 @@ LookupOp::LookupOp(std::shared_ptr<Vocab> vocab, WordIdType default_id, const Da | |||
| Status LookupOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| RETURN_UNEXPECTED_IF_NULL(vocab_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "None string tensor received."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Lookup: input is not string datatype."); | |||
| std::vector<WordIdType> word_ids; | |||
| word_ids.reserve(input->Size()); | |||
| for (auto itr = input->begin<std::string_view>(); itr != input->end<std::string_view>(); itr++) { | |||
| WordIdType word_id = vocab_->Lookup(std::string(*itr)); | |||
| word_ids.emplace_back(word_id == Vocab::kNoTokenExists ? default_id_ : word_id); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| word_ids.back() != Vocab::kNoTokenExists, | |||
| "Invalid data, token: \"" + std::string(*itr) + "\" doesn't exist in vocab and no unknown token is specified."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(word_ids.back() != Vocab::kNoTokenExists, | |||
| "Lookup: invalid data, token: \"" + std::string(*itr) + | |||
| "\" doesn't exist in vocab and no unknown token is specified."); | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(word_ids, input->shape(), output)); | |||
| // type cast to user's requirements if what user wants isn't int32_t | |||
| if ((*output)->type() != type_) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(type_.IsNumeric(), | |||
| "Lookup doesn't support string to string lookup. data_type needs to be numeric"); | |||
| "Lookup: Lookup doesn't support string to string lookup. " | |||
| "data_type needs to be numeric"); | |||
| std::shared_ptr<Tensor> cast_to; | |||
| RETURN_IF_NOT_OK(TypeCast(*output, &cast_to, type_)); | |||
| *output = cast_to; | |||
| @@ -35,7 +35,8 @@ NgramOp::NgramOp(const std::vector<int32_t> &ngrams, int32_t l_len, int32_t r_le | |||
| Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, | |||
| "Ngram: input is not a 1D data with string datatype."); | |||
| std::vector<int32_t> offsets; // offsets for each str | |||
| std::vector<std::string> res; // holds the result of ngrams | |||
| std::string str_buffer; // concat all pad tokens with string interleaved with separators | |||
| @@ -54,13 +55,13 @@ Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te | |||
| for (int i = 0; i < r_len_; i++) offsets.push_back((str_buffer += r_pad_with_sp_).size()); | |||
| for (auto n : ngrams_) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "n gram needs to be a positive number.\n"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "Ngram: ngrams needs to be a positive number.\n"); | |||
| int32_t start_ind = l_len_ - std::min(l_len_, n - 1); | |||
| int32_t end_ind = offsets.size() - r_len_ + std::min(r_len_, n - 1); | |||
| if (end_ind - start_ind <= n) { | |||
| res.emplace_back(std::string()); // push back empty string | |||
| } else { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Incorrect loop condition."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Ngram: get offsets failed."); | |||
| for (int i = start_ind; i < end_ind - n; i++) { | |||
| res.emplace_back(str_buffer.substr(offsets[i], offsets[i + n] - offsets[i] - separator_.size())); | |||
| @@ -79,8 +80,8 @@ void NgramOp::Print(std::ostream &out) const { | |||
| } | |||
| Status NgramOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() == NumInput(), "incorrect num of inputs\n"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(inputs[0].Rank() == 1, "ngram only works with 1-dim data\n"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() == NumInput(), "Ngram: incorrect num of inputs\n"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(inputs[0].Rank() == 1, "Ngram: ngram only works with 1-dim data\n"); | |||
| dsize_t num_elements = ngrams_.size(); | |||
| for (int32_t n : ngrams_) { | |||
| // here since rank == 1, NumOfElements == shape[0]. add padding length to string | |||
| @@ -89,7 +90,7 @@ Status NgramOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector< | |||
| num_elements += std::max(len_with_padding - n, 0); | |||
| } | |||
| outputs.emplace_back(TensorShape({num_elements})); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(outputs.size() == NumOutput(), "incorrect num of outputs\n"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(outputs.size() == NumOutput(), "Ngram: incorrect num of outputs\n"); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| @@ -29,7 +29,7 @@ namespace dataset { | |||
| const NormalizeForm NormalizeUTF8Op::kDefNormalizeForm = NormalizeForm::kNfkc; | |||
| Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "NormalizeUTF8: input is not string datatype."); | |||
| icu::ErrorCode error; | |||
| const icu::Normalizer2 *normalize = nullptr; | |||
| @@ -40,26 +40,26 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share | |||
| } | |||
| case NormalizeForm::kNfc: { | |||
| normalize = icu::Normalizer2::getNFCInstance(error); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8: getNFCInstance failed."); | |||
| break; | |||
| } | |||
| case NormalizeForm::kNfkc: { | |||
| normalize = icu::Normalizer2::getNFKCInstance(error); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8: getNFKCInstance failed."); | |||
| break; | |||
| } | |||
| case NormalizeForm::kNfd: { | |||
| normalize = icu::Normalizer2::getNFDInstance(error); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8: getNFDInstance failed."); | |||
| break; | |||
| } | |||
| case NormalizeForm::kNfkd: { | |||
| normalize = icu::Normalizer2::getNFKDInstance(error); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8: getNFKDInstance failed."); | |||
| break; | |||
| } | |||
| default: { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected normalize form."); | |||
| RETURN_STATUS_UNEXPECTED("NormalizeUTF8: unknown normalize form."); | |||
| break; | |||
| } | |||
| } | |||
| @@ -68,7 +68,7 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share | |||
| for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) { | |||
| icu::StringByteSink<std::string> sink(&strs[i++]); | |||
| normalize->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8 failed."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8: NormalizeUTF8 failed."); | |||
| } | |||
| return Tensor::CreateFromVector(strs, input->shape(), output); | |||
| } | |||
| @@ -25,7 +25,7 @@ namespace dataset { | |||
| Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std::string_view &text, | |||
| std::string *out) const { | |||
| CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "Input is null."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "RegexReplace: icu init failed."); | |||
| UErrorCode icu_error = U_ZERO_ERROR; | |||
| icu::UnicodeString unicode_text = icu::UnicodeString::fromUTF8(text); | |||
| matcher->reset(unicode_text); | |||
| @@ -35,18 +35,19 @@ Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std: | |||
| } else { | |||
| unicode_out = matcher->replaceFirst(replace_, icu_error); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace: RegexReplace failed."); | |||
| unicode_out.toUTF8String(*out); | |||
| return Status::OK(); | |||
| } | |||
| Status RegexReplaceOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "RegexReplace: input is not string datatype."); | |||
| UErrorCode icu_error = U_ZERO_ERROR; | |||
| icu::RegexMatcher matcher(pattern_, 0, icu_error); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), | |||
| "Create icu RegexMatcher failed, you may input one error pattern."); | |||
| "RegexReplace: create icu RegexMatcher failed, " | |||
| "you may input one error pattern."); | |||
| std::vector<std::string> strs(input->Size()); | |||
| int i = 0; | |||
| for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) { | |||
| @@ -27,10 +27,11 @@ const bool RegexTokenizerOp::kDefWithOffsets = false; | |||
| Status RegexTokenizerOp::GetUnicodeSubstr(const icu::UnicodeString &input, const int &start, const int &len, | |||
| std::string *out_utf8, icu::UnicodeString *out_unicode) const { | |||
| CHECK_FAIL_RETURN_UNEXPECTED((out_utf8 != nullptr || out_unicode != nullptr), "Wrong input"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((out_utf8 != nullptr || out_unicode != nullptr), "RegexTokenizer: get token failed."); | |||
| int total_len = input.length(); | |||
| int end = start + len; | |||
| CHECK_FAIL_RETURN_UNEXPECTED((start >= 0 && len > 0 && end <= total_len), "Out of range"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((start >= 0 && len > 0 && end <= total_len), | |||
| "RegexTokenizer: token offsets is out of range"); | |||
| icu::UnicodeString temp; | |||
| input.extract(start, len, temp); | |||
| if (out_utf8 != nullptr) { | |||
| @@ -48,9 +49,11 @@ Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector<std | |||
| UErrorCode status = U_ZERO_ERROR; | |||
| out_tokens->clear(); | |||
| icu::RegexMatcher token_matcher(delim_pattern_, 0, status); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "Create icu RegexMatcher failed, you may input one error pattern"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), | |||
| "RegexTokenizer: create ICU RegexMatcher failed, you may input one error pattern"); | |||
| icu::RegexMatcher delim_matcher(keep_delim_pattern_, 0, status); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "Create icu RegexMatcher failed, you may input one error pattern"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), | |||
| "RegexTokenizer: create ICU RegexMatcher failed, you may input one error pattern"); | |||
| icu::UnicodeString utext(icu::UnicodeString::fromUTF8(text)); | |||
| token_matcher.reset(utext); | |||
| @@ -60,9 +63,9 @@ Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector<std | |||
| status = U_ZERO_ERROR; | |||
| while (token_matcher.find(status) && U_SUCCESS(status)) { | |||
| int deli_start_index = token_matcher.start(status); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "Get RegexMatcher matched start index failed"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "RegexTokenizer: get RegexMatcher matched start index failed"); | |||
| int deli_end_index = token_matcher.end(status); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "Get RegexMatcher matched start index failed"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(status), "RegexTokenizer: get RegexMatcher matched start index failed"); | |||
| // Add non-empty token | |||
| int token_len = deli_start_index - token_start_index; | |||
| @@ -109,9 +112,11 @@ Status RegexTokenizerOp::GetRegexTokens(const std::string &text, std::vector<std | |||
| Status RegexTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "RegexTokenizer: input should be one column data"); | |||
| if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { | |||
| RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "RegexTokenizer: the input shape should be scalar and " | |||
| "the input datatype should be string."); | |||
| } | |||
| std::string_view text; | |||
| std::vector<std::string> tokens; | |||
| @@ -30,7 +30,8 @@ SentencePieceTokenizerOp::SentencePieceTokenizerOp(const std::shared_ptr<Sentenc | |||
| : vocab_(vocab), load_type_(load_type), out_type_(out_type) { | |||
| auto status = processor_.LoadFromSerializedProto(vocab_.get()->model_proto()); | |||
| if (!status.ok()) { | |||
| model_status_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "parser vocab model filed."); | |||
| model_status_ = | |||
| Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "SentencePieceTokenizer: parser vocab model filed."); | |||
| } else { | |||
| model_status_ = Status::OK(); | |||
| } | |||
| @@ -43,7 +44,9 @@ SentencePieceTokenizerOp::SentencePieceTokenizerOp(const std::string &model_path | |||
| (void)GetModelRealPath(model_path, model_filename); | |||
| auto status = processor_.Load(file_path_); | |||
| if (!status.ok()) { | |||
| model_status_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "load vocab model filed."); | |||
| std::string err_msg = "SentencePieceTokenizer: "; | |||
| err_msg += "load vocab model file: " + file_path_ + " failed."; | |||
| model_status_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } else { | |||
| model_status_ = Status::OK(); | |||
| } | |||
| @@ -56,7 +59,8 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s | |||
| } | |||
| if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { | |||
| RETURN_STATUS_UNEXPECTED("Input tensor should be scalar string tensor."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "SentencePieceTokenizer: the input shape should be scalar and the input datatype should be string."); | |||
| } | |||
| std::string_view sentence_v; | |||
| @@ -67,14 +71,14 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s | |||
| std::vector<std::string> pieces; | |||
| auto status = processor_.Encode(sentence, &pieces); | |||
| if (!status.ok()) { | |||
| RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error."); | |||
| RETURN_STATUS_UNEXPECTED("SentencePieceTokenizer: Encode sentence failed."); | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(pieces, output)); | |||
| } else { | |||
| std::vector<int> ids; | |||
| auto status = processor_.Encode(sentence, &ids); | |||
| if (!status.ok()) { | |||
| RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error."); | |||
| RETURN_STATUS_UNEXPECTED("SentencePieceTokenizer: Encode sentence failed."); | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(ids, output)); | |||
| } | |||
| @@ -84,15 +88,20 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s | |||
| Status SentencePieceTokenizerOp::GetModelRealPath(const std::string &model_path, const std::string &filename) { | |||
| char real_path[PATH_MAX] = {0}; | |||
| if (file_path_.size() >= PATH_MAX) { | |||
| RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "SentencePieceTokenizer: Sentence piece model path is invalid for path length longer than 4096."); | |||
| } | |||
| #if defined(_WIN32) || defined(_WIN64) | |||
| if (_fullpath(real_path, common::SafeCStr(model_path), PATH_MAX) == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "SentencePieceTokenizer: Sentence piece model path is invalid for path length longer than 4096."); | |||
| } | |||
| #else | |||
| if (realpath(common::SafeCStr(model_path), real_path) == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "SentencePieceTokenizer: " | |||
| "Sentence piece model path: " + | |||
| model_path + " is not existed or permission denied."); | |||
| } | |||
| #endif | |||
| std::string abs_path = real_path; | |||
| @@ -19,8 +19,9 @@ namespace mindspore { | |||
| namespace dataset { | |||
| Status SlidingWindowOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| IO_CHECK(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() == 1, "SlidingWindosOp supports 1D Tensors only for now."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(axis_ == 0 || axis_ == -1, "axis supports 0 or -1 only for now."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Rank() == 1, | |||
| "SlidingWindow: SlidingWindow supports 1D input only for now."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(axis_ == 0 || axis_ == -1, "SlidingWindow: axis supports 0 or -1 only for now."); | |||
| std::vector<TensorShape> input_shape = {input->shape()}; | |||
| std::vector<TensorShape> output_shape = {TensorShape({})}; | |||
| @@ -31,7 +32,7 @@ Status SlidingWindowOp::Compute(const std::shared_ptr<Tensor> &input, std::share | |||
| } | |||
| Status SlidingWindowOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() == NumInput(), "incorrect num of inputs\n"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() == NumInput(), "SlidingWindow: incorrect number of inputs\n"); | |||
| int32_t axis = Tensor::HandleNeg(axis_, inputs[0].Size()); | |||
| TensorShape input_shape = inputs[0]; | |||
| std::vector<dsize_t> output_shape_initializer; | |||
| @@ -50,7 +51,7 @@ Status SlidingWindowOp::OutputShape(const std::vector<TensorShape> &inputs, std: | |||
| outputs.pop_back(); | |||
| outputs.emplace_back(TensorShape(output_shape_initializer)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(outputs.size() == NumOutput(), "incorrect num of outputs\n"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(outputs.size() == NumOutput(), "SlidingWindow: incorrect number of outputs\n"); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| @@ -37,7 +37,7 @@ ToNumberOp::ToNumberOp(const DataType &cast_to_type) : cast_to_type_(cast_to_typ | |||
| ToNumberOp::ToNumberOp(const std::string &cast_to_type) : cast_to_type_(DataType(cast_to_type)) {} | |||
| Status ToNumberOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tenosrs should have type string."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "ToNumber: input should be string datatype."); | |||
| switch (cast_to_type_.value()) { | |||
| case DataType::DE_INT8: | |||
| @@ -74,7 +74,10 @@ Status ToNumberOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr | |||
| RETURN_IF_NOT_OK(ToDouble(input, output)); | |||
| break; | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Unsupported cast type: " + cast_to_type_.ToString()); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "ToNumber: " | |||
| "unsupported cast type: " + | |||
| cast_to_type_.ToString()); | |||
| } | |||
| return Status::OK(); | |||
| @@ -100,14 +103,18 @@ Status ToNumberOp::ToSignedIntegral(const std::shared_ptr<Tensor> &input, std::s | |||
| } catch (const std::out_of_range &) { | |||
| is_cast_out_of_range = true; | |||
| } catch (const std::invalid_argument &) { | |||
| RETURN_STATUS_UNEXPECTED("It is invalid to convert " + std::string(*it) + " to a number."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "ToNumber: " | |||
| "it is invalid to convert \"" + | |||
| std::string(*it) + "\" to a number."); | |||
| } | |||
| if (result > std::numeric_limits<T>::max() || result < std::numeric_limits<T>::min() || is_cast_out_of_range) { | |||
| std::string error_message = "String input " + std::string(*it) + " will be out of bounds if casted to " + | |||
| cast_to_type_.ToString() + ". The valid range is: [" + | |||
| std::to_string(std::numeric_limits<T>::min()) + ", " + | |||
| std::to_string(std::numeric_limits<T>::max()) + "]."; | |||
| std::string error_message = | |||
| "ToNumber: " | |||
| "string input " + | |||
| std::string(*it) + " will be out of bounds if cast to " + cast_to_type_.ToString() + ". The valid range is: [" + | |||
| std::to_string(std::numeric_limits<T>::min()) + ", " + std::to_string(std::numeric_limits<T>::max()) + "]."; | |||
| RETURN_STATUS_UNEXPECTED(error_message); | |||
| } | |||
| @@ -143,14 +150,18 @@ Status ToNumberOp::ToUnsignedIntegral(const std::shared_ptr<Tensor> &input, std: | |||
| } catch (const std::out_of_range &) { | |||
| is_cast_out_of_range = true; | |||
| } catch (const std::invalid_argument &) { | |||
| RETURN_STATUS_UNEXPECTED("It is invalid to convert " + std::string(*it) + " to an unsigned integer."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "ToNumber: " | |||
| "it is invalid to convert \"" + | |||
| std::string(*it) + "\" to an unsigned integer."); | |||
| } | |||
| if (result > std::numeric_limits<T>::max() || result < std::numeric_limits<T>::min() || is_cast_out_of_range) { | |||
| std::string error_message = "String input " + std::string(*it) + " will be out of bounds if casted to " + | |||
| cast_to_type_.ToString() + ". The valid range is: [" + | |||
| std::to_string(std::numeric_limits<T>::min()) + ", " + | |||
| std::to_string(std::numeric_limits<T>::max()) + "]."; | |||
| std::string error_message = | |||
| "ToNumber: " | |||
| "string input " + | |||
| std::string(*it) + " will be out of bounds if cast to " + cast_to_type_.ToString() + ". The valid range is: [" + | |||
| std::to_string(std::numeric_limits<T>::min()) + ", " + std::to_string(std::numeric_limits<T>::max()) + "]."; | |||
| RETURN_STATUS_UNEXPECTED(error_message); | |||
| } | |||
| @@ -185,15 +196,20 @@ Status ToNumberOp::ToFloat(const std::shared_ptr<Tensor> &input, std::shared_ptr | |||
| } catch (const std::out_of_range &) { | |||
| is_cast_out_of_range = true; | |||
| } catch (const std::invalid_argument &) { | |||
| RETURN_STATUS_UNEXPECTED("It is invalid to convert " + std::string(*it) + " to an unsigned integer."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "ToNumber: " | |||
| "it is invalid to convert \"" + | |||
| std::string(*it) + "\" to a float."); | |||
| } | |||
| if (result > std::numeric_limits<float>::max() || result < std::numeric_limits<float>::lowest() || | |||
| is_cast_out_of_range) { | |||
| std::string error_message = "String input " + std::string(*it) + " will be out of bounds if casted to " + | |||
| cast_to_type_.ToString() + ". The valid range is: [" + | |||
| std::to_string(std::numeric_limits<float>::lowest()) + ", " + | |||
| std::to_string(std::numeric_limits<float>::max()) + "]."; | |||
| std::string error_message = | |||
| "ToNumber: " | |||
| "string input " + | |||
| std::string(*it) + " will be out of bounds if cast to " + cast_to_type_.ToString() + ". The valid range is: [" + | |||
| std::to_string(std::numeric_limits<float>::lowest()) + ", " + | |||
| std::to_string(std::numeric_limits<float>::max()) + "]."; | |||
| RETURN_STATUS_UNEXPECTED(error_message); | |||
| } | |||
| @@ -218,15 +234,20 @@ Status ToNumberOp::ToDouble(const std::shared_ptr<Tensor> &input, std::shared_pt | |||
| } catch (const std::out_of_range &) { | |||
| is_cast_out_of_range = true; | |||
| } catch (const std::invalid_argument &) { | |||
| RETURN_STATUS_UNEXPECTED("It is invalid to convert " + std::string(*it) + " to an unsigned integer."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "ToNumber: " | |||
| "it is invalid to convert \"" + | |||
| std::string(*it) + "\" to a double."); | |||
| } | |||
| if (result > std::numeric_limits<double>::max() || result < std::numeric_limits<double>::lowest() || | |||
| is_cast_out_of_range) { | |||
| std::string error_message = "String input " + std::string(*it) + " will be out of bounds if casted to " + | |||
| cast_to_type_.ToString() + ". The valid range is: [" + | |||
| std::to_string(std::numeric_limits<double>::lowest()) + ", " + | |||
| std::to_string(std::numeric_limits<double>::max()) + "]."; | |||
| std::string error_message = | |||
| "ToNumber: " | |||
| "string input " + | |||
| std::string(*it) + " will be out of bounds if cast to " + cast_to_type_.ToString() + ". The valid range is: [" + | |||
| std::to_string(std::numeric_limits<double>::lowest()) + ", " + | |||
| std::to_string(std::numeric_limits<double>::max()) + "]."; | |||
| RETURN_STATUS_UNEXPECTED(error_message); | |||
| } | |||
| @@ -25,11 +25,11 @@ namespace dataset { | |||
| Status TruncateSequencePairOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 2, "Number of inputs should be two."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 2, "TruncateSequencePair: Expected two inputs."); | |||
| std::shared_ptr<Tensor> seq1 = input[0]; | |||
| std::shared_ptr<Tensor> seq2 = input[1]; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(seq1->shape().Rank() == 1 && seq2->shape().Rank() == 1, | |||
| "Both sequences should be of rank 1."); | |||
| "TruncateSequencePair: both data columns should be of rank 1."); | |||
| dsize_t length1 = seq1->shape()[0]; | |||
| dsize_t length2 = seq2->shape()[0]; | |||
| dsize_t outLength1 = length1; | |||
| @@ -31,16 +31,18 @@ const bool UnicodeCharTokenizerOp::kDefWithOffsets = false; | |||
| Status UnicodeCharTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "UnicodeCharTokenizer: input should be one column data."); | |||
| if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { | |||
| RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "UnicodeCharTokenizer: " | |||
| "the input shape should be scalar and the input datatype should be string."); | |||
| } | |||
| std::string_view str; | |||
| RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); | |||
| RuneStrArray runes; | |||
| if (!DecodeRunesInString(str.data(), str.size(), runes)) { | |||
| RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); | |||
| RETURN_STATUS_UNEXPECTED("UnicodeCharTokenizer: Decode utf8 string failed."); | |||
| } | |||
| std::shared_ptr<Tensor> token_tensor, offsets_start_tensor, offsets_limit_tensor; | |||
| std::vector<std::string> splits(runes.size()); | |||
| @@ -36,15 +36,17 @@ const bool UnicodeScriptTokenizerOp::kDefWithOffsets = false; | |||
| Status UnicodeScriptTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "UnicodeScriptTokenizer: input should be one column data."); | |||
| if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { | |||
| RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "UnicodeScriptTokenizer: " | |||
| "the input shape should be scalar and the input datatype should be string."); | |||
| } | |||
| std::string_view str; | |||
| RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); | |||
| RuneStrArray runes; | |||
| if (!DecodeRunesInString(str.data(), str.size(), runes)) { | |||
| RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); | |||
| RETURN_STATUS_UNEXPECTED("UnicodeScriptTokenizer: Decode utf8 string failed."); | |||
| } | |||
| std::shared_ptr<Tensor> token_tensor, offsets_start_tensor, offsets_limit_tensor; | |||
| @@ -63,9 +65,9 @@ Status UnicodeScriptTokenizerOp::Compute(const TensorRow &input, TensorRow *outp | |||
| status.reset(); | |||
| script = USCRIPT_INVALID_CODE; | |||
| } | |||
| // 1) Seperate UTF-8 strings of different UScriptCode values | |||
| // 1) Separate UTF-8 strings of different UScriptCode values | |||
| // (such as: "Chinese中国" should be splited to ["Chinese", "中国"]) | |||
| // 2) Seperate whitespace and non-whitespace UTF-8 strings | |||
| // 2) Separate whitespace and non-whitespace UTF-8 strings | |||
| // (such as: " ." should be split to [" ", "."]) | |||
| if (len > 0 && (script != last_script || is_space != was_space)) { | |||
| // 3) If keep_whitespace_ is false, all the whitespace characters will be discard | |||
| @@ -35,16 +35,17 @@ const bool WhitespaceTokenizerOp::kDefWithOffsets = false; | |||
| Status WhitespaceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "WhitespaceTokenizer: input should be one column data."); | |||
| if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { | |||
| RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "WhitespaceTokenizer: the input shape should be scalar and the input datatype should be string."); | |||
| } | |||
| std::string_view str; | |||
| RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); | |||
| RuneStrArray runes; | |||
| if (!DecodeRunesInString(str.data(), str.size(), runes)) { | |||
| RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); | |||
| RETURN_STATUS_UNEXPECTED("WhitespaceTokenizer: Decode utf8 string failed."); | |||
| } | |||
| std::shared_ptr<Tensor> token_tensor, offsets_start_tensor, offsets_limit_tensor; | |||
| @@ -37,7 +37,7 @@ WordpieceTokenizerOp::WordpieceTokenizerOp(const std::shared_ptr<Vocab> &vocab, | |||
| Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const RuneStrArray &runes, const int start, | |||
| bool *out_found, int *out_end) const { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && start < input_token.size(), "Out of range"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && start < input_token.size(), "WordpieceTokenizer: LookupWord Out of range"); | |||
| *out_found = false; | |||
| for (int i = runes.size() - 1; i >= 0; i--) { | |||
| *out_end = runes[i].offset + runes[i].len; | |||
| @@ -96,7 +96,7 @@ Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uin | |||
| } | |||
| RuneStrArray runes; | |||
| if (!DecodeRunesInString(input_token.data(), input_token.size(), runes)) { | |||
| RETURN_STATUS_UNEXPECTED("Decode utf8 string failed."); | |||
| RETURN_STATUS_UNEXPECTED("WordpieceTokenizer: Decode utf8 string failed."); | |||
| } | |||
| int end = 0; | |||
| for (int start = 0; start < input_token.size();) { | |||
| @@ -117,7 +117,8 @@ Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uin | |||
| Status WordpieceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| IO_CHECK_VECTOR(input, output); | |||
| if (input[0]->Rank() > 1 || input[0]->type() != DataType::DE_STRING) { | |||
| RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "WordpieceTokenizer: the input shape should be scalar or 1-D and the input datatype should be string."); | |||
| } | |||
| dsize_t count = 0; | |||
| std::vector<std::string> out_tokens; | |||
| @@ -73,7 +73,8 @@ Status SentencePieceVocab::BuildFromFile(const std::vector<std::string> &path_li | |||
| std::string model_proto; | |||
| sentencepiece::util::Status s_status = sentencepiece::SentencePieceTrainer::Train(unorder_map, nullptr, &model_proto); | |||
| if (!s_status.ok()) { | |||
| return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, s_status.message()); | |||
| std::string err_msg = "SentencePieceVocab: " + std::string(s_status.message()); | |||
| return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| vocab->get()->set_model_proto(model_proto); | |||
| @@ -85,15 +86,20 @@ Status SentencePieceVocab::SaveModel(const std::shared_ptr<SentencePieceVocab> * | |||
| char real_path[PATH_MAX] = {0}; | |||
| if (path.size() >= PATH_MAX) { | |||
| RETURN_STATUS_UNEXPECTED("sentence model path is invalid."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "SentencePieceVocab: sentence model path is invalid because " | |||
| "its length exceeds the limit of 4096."); | |||
| } | |||
| #if defined(_WIN32) || defined(_WIN64) | |||
| if (_fullpath(real_path, common::SafeCStr(path), PATH_MAX) == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("sentence model path is invalid."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "SentencePieceVocab: sentence model path is invalid, " | |||
| "failed to resolve the full path."); | |||
| } | |||
| #else | |||
| if (realpath(common::SafeCStr(path), real_path) == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("sentence model path is invalid."); | |||
| RETURN_STATUS_UNEXPECTED("SentencePieceVocab: sentence model path: " + path + | |||
| " does not exist or permission is denied."); | |||
| } | |||
| #endif | |||
| @@ -80,8 +80,7 @@ Status Vocab::BuildFromUnorderedMap(const std::unordered_map<WordType, WordIdTyp | |||
| std::unordered_map<WordType, WordIdType> word2id; | |||
| for (auto p : words) { | |||
| if (p.second < 0) { | |||
| MS_LOG(ERROR) << "index can not be negetive, but got " << p.second; | |||
| RETURN_STATUS_UNEXPECTED("index can not be negetive, but got " + std::to_string(p.second)); | |||
| RETURN_STATUS_UNEXPECTED("from_dict: index cannot be negative, but got " + std::to_string(p.second)); | |||
| } | |||
| word2id[p.first] = p.second; | |||
| } | |||
| @@ -97,8 +96,7 @@ Status Vocab::BuildFromVector(const std::vector<WordType> &words, const std::vec | |||
| WordIdType word_id = prepend_special ? static_cast<WordIdType>(special_tokens.size()) : 0; | |||
| for (auto word : words) { | |||
| if (word2id.find(word) != word2id.end()) { | |||
| MS_LOG(ERROR) << "word_list contains duplicate word: " + word + "."; | |||
| RETURN_STATUS_UNEXPECTED("word_list contains duplicate word: " + word + "."); | |||
| RETURN_STATUS_UNEXPECTED("from_list: word_list contains duplicate word: " + word + "."); | |||
| } | |||
| word2id[word] = word_id++; | |||
| } | |||
| @@ -107,8 +105,10 @@ Status Vocab::BuildFromVector(const std::vector<WordType> &words, const std::vec | |||
| for (auto special_token : special_tokens) { | |||
| if (word2id.find(special_token) != word2id.end()) { | |||
| MS_LOG(ERROR) << "special_tokens and word_list contain duplicate word: " + special_token + "."; | |||
| RETURN_STATUS_UNEXPECTED("special_tokens and word_list contain duplicate word: " + special_token + "."); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "from_list: " | |||
| "special_tokens and word_list contain duplicate word: " + | |||
| special_token + "."); | |||
| } | |||
| word2id[special_token] = word_id++; | |||
| } | |||
| @@ -122,14 +122,14 @@ Status Vocab::BuildFromFileCpp(const std::string &path, const std::string &delim | |||
| std::shared_ptr<Vocab> *vocab) { | |||
| // Validate parameters | |||
| if (path.empty()) { | |||
| MS_LOG(ERROR) << "vocab file path is not set!"; | |||
| RETURN_STATUS_UNEXPECTED("vocab file path is not set!"); | |||
| RETURN_STATUS_UNEXPECTED("from_file: vocab file path is not set!"); | |||
| } | |||
| if (vocab_size < 0 && vocab_size != -1) { | |||
| MS_LOG(ERROR) << "vocab_size shoule be either -1 or positive integer, but got " << vocab_size; | |||
| RETURN_STATUS_UNEXPECTED("vocab_size shoule be either -1 or positive integer, but got " + | |||
| std::to_string(vocab_size)); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "from_file: " | |||
| "vocab_size should be either -1 or positive integer, but got " + | |||
| std::to_string(vocab_size)); | |||
| } | |||
| std::string duplicate_sp; | |||
| @@ -141,8 +141,10 @@ Status Vocab::BuildFromFileCpp(const std::string &path, const std::string &delim | |||
| } | |||
| } | |||
| if (!duplicate_sp.empty()) { | |||
| MS_LOG(ERROR) << "special_tokens contains duplicate word: " << duplicate_sp; | |||
| RETURN_STATUS_UNEXPECTED("special_tokens contains duplicate word: " + duplicate_sp); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "from_file: " | |||
| "special_tokens contains duplicate word: " + | |||
| duplicate_sp); | |||
| } | |||
| std::unordered_set<std::string> specials; | |||
| @@ -154,8 +156,7 @@ Status Vocab::BuildFromFileCpp(const std::string &path, const std::string &delim | |||
| std::unordered_map<WordType, WordIdType> word2id; | |||
| std::fstream handle(path, std::ios::in); | |||
| if (!handle.good() || !handle.is_open()) { | |||
| MS_LOG(ERROR) << "fail to open:" + path; | |||
| RETURN_STATUS_UNEXPECTED("fail to open:" + path); | |||
| RETURN_STATUS_UNEXPECTED("from_file: fail to open: " + path); | |||
| } | |||
| std::string word; | |||
| while (std::getline(handle, word)) { | |||
| @@ -164,12 +165,13 @@ Status Vocab::BuildFromFileCpp(const std::string &path, const std::string &delim | |||
| word = word.substr(0, word.find_first_of(delimiter)); | |||
| } | |||
| if (word2id.find(word) != word2id.end()) { | |||
| MS_LOG(ERROR) << "word_list contains duplicate word:" + word + "."; | |||
| RETURN_STATUS_UNEXPECTED("word_list contains duplicate word:" + word + "."); | |||
| RETURN_STATUS_UNEXPECTED("from_file: word_list contains duplicate word:" + word + "."); | |||
| } | |||
| if (specials.find(word) != specials.end()) { | |||
| MS_LOG(ERROR) << "special_tokens and word_list contain duplicate word: " << word; | |||
| RETURN_STATUS_UNEXPECTED("special_tokens and word_list contain duplicate word: " + word); | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "from_file: " | |||
| "special_tokens and word_list contain duplicate word: " + | |||
| word); | |||
| } | |||
| word2id[word] = word_id++; | |||
| // break if enough row is read, if vocab_size is smaller than 0 | |||
| @@ -197,15 +199,16 @@ Status Vocab::BuildFromFile(const std::string &path, const std::string &delimite | |||
| WordIdType word_id = prepend_special ? static_cast<WordIdType>(special_tokens.size()) : 0; | |||
| std::unordered_map<WordType, WordIdType> word2id; | |||
| std::fstream handle(path, std::ios::in); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(handle.good() && handle.is_open(), "fail to open:" + path); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(handle.good() && handle.is_open(), "from_file: fail to open:" + path); | |||
| std::string word; | |||
| while (std::getline(handle, word)) { | |||
| if (!delimiter.empty()) { | |||
| // if delimiter is not found, find_first_of would return std::string::npos which is -1 | |||
| word = word.substr(0, word.find_first_of(delimiter)); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(word2id.find(word) == word2id.end(), "duplicate word:" + word + "."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(specials.find(word) == specials.end(), word + " is already in special_tokens."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(word2id.find(word) == word2id.end(), "from_file: duplicate word:" + word + "."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(specials.find(word) == specials.end(), | |||
| "from_file: " + word + " is already in special_tokens."); | |||
| word2id[word] = word_id++; | |||
| // break if enough row is read, if vocab_size is smaller than 0 | |||
| if (word2id.size() == vocab_size) break; | |||
| @@ -76,13 +76,20 @@ std::string CodeAsString(const StatusCode c) { | |||
| return std::string(s); | |||
| } | |||
| Status::Status(StatusCode c) noexcept : code_(c), err_msg_(CodeAsString(c)) {} | |||
| Status::Status(StatusCode c) noexcept | |||
| : code_(c), err_msg_(CodeAsString(c)), line_of_code_(-1), file_name_(""), err_description_("") {} | |||
| Status::Status() noexcept : code_(StatusCode::kOK), err_msg_("") {} | |||
| Status::Status() noexcept | |||
| : code_(StatusCode::kOK), err_msg_(""), line_of_code_(-1), file_name_(""), err_description_("") {} | |||
| Status::~Status() noexcept {} | |||
| Status::Status(const Status &s) : code_(s.code_), err_msg_(s.err_msg_) {} | |||
| Status::Status(const Status &s) | |||
| : code_(s.code_), | |||
| err_msg_(s.err_msg_), | |||
| line_of_code_(s.line_of_code_), | |||
| file_name_(s.file_name_), | |||
| err_description_(s.err_description_) {} | |||
| Status &Status::operator=(const Status &s) { | |||
| if (this == &s) { | |||
| @@ -90,12 +97,19 @@ Status &Status::operator=(const Status &s) { | |||
| } | |||
| code_ = s.code_; | |||
| err_msg_ = s.err_msg_; | |||
| line_of_code_ = s.line_of_code_; | |||
| file_name_ = s.file_name_; | |||
| err_description_ = s.err_description_; | |||
| return *this; | |||
| } | |||
| Status::Status(Status &&s) noexcept { | |||
| code_ = s.code_; | |||
| s.code_ = StatusCode::kOK; | |||
| line_of_code_ = s.line_of_code_; | |||
| s.line_of_code_ = -1; | |||
| file_name_ = std::move(s.file_name_); | |||
| err_description_ = std::move(s.err_description_); | |||
| err_msg_ = std::move(s.err_msg_); | |||
| } | |||
| @@ -105,14 +119,22 @@ Status &Status::operator=(Status &&s) noexcept { | |||
| } | |||
| code_ = s.code_; | |||
| s.code_ = StatusCode::kOK; | |||
| line_of_code_ = s.line_of_code_; | |||
| s.line_of_code_ = -1; | |||
| file_name_ = std::move(s.file_name_); | |||
| err_description_ = std::move(s.err_description_); | |||
| err_msg_ = std::move(s.err_msg_); | |||
| return *this; | |||
| } | |||
| Status::Status(const StatusCode code, const std::string &msg) : code_(code), err_msg_(msg) {} | |||
| Status::Status(const StatusCode code, const std::string &msg) | |||
| : code_(code), err_msg_(msg), line_of_code_(-1), file_name_(""), err_description_(msg) {} | |||
| Status::Status(const StatusCode code, int line_of_code, const char *file_name, const std::string &extra) { | |||
| code_ = code; | |||
| line_of_code_ = line_of_code; | |||
| file_name_ = std::string(file_name); | |||
| err_description_ = extra; | |||
| std::ostringstream ss; | |||
| #ifndef ENABLE_ANDROID | |||
| ss << "Thread ID " << this_thread::get_id() << " " << CodeAsString(code) << ". "; | |||
| @@ -127,13 +149,6 @@ Status::Status(const StatusCode code, int line_of_code, const char *file_name, c | |||
| ss << "File : " << file_name << "\n"; | |||
| } | |||
| err_msg_ = ss.str(); | |||
| if (code == StatusCode::kUnexpectedError) { | |||
| MS_LOG(ERROR) << err_msg_; | |||
| } else if (code == StatusCode::kNetWorkError) { | |||
| MS_LOG(WARNING) << err_msg_; | |||
| } else { | |||
| MS_LOG(INFO) << err_msg_; | |||
| } | |||
| } | |||
| std::ostream &operator<<(std::ostream &os, const Status &s) { | |||
| @@ -141,6 +156,25 @@ std::ostream &operator<<(std::ostream &os, const Status &s) { | |||
| return os; | |||
| } | |||
| std::string Status::SetErrDescription(const std::string &err_description) { | |||
| err_description_ = err_description; | |||
| std::ostringstream ss; | |||
| #ifndef ENABLE_ANDROID | |||
| ss << "Thread ID " << this_thread::get_id() << " " << CodeAsString(code_) << ". "; | |||
| if (!err_description_.empty()) { | |||
| ss << err_description_; | |||
| } | |||
| ss << "\n"; | |||
| #endif | |||
| if (line_of_code_ > 0 && !file_name_.empty()) { | |||
| ss << "Line of code : " << line_of_code_ << "\n"; | |||
| ss << "File : " << file_name_ << "\n"; | |||
| } | |||
| err_msg_ = ss.str(); | |||
| return err_msg_; | |||
| } | |||
| std::string Status::ToString() const { return err_msg_; } | |||
| StatusCode Status::get_code() const { return code_; } | |||
| @@ -141,6 +141,12 @@ class Status { | |||
| StatusCode get_code() const; | |||
| int GetLineOfCode() const { return line_of_code_; } | |||
| std::string SetErrDescription(const std::string &err_description); | |||
| std::string GetErrDescription() const { return err_description_; } | |||
| friend std::ostream &operator<<(std::ostream &os, const Status &s); | |||
| explicit operator bool() const { return (get_code() == StatusCode::kOK); } | |||
| @@ -165,6 +171,9 @@ class Status { | |||
| private: | |||
| StatusCode code_; | |||
| int line_of_code_; | |||
| std::string file_name_; | |||
| std::string err_description_; | |||
| std::string err_msg_; | |||
| }; | |||
| @@ -58,13 +58,20 @@ void Task::operator()() { | |||
| } | |||
| // Some error codes are ignored, e.g. interrupt. Others we just shutdown the group. | |||
| if (rc_.IsError() && !rc_.IsInterrupted()) { | |||
| if (rc_.get_code() == StatusCode::kNetWorkError) { | |||
| MS_LOG(WARNING) << rc_; | |||
| } else { | |||
| MS_LOG(ERROR) << rc_; | |||
| } | |||
| ShutdownGroup(); | |||
| } | |||
| } catch (const std::bad_alloc &e) { | |||
| rc_ = Status(StatusCode::kOutOfMemory, __LINE__, __FILE__, e.what()); | |||
| MS_LOG(ERROR) << rc_; | |||
| ShutdownGroup(); | |||
| } catch (const std::exception &e) { | |||
| rc_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, e.what()); | |||
| MS_LOG(ERROR) << rc_; | |||
| ShutdownGroup(); | |||
| } | |||
| } | |||
| @@ -152,19 +159,19 @@ Status Task::Join(WaitFlag blocking) { | |||
| // Because hostPush hung in DeviceQueueOp, wait 5 seconds and destroy the tdt | |||
| if (wait_times > 5 && my_name_.find("DeviceQueueOp") != std::string::npos) { | |||
| MS_LOG(WARNING) << "Wait " << wait_times << " seconds, " | |||
| << "the task: " << my_name_ << " will be destoryed by TdtHostDestory."; | |||
| << "the task: " << my_name_ << " will be destroyed by TdtHostDestroy."; | |||
| int32_t destory_status = tdt::TdtHostDestroy(); | |||
| if (destory_status != TDT_OK_CODE) { | |||
| MS_LOG(WARNING) << "Destory tsd failed, status = " << destory_status << "."; | |||
| MS_LOG(WARNING) << "Destroy tsd failed, status = " << destory_status << "."; | |||
| } else { | |||
| MS_LOG(INFO) << "Destory tsd success."; | |||
| MS_LOG(INFO) << "Destroy tsd success."; | |||
| } | |||
| // just wait 30 seconds | |||
| // case1: cpu usage 100%, DeviceQueueOp thread may destory without thrd_ future | |||
| // case1: cpu usage 100%, DeviceQueueOp thread may destroy without thrd_ future | |||
| if (wait_times > 30) { | |||
| MS_LOG(WARNING) << MyName() << " Thread ID " << ss.str() | |||
| << " is not responding. Maybe it's destoryed, task stop."; | |||
| << " is not responding. Maybe it's destroyed, task stop."; | |||
| break; | |||
| } | |||
| } | |||
| @@ -169,7 +169,6 @@ class DictIterator(Iterator): | |||
| return {k: self._transform_tensor(t) for k, t in self._iterator.GetNextAsMap().items()} | |||
| except RuntimeError as err: | |||
| ## maybe "Out of memory" / "MemoryError" error | |||
| logger.error("Got runtime err: {}.".format(err)) | |||
| err_info = str(err) | |||
| if err_info.find("Out of memory") >= 0 or err_info.find("MemoryError") >= 0: | |||
| logger.error("Memory error occurred, process will exit.") | |||
| @@ -161,6 +161,7 @@ TEST_F(MindDataTestSolarizeOp, TestOp6) { | |||
| Status s = op->Compute(test_input_tensor, &test_output_tensor); | |||
| EXPECT_TRUE(s.IsError()); | |||
| EXPECT_NE(s.ToString().find("threshold_min must be smaller or equal to threshold_max."), std::string::npos); | |||
| EXPECT_NE(s.ToString().find("Solarize: threshold_min must be smaller or equal to threshold_max."), | |||
| std::string::npos); | |||
| ASSERT_TRUE(s.get_code() == StatusCode::kUnexpectedError); | |||
| } | |||
| @@ -260,7 +260,7 @@ def test_bounding_box_augment_invalid_bounds_c(): | |||
| dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True) | |||
| check_bad_bbox(dataVoc2, test_op, InvalidBBoxType.HeightOverflow, "bounding boxes is out of bounds of the image") | |||
| dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True) | |||
| check_bad_bbox(dataVoc2, test_op, InvalidBBoxType.NegativeXY, "min_x") | |||
| check_bad_bbox(dataVoc2, test_op, InvalidBBoxType.NegativeXY, "negative value") | |||
| dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True) | |||
| check_bad_bbox(dataVoc2, test_op, InvalidBBoxType.WrongShape, "4 features") | |||
| @@ -148,7 +148,7 @@ def test_center_crop_errors(): | |||
| try: | |||
| test_center_crop_op(16777216, 16777216) | |||
| except RuntimeError as e: | |||
| assert "Unexpected error. CenterCropOp padding size is too big, it's more than 3 times the original size." in \ | |||
| assert "CenterCropOp padding size is more than 3 times the original size." in \ | |||
| str(e) | |||
| @@ -300,7 +300,7 @@ def test_py_vision_with_c_transforms(): | |||
| test_config([py_vision.Decode(), | |||
| py_vision.CenterCrop((2)), np.array, | |||
| c_transforms.Concatenate(0)]) | |||
| assert "Only 1D tensors supported" in str(error_info.value) | |||
| assert "only 1D input supported" in str(error_info.value) | |||
| def test_compose_with_custom_function(): | |||
| @@ -108,7 +108,7 @@ def test_concatenate_op_type_mismatch(): | |||
| with pytest.raises(RuntimeError) as error_info: | |||
| for _ in data: | |||
| pass | |||
| assert "Tensor types do not match" in str(error_info.value) | |||
| assert "input datatype does not match" in str(error_info.value) | |||
| def test_concatenate_op_type_mismatch2(): | |||
| @@ -123,7 +123,7 @@ def test_concatenate_op_type_mismatch2(): | |||
| with pytest.raises(RuntimeError) as error_info: | |||
| for _ in data: | |||
| pass | |||
| assert "Tensor types do not match" in str(error_info.value) | |||
| assert "input datatype does not match" in str(error_info.value) | |||
| def test_concatenate_op_incorrect_dim(): | |||
| @@ -138,7 +138,7 @@ def test_concatenate_op_incorrect_dim(): | |||
| with pytest.raises(RuntimeError) as error_info: | |||
| for _ in data: | |||
| pass | |||
| assert "Only 1D tensors supported" in str(error_info.value) | |||
| assert "only 1D input supported" in str(error_info.value) | |||
| def test_concatenate_op_wrong_axis(): | |||
| @@ -373,7 +373,7 @@ def test_cutmix_batch_fail5(): | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| error_message = "Both images and labels columns are required" | |||
| error_message = "both image and label columns are required" | |||
| assert error_message in str(error.value) | |||
| @@ -400,7 +400,7 @@ def test_cutmix_batch_fail6(): | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| error_message = "CutMixBatch: Image doesn't match the given image format." | |||
| error_message = "image doesn't match the NCHW format." | |||
| assert error_message in str(error.value) | |||
| @@ -425,7 +425,7 @@ def test_cutmix_batch_fail7(): | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| error_message = "CutMixBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC" | |||
| error_message = "wrong labels shape. The second column (labels) must have a shape of NC or NLC" | |||
| assert error_message in str(error.value) | |||
| @@ -110,9 +110,44 @@ def test_celeba_get_dataset_size(): | |||
| size = data.get_dataset_size() | |||
| assert size == 1 | |||
| def test_celeba_dataset_exception_file_path(): | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| try: | |||
| data = ds.CelebADataset(DATA_DIR, shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.create_dict_iterator(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.CelebADataset(DATA_DIR, shuffle=False) | |||
| data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.create_dict_iterator(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.CelebADataset(DATA_DIR, shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["attr"], num_parallel_workers=1) | |||
| for _ in data.create_dict_iterator(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| if __name__ == '__main__': | |||
| test_celeba_dataset_label() | |||
| test_celeba_dataset_op() | |||
| test_celeba_dataset_ext() | |||
| test_celeba_dataset_distribute() | |||
| test_celeba_get_dataset_size() | |||
| test_celeba_dataset_exception_file_path() | |||
| @@ -208,7 +208,7 @@ def test_cifar10_exception(): | |||
| with pytest.raises(ValueError, match=error_msg_6): | |||
| ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=0) | |||
| with pytest.raises(ValueError, match=error_msg_6): | |||
| ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=88) | |||
| ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=256) | |||
| error_msg_7 = "no .bin files found" | |||
| with pytest.raises(RuntimeError, match=error_msg_7): | |||
| @@ -358,7 +358,7 @@ def test_cifar100_exception(): | |||
| with pytest.raises(ValueError, match=error_msg_6): | |||
| ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=0) | |||
| with pytest.raises(ValueError, match=error_msg_6): | |||
| ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=88) | |||
| ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=256) | |||
| error_msg_7 = "no .bin files found" | |||
| with pytest.raises(RuntimeError, match=error_msg_7): | |||
| @@ -446,6 +446,61 @@ def test_cifar_usage(): | |||
| assert ds.Cifar100Dataset(all_cifar100, usage="all").get_dataset_size() == 60000 | |||
| def test_cifar_exception_file_path(): | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| try: | |||
| data = ds.Cifar10Dataset(DATA_DIR_10) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| num_rows = 0 | |||
| for _ in data.create_dict_iterator(): | |||
| num_rows += 1 | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.Cifar10Dataset(DATA_DIR_10) | |||
| data = data.map(operations=exception_func, input_columns=["label"], num_parallel_workers=1) | |||
| num_rows = 0 | |||
| for _ in data.create_dict_iterator(): | |||
| num_rows += 1 | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.Cifar100Dataset(DATA_DIR_100) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| num_rows = 0 | |||
| for _ in data.create_dict_iterator(): | |||
| num_rows += 1 | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.Cifar100Dataset(DATA_DIR_100) | |||
| data = data.map(operations=exception_func, input_columns=["coarse_label"], num_parallel_workers=1) | |||
| num_rows = 0 | |||
| for _ in data.create_dict_iterator(): | |||
| num_rows += 1 | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.Cifar100Dataset(DATA_DIR_100) | |||
| data = data.map(operations=exception_func, input_columns=["fine_label"], num_parallel_workers=1) | |||
| num_rows = 0 | |||
| for _ in data.create_dict_iterator(): | |||
| num_rows += 1 | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| if __name__ == '__main__': | |||
| test_cifar10_content_check() | |||
| test_cifar10_basic() | |||
| @@ -461,3 +516,4 @@ if __name__ == '__main__': | |||
| test_cifar100_visualize(plot=False) | |||
| test_cifar_usage() | |||
| test_cifar_exception_file_path() | |||
| @@ -367,6 +367,42 @@ def test_clue_invalid_files(): | |||
| assert AFQMC_DIR in str(info.value) | |||
| def test_clue_exception_file_path(): | |||
| """ | |||
| Test file info in err msg when exception occur of CLUE dataset | |||
| """ | |||
| TRAIN_FILE = '../data/dataset/testCLUE/afqmc/train.json' | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| try: | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train') | |||
| data = data.map(operations=exception_func, input_columns=["label"], num_parallel_workers=1) | |||
| for _ in data.create_dict_iterator(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train') | |||
| data = data.map(operations=exception_func, input_columns=["sentence1"], num_parallel_workers=1) | |||
| for _ in data.create_dict_iterator(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train') | |||
| data = data.map(operations=exception_func, input_columns=["sentence2"], num_parallel_workers=1) | |||
| for _ in data.create_dict_iterator(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| if __name__ == "__main__": | |||
| test_clue() | |||
| test_clue_num_shards() | |||
| @@ -380,3 +416,4 @@ if __name__ == "__main__": | |||
| test_clue_wsc() | |||
| test_clue_to_device() | |||
| test_clue_invalid_files() | |||
| test_clue_exception_file_path() | |||
| @@ -285,6 +285,166 @@ def test_coco_case_exception(): | |||
| except ValueError as e: | |||
| assert "CocoDataset doesn't support PKSampler" in str(e) | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection") | |||
| data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection") | |||
| data1 = data1.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection") | |||
| data1 = data1.map(operations=exception_func, input_columns=["bbox"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection") | |||
| data1 = data1.map(operations=exception_func, input_columns=["category_id"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff") | |||
| data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff") | |||
| data1 = data1.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff") | |||
| data1 = data1.map(operations=exception_func, input_columns=["segmentation"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff") | |||
| data1 = data1.map(operations=exception_func, input_columns=["iscrowd"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint") | |||
| data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint") | |||
| data1 = data1.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint") | |||
| data1 = data1.map(operations=exception_func, input_columns=["keypoints"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint") | |||
| data1 = data1.map(operations=exception_func, input_columns=["num_keypoints"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic") | |||
| data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic") | |||
| data1 = data1.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic") | |||
| data1 = data1.map(operations=exception_func, input_columns=["bbox"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic") | |||
| data1 = data1.map(operations=exception_func, input_columns=["category_id"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic") | |||
| data1 = data1.map(operations=exception_func, input_columns=["area"], num_parallel_workers=1) | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| if __name__ == '__main__': | |||
| test_coco_detection() | |||
| @@ -241,6 +241,62 @@ def test_csv_dataset_exception(): | |||
| pass | |||
| assert "failed to parse file" in str(err.value) | |||
| TEST_FILE1 = '../data/dataset/testCSV/quoted.csv' | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| try: | |||
| data = ds.CSVDataset( | |||
| TEST_FILE1, | |||
| column_defaults=["", "", "", ""], | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["col1"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.CSVDataset( | |||
| TEST_FILE1, | |||
| column_defaults=["", "", "", ""], | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["col2"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.CSVDataset( | |||
| TEST_FILE1, | |||
| column_defaults=["", "", "", ""], | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["col3"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.CSVDataset( | |||
| TEST_FILE1, | |||
| column_defaults=["", "", "", ""], | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["col4"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| def test_csv_dataset_duplicate_columns(): | |||
| data = ds.CSVDataset( | |||
| @@ -14,6 +14,7 @@ | |||
| # ============================================================================== | |||
| import pytest | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.vision.c_transforms as vision | |||
| from mindspore import log as logger | |||
| DATA_DIR = "../data/dataset/testPK/data" | |||
| @@ -716,6 +717,46 @@ def test_imagefolder_zip(): | |||
| assert num_iter == 10 | |||
| def test_imagefolder_exception(): | |||
| logger.info("Test imagefolder exception") | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| def exception_func2(image, label): | |||
| raise Exception("Error occur!") | |||
| try: | |||
| data = ds.ImageFolderDataset(DATA_DIR) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.ImageFolderDataset(DATA_DIR) | |||
| data = data.map(operations=exception_func2, input_columns=["image", "label"], | |||
| output_columns=["image", "label", "label1"], | |||
| column_order=["image", "label", "label1"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.ImageFolderDataset(DATA_DIR) | |||
| data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| if __name__ == '__main__': | |||
| test_imagefolder_basic() | |||
| logger.info('test_imagefolder_basic Ended.\n') | |||
| @@ -797,3 +838,6 @@ if __name__ == '__main__': | |||
| test_imagefolder_zip() | |||
| logger.info('test_imagefolder_zip Ended.\n') | |||
| test_imagefolder_exception() | |||
| logger.info('test_imagefolder_exception Ended.\n') | |||
| @@ -15,6 +15,7 @@ | |||
| import numpy as np | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.vision.c_transforms as vision | |||
| import mindspore.dataset.transforms.c_transforms as data_trans | |||
| from mindspore import log as logger | |||
| @@ -127,6 +128,39 @@ def test_manifest_dataset_get_num_class(): | |||
| assert data1.num_classes() == 3 | |||
| def test_manifest_dataset_exception(): | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| try: | |||
| data = ds.ManifestDataset(DATA_FILE) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.ManifestDataset(DATA_FILE) | |||
| data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.ManifestDataset(DATA_FILE) | |||
| data = data.map(operations=exception_func, input_columns=["label"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| if __name__ == '__main__': | |||
| test_manifest_dataset_train() | |||
| test_manifest_dataset_eval() | |||
| @@ -135,3 +169,4 @@ if __name__ == '__main__': | |||
| test_manifest_dataset_multi_label() | |||
| test_manifest_dataset_multi_label_onehot() | |||
| test_manifest_dataset_get_num_class() | |||
| test_manifest_dataset_exception() | |||
| @@ -20,6 +20,7 @@ import pytest | |||
| import numpy as np | |||
| import matplotlib.pyplot as plt | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.vision.c_transforms as vision | |||
| from mindspore import log as logger | |||
| DATA_DIR = "../data/dataset/testMnistData" | |||
| @@ -196,7 +197,7 @@ def test_mnist_exception(): | |||
| with pytest.raises(ValueError, match=error_msg_6): | |||
| ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=0) | |||
| with pytest.raises(ValueError, match=error_msg_6): | |||
| ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=65) | |||
| ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=256) | |||
| with pytest.raises(ValueError, match=error_msg_6): | |||
| ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=-2) | |||
| @@ -204,6 +205,27 @@ def test_mnist_exception(): | |||
| with pytest.raises(TypeError, match=error_msg_7): | |||
| ds.MnistDataset(DATA_DIR, num_shards=2, shard_id="0") | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| error_msg_8 = "The corresponding data files" | |||
| with pytest.raises(RuntimeError, match=error_msg_8): | |||
| data = ds.MnistDataset(DATA_DIR) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| with pytest.raises(RuntimeError, match=error_msg_8): | |||
| data = ds.MnistDataset(DATA_DIR) | |||
| data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| with pytest.raises(RuntimeError, match=error_msg_8): | |||
| data = ds.MnistDataset(DATA_DIR) | |||
| data = data.map(operations=exception_func, input_columns=["label"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| def test_mnist_visualize(plot=False): | |||
| """ | |||
| @@ -205,6 +205,15 @@ def test_textline_dataset_exceptions(): | |||
| _ = ds.TextFileDataset("") | |||
| assert "The following patterns did not match any files" in str(error_info.value) | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| with pytest.raises(RuntimeError) as error_info: | |||
| data = ds.TextFileDataset(DATA_FILE) | |||
| data = data.map(operations=exception_func, input_columns=["text"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(error_info.value) | |||
| if __name__ == "__main__": | |||
| test_textline_dataset_one_file() | |||
| @@ -321,6 +321,45 @@ def test_tfrecord_invalid_columns(): | |||
| assert "Invalid data, failed to find column name: not_exist" in str(info.value) | |||
| def test_tfrecord_exception(): | |||
| logger.info("test_tfrecord_exception") | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| with pytest.raises(RuntimeError) as info: | |||
| schema = ds.Schema() | |||
| schema.add_column('col_1d', de_type=mstype.int64, shape=[2]) | |||
| schema.add_column('col_2d', de_type=mstype.int64, shape=[2, 2]) | |||
| schema.add_column('col_3d', de_type=mstype.int64, shape=[2, 2, 2]) | |||
| data = ds.TFRecordDataset(FILES, schema=schema, shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["col_1d"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(info.value) | |||
| with pytest.raises(RuntimeError) as info: | |||
| schema = ds.Schema() | |||
| schema.add_column('col_1d', de_type=mstype.int64, shape=[2]) | |||
| schema.add_column('col_2d', de_type=mstype.int64, shape=[2, 2]) | |||
| schema.add_column('col_3d', de_type=mstype.int64, shape=[2, 2, 2]) | |||
| data = ds.TFRecordDataset(FILES, schema=schema, shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["col_2d"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(info.value) | |||
| with pytest.raises(RuntimeError) as info: | |||
| schema = ds.Schema() | |||
| schema.add_column('col_1d', de_type=mstype.int64, shape=[2]) | |||
| schema.add_column('col_2d', de_type=mstype.int64, shape=[2, 2]) | |||
| schema.add_column('col_3d', de_type=mstype.int64, shape=[2, 2, 2]) | |||
| data = ds.TFRecordDataset(FILES, schema=schema, shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["col_3d"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(info.value) | |||
| if __name__ == '__main__': | |||
| test_tfrecord_shape() | |||
| test_tfrecord_read_all_dataset() | |||
| @@ -341,3 +380,4 @@ if __name__ == '__main__': | |||
| test_tfrecord_invalid_files() | |||
| test_tf_wrong_schema() | |||
| test_tfrecord_invalid_columns() | |||
| test_tfrecord_exception() | |||
| @@ -181,6 +181,93 @@ def test_voc_exception(): | |||
| except RuntimeError: | |||
| pass | |||
| def exception_func(item): | |||
| raise Exception("Error occur!") | |||
| try: | |||
| data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False) | |||
| data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["bbox"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["difficult"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["truncate"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False) | |||
| data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) | |||
| data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False) | |||
| data = data.map(operations=exception_func, input_columns=["target"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| try: | |||
| data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False) | |||
| data = data.map(operations=vision.Decode(), input_columns=["target"], num_parallel_workers=1) | |||
| data = data.map(operations=exception_func, input_columns=["target"], num_parallel_workers=1) | |||
| for _ in data.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "map operation: [PyFunc] failed. The corresponding data files" in str(e) | |||
| def test_voc_num_classes(): | |||
| data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True) | |||