| @@ -23,6 +23,7 @@ | |||
| #include "dataset/engine/datasetops/source/image_folder_op.h" | |||
| #include "dataset/engine/datasetops/source/mnist_op.h" | |||
| #include "dataset/engine/datasetops/source/voc_op.h" | |||
| #include "dataset/engine/datasetops/source/coco_op.h" | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/engine/dataset_iterator.h" | |||
| #include "dataset/engine/datasetops/source/manifest_op.h" | |||
| @@ -65,6 +66,7 @@ static std::unordered_map<uint32_t, pFunction> g_parse_op_func_ = {{kStorage, &D | |||
| {kMnist, &DEPipeline::ParseMnistOp}, | |||
| {kManifest, &DEPipeline::ParseManifestOp}, | |||
| {kVoc, &DEPipeline::ParseVOCOp}, | |||
| {kCoco, &DEPipeline::ParseCocoOp}, | |||
| {kCifar10, &DEPipeline::ParseCifar10Op}, | |||
| {kCifar100, &DEPipeline::ParseCifar100Op}, | |||
| {kCelebA, &DEPipeline::ParseCelebAOp}, | |||
| @@ -930,6 +932,16 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> * | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| if (args["task"].is_none()) { | |||
| std::string err_msg = "Error: No task specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| if (args["mode"].is_none()) { | |||
| std::string err_msg = "Error: No mode specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| std::shared_ptr<VOCOp::Builder> builder = std::make_shared<VOCOp::Builder>(); | |||
| (void)builder->SetDir(ToString(args["dataset_dir"])); | |||
| (void)builder->SetTask(ToString(args["task"])); | |||
| @@ -957,6 +969,47 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> * | |||
| return Status::OK(); | |||
| } | |||
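| // Parse the Python-side arguments of CocoDataset and build a CocoOp: dataset_dir, | |||
| // annotation_file and task are required; num_parallel_workers, sampler and decode are optional. | |||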
| Status DEPipeline::ParseCocoOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) { | |||
| if (args["dataset_dir"].is_none()) { | |||
| std::string err_msg = "Error: No dataset path specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| if (args["annotation_file"].is_none()) { | |||
| std::string err_msg = "Error: No annotation_file specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| if (args["task"].is_none()) { | |||
| std::string err_msg = "Error: No task specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| std::shared_ptr<CocoOp::Builder> builder = std::make_shared<CocoOp::Builder>(); | |||
| (void)builder->SetDir(ToString(args["dataset_dir"])); | |||
| (void)builder->SetFile(ToString(args["annotation_file"])); | |||
| (void)builder->SetTask(ToString(args["task"])); | |||
| for (auto arg : args) { | |||
| std::string key = py::str(arg.first); | |||
| py::handle value = arg.second; | |||
| if (!value.is_none()) { | |||
| if (key == "num_parallel_workers") { | |||
| (void)builder->SetNumWorkers(ToInt(value)); | |||
| } else if (key == "sampler") { | |||
| auto create = py::reinterpret_borrow<py::object>(value).attr("create"); | |||
| std::shared_ptr<Sampler> sampler = create().cast<std::shared_ptr<Sampler>>(); | |||
| (void)builder->SetSampler(std::move(sampler)); | |||
| } else if (key == "decode") { | |||
| (void)builder->SetDecode(ToBool(value)); | |||
| } | |||
| } | |||
| } | |||
| std::shared_ptr<CocoOp> op; | |||
| RETURN_IF_NOT_OK(builder->Build(&op)); | |||
| *ptr = op; | |||
| return Status::OK(); | |||
| } | |||
| Status DEPipeline::ParseCifar10Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) { | |||
| // Required arguments | |||
| if (args["dataset_dir"].is_none()) { | |||
| @@ -58,6 +58,7 @@ enum OpName { | |||
| kMnist, | |||
| kManifest, | |||
| kVoc, | |||
| kCoco, | |||
| kCifar10, | |||
| kCifar100, | |||
| kCelebA, | |||
| @@ -142,6 +143,8 @@ class DEPipeline { | |||
| Status ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| Status ParseCocoOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| Status ParseCifar10Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| Status ParseCifar100Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| @@ -56,6 +56,7 @@ | |||
| #include "dataset/engine/jagged_connector.h" | |||
| #include "dataset/engine/datasetops/source/text_file_op.h" | |||
| #include "dataset/engine/datasetops/source/voc_op.h" | |||
| #include "dataset/engine/datasetops/source/coco_op.h" | |||
| #include "dataset/engine/gnn/graph.h" | |||
| #include "dataset/kernels/data/to_float16_op.h" | |||
| #include "dataset/text/kernels/jieba_tokenizer_op.h" | |||
| @@ -214,6 +215,18 @@ void bindDatasetOps(py::module *m) { | |||
| THROW_IF_ERROR(VOCOp::GetClassIndexing(dir, task_type, task_mode, dict, &output_class_indexing)); | |||
| return output_class_indexing; | |||
| }); | |||
| (void)py::class_<CocoOp, DatasetOp, std::shared_ptr<CocoOp>>(*m, "CocoOp") | |||
| .def_static("get_class_indexing", | |||
| [](const std::string &dir, const std::string &file, const std::string &task) { | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> output_class_indexing; | |||
| THROW_IF_ERROR(CocoOp::GetClassIndexing(dir, file, task, &output_class_indexing)); | |||
| return output_class_indexing; | |||
| }) | |||
| .def_static("get_num_rows", [](const std::string &dir, const std::string &file, const std::string &task) { | |||
| int64_t count = 0; | |||
| THROW_IF_ERROR(CocoOp::CountTotalRows(dir, file, task, &count)); | |||
| return count; | |||
| }); | |||
| } | |||
| void bindTensor(py::module *m) { | |||
| (void)py::class_<GlobalContext>(*m, "GlobalContext") | |||
| @@ -576,6 +589,7 @@ PYBIND11_MODULE(_c_dataengine, m) { | |||
| .value("MNIST", OpName::kMnist) | |||
| .value("MANIFEST", OpName::kManifest) | |||
| .value("VOC", OpName::kVoc) | |||
| .value("COCO", OpName::kCoco) | |||
| .value("CIFAR10", OpName::kCifar10) | |||
| .value("CIFAR100", OpName::kCifar100) | |||
| .value("RANDOMDATA", OpName::kRandomData) | |||
| @@ -13,6 +13,7 @@ add_library(engine-datasetops-source OBJECT | |||
| image_folder_op.cc | |||
| mnist_op.cc | |||
| voc_op.cc | |||
| coco_op.cc | |||
| manifest_op.cc | |||
| cifar_op.cc | |||
| random_data_op.cc | |||
| @@ -0,0 +1,632 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "dataset/engine/datasetops/source/coco_op.h" | |||
| #include <algorithm> | |||
| #include <fstream> | |||
| #include <iomanip> | |||
| #include "common/utils.h" | |||
| #include "dataset/core/config_manager.h" | |||
| #include "dataset/core/tensor_shape.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||
| #include "dataset/engine/db_connector.h" | |||
| #include "dataset/engine/execution_tree.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| const char kColumnImage[] = "image"; | |||
| const char kJsonImages[] = "images"; | |||
| const char kJsonImagesFileName[] = "file_name"; | |||
| const char kJsonId[] = "id"; | |||
| const char kJsonAnnotations[] = "annotations"; | |||
| const char kJsonAnnoSegmentation[] = "segmentation"; | |||
| const char kJsonAnnoCounts[] = "counts"; | |||
| const char kJsonAnnoSegmentsInfo[] = "segments_info"; | |||
| const char kJsonAnnoIscrowd[] = "iscrowd"; | |||
| const char kJsonAnnoBbox[] = "bbox"; | |||
| const char kJsonAnnoArea[] = "area"; | |||
| const char kJsonAnnoImageId[] = "image_id"; | |||
| const char kJsonAnnoNumKeypoints[] = "num_keypoints"; | |||
| const char kJsonAnnoKeypoints[] = "keypoints"; | |||
| const char kJsonAnnoCategoryId[] = "category_id"; | |||
| const char kJsonCategories[] = "categories"; | |||
| const char kJsonCategoriesIsthing[] = "isthing"; | |||
| const char kJsonCategoriesName[] = "name"; | |||
| const float kDefaultPadValue = -1.0; | |||
| const unsigned int kPadValueZero = 0; | |||
| CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| builder_task_type_ = TaskType::Detection; | |||
| } | |||
| Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) { | |||
| RETURN_IF_NOT_OK(SanityCheck()); | |||
| if (builder_sampler_ == nullptr) { | |||
| int64_t num_samples = 0; | |||
| int64_t start_index = 0; | |||
| builder_sampler_ = std::make_shared<SequentialSampler>(start_index, num_samples); | |||
| } | |||
| builder_schema_ = std::make_unique<DataSchema>(); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kColumnImage), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| switch (builder_task_type_) { | |||
| case TaskType::Detection: | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoBbox), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoCategoryId), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoIscrowd), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| break; | |||
| case TaskType::Stuff: | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoSegmentation), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoIscrowd), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| break; | |||
| case TaskType::Keypoint: | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoKeypoints), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoNumKeypoints), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| break; | |||
| case TaskType::Panoptic: | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoBbox), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoCategoryId), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoIscrowd), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoArea), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| break; | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Invalid task type"); | |||
| } | |||
| *ptr = std::make_shared<CocoOp>(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_, | |||
| builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_, | |||
| std::move(builder_schema_), std::move(builder_sampler_)); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::Builder::SanityCheck() { | |||
| Path dir(builder_dir_); | |||
| Path file(builder_file_); | |||
| std::string err_msg; | |||
| err_msg += dir.IsDirectory() == false ? "Coco image folder path is invalid or not set\n" : ""; | |||
| err_msg += file.Exists() == false ? "Coco annotation json path is invalid or not set\n" : ""; | |||
| err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers is set to 0 or negative\n" : ""; | |||
| return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, | |||
| int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<Sampler> sampler) | |||
| : ParallelOp(num_workers, queue_size), | |||
| decode_(decode), | |||
| row_cnt_(0), | |||
| buf_cnt_(0), | |||
| task_type_(task_type), | |||
| image_folder_path_(image_folder_path), | |||
| annotation_path_(annotation_path), | |||
| rows_per_buffer_(rows_per_buffer), | |||
| sampler_(std::move(sampler)), | |||
| data_schema_(std::move(data_schema)) { | |||
| // Set the column name map (base class field) | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| } | |||
| io_block_queues_.Init(num_workers_, queue_size); | |||
| } | |||
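| // Pack the sampler's ids into IOBlocks of rows_per_buffer_ ids each and hand them out | |||
| // round-robin to the worker queues; any trailing partial block is flushed by operator()(). | |||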
| Status CocoOp::TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys) { | |||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) { | |||
| if ((*itr) >= num_rows_) continue; | |||
| keys->push_back(*itr); | |||
| row_cnt_++; | |||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||
| std::make_unique<IOBlock>(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); | |||
| keys->clear(); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::operator()() { | |||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| while (true) { | |||
| std::vector<int64_t> keys; | |||
| keys.reserve(rows_per_buffer_); | |||
| while (sampler_buffer->eoe() == false) { | |||
| std::shared_ptr<Tensor> sample_ids; | |||
| RETURN_IF_NOT_OK(sampler_buffer->GetTensor(&sample_ids, 0, 0)); | |||
| if (sample_ids->type() != DataType(DataType::DE_INT64)) { | |||
| RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64"); | |||
| } | |||
| RETURN_IF_NOT_OK(TraverseSampleIds(sample_ids, &keys)); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| } | |||
| if (keys.empty() == false) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| } | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||
| std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); | |||
| for (int32_t i = 0; i < num_workers_; i++) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| } | |||
| return Status::OK(); | |||
| } else { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK(wp_.Wait()); | |||
| wp_.Clear(); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| } | |||
| } | |||
| } | |||
| void CocoOp::Print(std::ostream &out, bool show_all) const { | |||
| // Always show the id and name as the first line, regardless of whether this is a summary or detailed print | |||
| out << "(" << std::setw(2) << operator_id_ << ") <CocoOp>:"; | |||
| if (!show_all) { | |||
| // Call the super class for displaying any common 1-liner info | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal 1-liner info for this op | |||
| out << "\n"; | |||
| } else { | |||
| // Call the super class for displaying any common detailed info | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal stuff | |||
| out << "\nNumber of rows: " << num_rows_ << "\nCOCO Directory: " << image_folder_path_ << "\n\n"; | |||
| } | |||
| } | |||
| Status CocoOp::Reset() { | |||
| RETURN_IF_NOT_OK(sampler_->Reset()); | |||
| row_cnt_ = 0; | |||
| wp_.Set(); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::LoadTensorRow(const std::string &image_id, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> image, coordinate; | |||
| auto itr = coordinate_map_.find(image_id); | |||
| if (itr == coordinate_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); | |||
| std::string kImageFile = image_folder_path_ + image_id; | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); | |||
| auto bboxRow = itr->second; | |||
| std::vector<float> bbox_row; | |||
| dsize_t bbox_row_num = static_cast<dsize_t>(bboxRow.size()); | |||
| dsize_t bbox_column_num = 0; | |||
| for (auto bbox : bboxRow) { | |||
| if (static_cast<dsize_t>(bbox.size()) > bbox_column_num) { | |||
| bbox_column_num = static_cast<dsize_t>(bbox.size()); | |||
| } | |||
| } | |||
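| // Annotations for one image can have different lengths (e.g. a fixed 4-value bbox vs. a | |||
| // variable-length segmentation), so shorter rows are padded with kDefaultPadValue (-1.0) | |||
| // below to keep the [bbox_row_num, bbox_column_num] tensor rectangular. | |||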
| for (auto bbox : bboxRow) { | |||
| bbox_row.insert(bbox_row.end(), bbox.begin(), bbox.end()); | |||
| dsize_t pad_len = bbox_column_num - static_cast<dsize_t>(bbox.size()); | |||
| if (pad_len > 0) { | |||
| for (dsize_t i = 0; i < pad_len; i++) { | |||
| bbox_row.push_back(kDefaultPadValue); | |||
| } | |||
| } | |||
| } | |||
| std::vector<dsize_t> bbox_dim = {bbox_row_num, bbox_column_num}; | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&coordinate, data_schema_->column(1).tensorImpl(), TensorShape(bbox_dim), | |||
| data_schema_->column(1).type(), | |||
| reinterpret_cast<unsigned char *>(&bbox_row[0]))); | |||
| if (task_type_ == TaskType::Detection) { | |||
| RETURN_IF_NOT_OK(LoadDetectionTensorRow(image_id, image, coordinate, trow)); | |||
| } else if (task_type_ == TaskType::Stuff || task_type_ == TaskType::Keypoint) { | |||
| RETURN_IF_NOT_OK(LoadSimpleTensorRow(image_id, image, coordinate, trow)); | |||
| } else if (task_type_ == TaskType::Panoptic) { | |||
| RETURN_IF_NOT_OK(LoadMixTensorRow(image_id, image, coordinate, trow)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid task type."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| // When task is Detection, user can get data with four columns: | |||
| // column ["image"] with datatype=uint8 | |||
| // column ["bbox"] with datatype=float32 | |||
| // column ["category_id"] with datatype=uint32 | |||
| // column ["iscrowd"] with datatype=uint32 | |||
| // Note: column ["iscrowd"] is used by some testcases, such as FasterRcnn. | |||
| // If "iscrowd" does not exist in the annotation, the user will get the default value 0. | |||
| Status CocoOp::LoadDetectionTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> category_id, iscrowd; | |||
| std::vector<uint32_t> category_id_row; | |||
| std::vector<uint32_t> iscrowd_row; | |||
| auto itr_item = simple_item_map_.find(image_id); | |||
| if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); | |||
| std::vector<uint32_t> annotation = itr_item->second; | |||
| for (int64_t i = 0; i < annotation.size(); i++) { | |||
| if (i % 2 == 0) { | |||
| category_id_row.push_back(annotation[i]); | |||
| } else if (i % 2 == 1) { | |||
| iscrowd_row.push_back(annotation[i]); | |||
| } | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), | |||
| data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&category_id_row[0]))); | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), | |||
| data_schema_->column(3).type(), reinterpret_cast<unsigned char *>(&iscrowd_row[0]))); | |||
| (*trow) = {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd)}; | |||
| return Status::OK(); | |||
| } | |||
| // When task is "Stuff"/"Keypoint", user can get data with three columns: | |||
| // column ["image"] with datatype=uint8 | |||
| // column ["segmentation"]/["keypoints"] with datatype=float32 | |||
| // column ["iscrowd"]/["num_keypoints"] with datatype=uint32 | |||
| Status CocoOp::LoadSimpleTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> item; | |||
| std::vector<uint32_t> item_queue; | |||
| auto itr_item = simple_item_map_.find(image_id); | |||
| if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); | |||
| item_queue = itr_item->second; | |||
| std::vector<dsize_t> bbox_dim = {static_cast<dsize_t>(item_queue.size()), 1}; | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&item, data_schema_->column(2).tensorImpl(), TensorShape(bbox_dim), | |||
| data_schema_->column(2).type(), | |||
| reinterpret_cast<unsigned char *>(&item_queue[0]))); | |||
| (*trow) = {std::move(image), std::move(coordinate), std::move(item)}; | |||
| return Status::OK(); | |||
| } | |||
| // When task is "Panoptic", user can get data with five columns: | |||
| // column ["image"] with datatype=uint8 | |||
| // column ["bbox"] with datatype=float32 | |||
| // column ["category_id"] with datatype=uint32 | |||
| // column ["iscrowd"] with datatype=uint32 | |||
| // column ["area"] with datattype=uint32 | |||
| Status CocoOp::LoadMixTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> category_id, iscrowd, area; | |||
| std::vector<uint32_t> category_id_row; | |||
| std::vector<uint32_t> iscrowd_row; | |||
| std::vector<uint32_t> area_row; | |||
| auto itr_item = simple_item_map_.find(image_id); | |||
| if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); | |||
| std::vector<uint32_t> annotation = itr_item->second; | |||
| for (int64_t i = 0; i < annotation.size(); i++) { | |||
| if (i % 3 == 0) { | |||
| category_id_row.push_back(annotation[i]); | |||
| } else if (i % 3 == 1) { | |||
| iscrowd_row.push_back(annotation[i]); | |||
| } else if (i % 3 == 2) { | |||
| area_row.push_back(annotation[i]); | |||
| } | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), | |||
| data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&category_id_row[0]))); | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), | |||
| data_schema_->column(3).type(), reinterpret_cast<unsigned char *>(&iscrowd_row[0]))); | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &area, data_schema_->column(4).tensorImpl(), TensorShape({static_cast<dsize_t>(area_row.size()), 1}), | |||
| data_schema_->column(4).type(), reinterpret_cast<unsigned char *>(&area_row[0]))); | |||
| (*trow) = {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd), std::move(area)}; | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||
| TensorRow trow; | |||
| for (const int64_t &key : keys) { | |||
| RETURN_IF_NOT_OK(this->LoadTensorRow(image_ids_[key], &trow)); | |||
| deq->push_back(std::move(trow)); | |||
| } | |||
| (*db)->set_tensor_table(std::move(deq)); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::WorkerEntry(int32_t worker_id) { | |||
| TaskManager::FindMe()->Post(); | |||
| int64_t buffer_id = worker_id; | |||
| std::unique_ptr<IOBlock> io_block; | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||
| while (io_block != nullptr) { | |||
| if (io_block->eoe() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| buffer_id = worker_id; | |||
| } else if (io_block->eof() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| } else { | |||
| std::vector<int64_t> keys; | |||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||
| if (keys.empty() == true) return Status::OK(); | |||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||
| buffer_id += num_workers_; | |||
| } | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||
| } | |||
| RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); | |||
| } | |||
| template <typename T> | |||
| Status CocoOp::SearchNodeInJson(nlohmann::json input_tree, std::string node_name, T *output_node) { | |||
| auto node = input_tree.find(node_name); | |||
| if (node == input_tree.end()) RETURN_STATUS_UNEXPECTED("Invalid node found in json : " + node_name); | |||
| (*output_node) = *node; | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::ParseAnnotationIds() { | |||
| std::ifstream in(annotation_path_); | |||
| nlohmann::json js; | |||
| in >> js; | |||
| std::vector<std::string> image_que; | |||
| nlohmann::json image_list; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(js, std::string(kJsonImages), &image_list)); | |||
| RETURN_IF_NOT_OK(ImageColumnLoad(image_list, &image_que)); | |||
| nlohmann::json annotations_list; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(js, std::string(kJsonAnnotations), &annotations_list)); | |||
| for (auto annotation : annotations_list) { | |||
| int32_t image_id = 0, id = 0; | |||
| std::string file_name; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonAnnoImageId), &image_id)); | |||
| auto itr_file = image_index_.find(image_id); | |||
| if (itr_file == image_index_.end()) | |||
| RETURN_STATUS_UNEXPECTED("Invalid image id of annotations : " + std::to_string(image_id)); | |||
| file_name = itr_file->second; | |||
| switch (task_type_) { | |||
| case TaskType::Detection: | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id)); | |||
| RETURN_IF_NOT_OK(DetectionColumnLoad(annotation, file_name, id)); | |||
| break; | |||
| case TaskType::Stuff: | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id)); | |||
| RETURN_IF_NOT_OK(StuffColumnLoad(annotation, file_name, id)); | |||
| break; | |||
| case TaskType::Keypoint: | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id)); | |||
| RETURN_IF_NOT_OK(KeypointColumnLoad(annotation, file_name, id)); | |||
| break; | |||
| case TaskType::Panoptic: | |||
| RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id)); | |||
| break; | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Invalid task type"); | |||
| } | |||
| } | |||
| for (auto img : image_que) { | |||
| if (coordinate_map_.find(img) != coordinate_map_.end()) image_ids_.push_back(img); | |||
| } | |||
| if (task_type_ == TaskType::Detection || task_type_ == TaskType::Panoptic) { | |||
| nlohmann::json node_categories; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(js, std::string(kJsonCategories), &node_categories)); | |||
| RETURN_IF_NOT_OK(CategoriesColumnLoad(node_categories)); | |||
| } | |||
| num_rows_ = image_ids_.size(); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::ImageColumnLoad(nlohmann::json image_tree, std::vector<std::string> *image_vec) { | |||
| if (image_tree.size() == 0) { | |||
| RETURN_STATUS_UNEXPECTED("No images found in " + annotation_path_); | |||
| } | |||
| for (auto img : image_tree) { | |||
| std::string file_name; | |||
| int32_t id = 0; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(img, std::string(kJsonImagesFileName), &file_name)); | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(img, std::string(kJsonId), &id)); | |||
| image_index_[id] = file_name; | |||
| image_vec->push_back(file_name); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::DetectionColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, | |||
| const int32_t &unique_id) { | |||
| std::vector<float> bbox; | |||
| nlohmann::json node_bbox; | |||
| uint32_t category_id = 0, iscrowd = 0; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoBbox), &node_bbox)); | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoCategoryId), &category_id)); | |||
| auto node_iscrowd = annotation_tree.find(kJsonAnnoIscrowd); | |||
| if (node_iscrowd != annotation_tree.end()) iscrowd = *node_iscrowd; | |||
| bbox.insert(bbox.end(), node_bbox.begin(), node_bbox.end()); | |||
| coordinate_map_[image_file].push_back(bbox); | |||
| simple_item_map_[image_file].push_back(category_id); | |||
| simple_item_map_[image_file].push_back(iscrowd); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::StuffColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, | |||
| const int32_t &unique_id) { | |||
| uint32_t iscrowd = 0; | |||
| std::vector<float> bbox; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoIscrowd), &iscrowd)); | |||
| simple_item_map_[image_file].push_back(iscrowd); | |||
| nlohmann::json segmentation; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoSegmentation), &segmentation)); | |||
| if (iscrowd == 0) { | |||
| for (auto item : segmentation) { | |||
| if (bbox.size() > 0) bbox.clear(); | |||
| bbox.insert(bbox.end(), item.begin(), item.end()); | |||
| coordinate_map_[image_file].push_back(bbox); | |||
| } | |||
| } else if (iscrowd == 1) { | |||
| nlohmann::json segmentation_count; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(segmentation, std::string(kJsonAnnoCounts), &segmentation_count)); | |||
| bbox.insert(bbox.end(), segmentation_count.begin(), segmentation_count.end()); | |||
| coordinate_map_[image_file].push_back(bbox); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::KeypointColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, | |||
| const int32_t &unique_id) { | |||
| auto itr_num_keypoint = annotation_tree.find(kJsonAnnoNumKeypoints); | |||
| if (itr_num_keypoint == annotation_tree.end()) | |||
| RETURN_STATUS_UNEXPECTED("No num_keypoint found in annotations where id: " + std::to_string(unique_id)); | |||
| simple_item_map_[image_file].push_back(*itr_num_keypoint); | |||
| auto itr_keypoint = annotation_tree.find(kJsonAnnoKeypoints); | |||
| if (itr_keypoint == annotation_tree.end()) | |||
| RETURN_STATUS_UNEXPECTED("No keypoint found in annotations where id: " + std::to_string(unique_id)); | |||
| coordinate_map_[image_file].push_back(*itr_keypoint); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::PanopticColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, | |||
| const int32_t &image_id) { | |||
| auto itr_segments = annotation_tree.find(kJsonAnnoSegmentsInfo); | |||
| if (itr_segments == annotation_tree.end()) | |||
| RETURN_STATUS_UNEXPECTED("No segments_info found in annotations where image_id: " + std::to_string(image_id)); | |||
| for (auto info : *itr_segments) { | |||
| std::vector<float> bbox; | |||
| auto itr_bbox = info.find(kJsonAnnoBbox); | |||
| if (itr_bbox == info.end()) | |||
| RETURN_STATUS_UNEXPECTED("No bbox found in segments_info where image_id: " + std::to_string(image_id)); | |||
| bbox.insert(bbox.end(), itr_bbox->begin(), itr_bbox->end()); | |||
| coordinate_map_[image_file].push_back(bbox); | |||
| auto itr_category_id = info.find(kJsonAnnoCategoryId); | |||
| if (itr_category_id == info.end()) | |||
| RETURN_STATUS_UNEXPECTED("No category_id found in segments_info where image_id: " + std::to_string(image_id)); | |||
| auto itr_iscrowd = info.find(kJsonAnnoIscrowd); | |||
| if (itr_iscrowd == info.end()) | |||
| RETURN_STATUS_UNEXPECTED("No iscrowd found in segments_info where image_id: " + std::to_string(image_id)); | |||
| auto itr_area = info.find(kJsonAnnoArea); | |||
| if (itr_area == info.end()) | |||
| RETURN_STATUS_UNEXPECTED("No area found in segments_info where image_id: " + std::to_string(image_id)); | |||
| simple_item_map_[image_file].push_back(*itr_category_id); | |||
| simple_item_map_[image_file].push_back(*itr_iscrowd); | |||
| simple_item_map_[image_file].push_back(*itr_area); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::CategoriesColumnLoad(nlohmann::json categories_tree) { | |||
| if (categories_tree.size() == 0) RETURN_STATUS_UNEXPECTED("No categories found in " + annotation_path_); | |||
| for (auto category : categories_tree) { | |||
| int32_t id = 0; | |||
| std::string name; | |||
| std::vector<int32_t> label_info; | |||
| auto itr_id = category.find(kJsonId); | |||
| if (itr_id == category.end()) RETURN_STATUS_UNEXPECTED("No id found in categories of " + annotation_path_); | |||
| id = *itr_id; | |||
| label_info.push_back(id); | |||
| auto itr_name = category.find(kJsonCategoriesName); | |||
| if (itr_name == category.end()) | |||
| RETURN_STATUS_UNEXPECTED("No name found in categories where id: " + std::to_string(id)); | |||
| name = *itr_name; | |||
| if (task_type_ == TaskType::Panoptic) { | |||
| auto itr_isthing = category.find(kJsonCategoriesIsthing); | |||
| if (itr_isthing == category.end()) | |||
| RETURN_STATUS_UNEXPECTED("No isthing found in categories of " + annotation_path_); | |||
| label_info.push_back(*itr_isthing); | |||
| } | |||
| label_index_.emplace_back(std::make_pair(name, label_info)); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::InitSampler() { | |||
| RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::LaunchThreadsAndInitOp() { | |||
| if (tree_ == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("tree_ not set"); | |||
| } | |||
| RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); | |||
| RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); | |||
| RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CocoOp::WorkerEntry, this, std::placeholders::_1))); | |||
| TaskManager::FindMe()->Post(); | |||
| RETURN_IF_NOT_OK(this->ParseAnnotationIds()); | |||
| RETURN_IF_NOT_OK(this->InitSampler()); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) { | |||
| std::ifstream fs; | |||
| fs.open(path, std::ios::binary | std::ios::in); | |||
| if (fs.fail()) { | |||
| RETURN_STATUS_UNEXPECTED("Fail to open file: " + path); | |||
| } | |||
| int64_t num_elements = fs.seekg(0, std::ios::end).tellg(); | |||
| (void)fs.seekg(0, std::ios::beg); | |||
| RETURN_IF_NOT_OK( | |||
| Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type())); | |||
| (void)fs.read(reinterpret_cast<char *>((*tensor)->GetMutableBuffer()), num_elements); | |||
| fs.close(); | |||
| if (decode_ == true) { | |||
| Status rc = Decode(*tensor, tensor); | |||
| if (rc.IsError()) { | |||
| RETURN_STATUS_UNEXPECTED("fail to decode file: " + path); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::CountTotalRows(const std::string &dir, const std::string &file, const std::string &task, | |||
| int64_t *count) { | |||
| std::shared_ptr<CocoOp> op; | |||
| RETURN_IF_NOT_OK(Builder().SetDir(dir).SetFile(file).SetTask(task).Build(&op)); | |||
| RETURN_IF_NOT_OK(op->ParseAnnotationIds()); | |||
| *count = static_cast<int64_t>(op->image_ids_.size()); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::GetClassIndexing(const std::string &dir, const std::string &file, const std::string &task, | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) { | |||
| std::shared_ptr<CocoOp> op; | |||
| RETURN_IF_NOT_OK(Builder().SetDir(dir).SetFile(file).SetTask(task).Build(&op)); | |||
| RETURN_IF_NOT_OK(op->ParseAnnotationIds()); | |||
| *output_class_indexing = op->label_index_; | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
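| For reference, ParseAnnotationIds() above expects a COCO-style annotation json. A minimal sketch for the Detection task follows (the key names come from the kJson* constants above; the file name and concrete values are made-up examples): | |||
| import json | |||
| annotation = { | |||
|     "images": [{"id": 1, "file_name": "example.jpg"}], | |||
|     "annotations": [{"id": 10, "image_id": 1, "bbox": [10.0, 10.0, 100.0, 100.0], | |||
|                      "category_id": 1, "iscrowd": 0}], | |||
|     "categories": [{"id": 1, "name": "person"}], | |||
| } | |||
| with open("annotation.json", "w") as f:  # write a toy annotation file for testing | |||
|     json.dump(annotation, f) | |||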
| @@ -0,0 +1,324 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_ENGINE_DATASETOPS_SOURCE_COCO_OP_H_ | |||
| #define DATASET_ENGINE_DATASETOPS_SOURCE_COCO_OP_H_ | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/engine/data_buffer.h" | |||
| #include "dataset/engine/data_schema.h" | |||
| #include "dataset/engine/datasetops/parallel_op.h" | |||
| #include "dataset/engine/datasetops/source/io_block.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sampler.h" | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include "dataset/util/path.h" | |||
| #include "dataset/util/queue.h" | |||
| #include "dataset/util/status.h" | |||
| #include "dataset/util/wait_post.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Forward declares | |||
| template <typename T> | |||
| class Queue; | |||
| using CoordinateRow = std::vector<std::vector<float>>; | |||
| class CocoOp : public ParallelOp, public RandomAccessOp { | |||
| public: | |||
| enum class TaskType { Detection = 0, Stuff = 1, Panoptic = 2, Keypoint = 3 }; | |||
| class Builder { | |||
| public: | |||
| // Constructor for Builder class of CocoOp | |||
| Builder(); | |||
| // Destructor. | |||
| ~Builder() = default; | |||
| // Setter method. | |||
| // @param const std::string & build_dir | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetDir(const std::string &build_dir) { | |||
| builder_dir_ = build_dir; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param const std::string & build_file | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetFile(const std::string &build_file) { | |||
| builder_file_ = build_file; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param const std::string & task_type | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetTask(const std::string &task_type) { | |||
| if (task_type == "Detection") { | |||
| builder_task_type_ = TaskType::Detection; | |||
| } else if (task_type == "Stuff") { | |||
| builder_task_type_ = TaskType::Stuff; | |||
| } else if (task_type == "Panoptic") { | |||
| builder_task_type_ = TaskType::Panoptic; | |||
| } else if (task_type == "Keypoint") { | |||
| builder_task_type_ = TaskType::Keypoint; | |||
| } | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param int32_t num_workers | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetNumWorkers(int32_t num_workers) { | |||
| builder_num_workers_ = num_workers; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param int32_t op_connector_size | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetOpConnectorSize(int32_t op_connector_size) { | |||
| builder_op_connector_size_ = op_connector_size; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param int32_t rows_per_buffer | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param std::shared_ptr<Sampler> sampler | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetSampler(std::shared_ptr<Sampler> sampler) { | |||
| builder_sampler_ = std::move(sampler); | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param bool do_decode | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetDecode(bool do_decode) { | |||
| builder_decode_ = do_decode; | |||
| return *this; | |||
| } | |||
| // Check validity of input args | |||
| // @return - The error code return | |||
| Status SanityCheck(); | |||
| // The builder "Build" method creates the final object. | |||
| // @param std::shared_ptr<CocoOp> *op - DatasetOp | |||
| // @return - The error code return | |||
| Status Build(std::shared_ptr<CocoOp> *op); | |||
| private: | |||
| bool builder_decode_; | |||
| std::string builder_dir_; | |||
| std::string builder_file_; | |||
| TaskType builder_task_type_; | |||
| int32_t builder_num_workers_; | |||
| int32_t builder_op_connector_size_; | |||
| int32_t builder_rows_per_buffer_; | |||
| std::shared_ptr<Sampler> builder_sampler_; | |||
| std::unique_ptr<DataSchema> builder_schema_; | |||
| }; | |||
| // Constructor | |||
| // @param TaskType task_type - task type of Coco | |||
| // @param std::string image_folder_path - image folder path of Coco | |||
| // @param std::string annotation_path - annotation json path of Coco | |||
| // @param int32_t num_workers - number of workers reading images in parallel | |||
| // @param int32_t rows_per_buffer - number of images (rows) in each buffer | |||
| // @param int32_t queue_size - connector queue size | |||
| // @param bool decode - whether to decode images | |||
| // @param std::unique_ptr<DataSchema> data_schema - the schema of the Coco dataset | |||
| // @param std::shared_ptr<Sampler> sampler - sampler tells CocoOp what to read | |||
| CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, | |||
| int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<Sampler> sampler); | |||
| // Destructor | |||
| ~CocoOp() = default; | |||
| // Worker thread pulls a number of IOBlocks from the IOBlock queue, makes a buffer and pushes it to the Connector | |||
| // @param int32_t workerId - id of each worker | |||
| // @return Status - The error code return | |||
| Status WorkerEntry(int32_t worker_id) override; | |||
| // Main Loop of CocoOp | |||
| // Master thread: fills the IOBlockQueue, then goes to sleep | |||
| // Worker thread: pulls an IOBlock from the IOBlockQueue, works on it, then puts the buffer to the output Connector | |||
| // @return Status - The error code return | |||
| Status operator()() override; | |||
| // A print method typically used for debugging | |||
| // @param out | |||
| // @param show_all | |||
| void Print(std::ostream &out, bool show_all) const override; | |||
| // @param const std::string &dir - Coco image dir path | |||
| // @param const std::string &file - Coco json file path | |||
| // @param const std::string &task - task mode of Coco task | |||
| // @param int64_t *count - output the number of rows of CocoDataset | |||
| static Status CountTotalRows(const std::string &dir, const std::string &file, const std::string &task, | |||
| int64_t *count); | |||
| // @param const std::string &dir - Coco image dir path | |||
| // @param const std::string &file - Coco json file path | |||
| // @param const std::string &task - task mode of Coco task | |||
| // @param std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing - output class indexing of CocoDataset | |||
| static Status GetClassIndexing(const std::string &dir, const std::string &file, const std::string &task, | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing); | |||
| private: | |||
| // Initialize Sampler, calls sampler->Init() within | |||
| // @return Status - The error code return | |||
| Status InitSampler(); | |||
| // Load a tensor row according to image id | |||
| // @param std::string image_id - image id | |||
| // @param TensorRow row - image & target read into this tensor row | |||
| // @return Status - The error code return | |||
| Status LoadTensorRow(const std::string &image_id, TensorRow *row); | |||
| // Load a tensor row for the Detection task, converting the annotation vectors to tensors | |||
| // @param const std::string &image_id - image id | |||
| // @param std::shared_ptr<Tensor> image - image tensor | |||
| // @param std::shared_ptr<Tensor> coordinate - coordinate tensor | |||
| // @param TensorRow row - image & target read into this tensor row | |||
| // @return Status - The error code return | |||
| Status LoadDetectionTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow); | |||
| // Load a tensor row for the Stuff or Keypoint task, converting the annotation vector to a tensor | |||
| // @param const std::string &image_id - image id | |||
| // @param std::shared_ptr<Tensor> image - image tensor | |||
| // @param std::shared_ptr<Tensor> coordinate - coordinate tensor | |||
| // @param TensorRow row - image & target read into this tensor row | |||
| // @return Status - The error code return | |||
| Status LoadSimpleTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow); | |||
| // Load a tensor row for the Panoptic task, converting the annotation vectors to multiple tensors | |||
| // @param const std::string &image_id - image id | |||
| // @param std::shared_ptr<Tensor> image - image tensor | |||
| // @param std::shared_ptr<Tensor> coordinate - coordinate tensor | |||
| // @param TensorRow row - image & target read into this tensor row | |||
| // @return Status - The error code return | |||
| Status LoadMixTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow); | |||
| // @param const std::string &path - path to the image file | |||
| // @param const ColDescriptor &col - contains tensor implementation and datatype | |||
| // @param std::shared_ptr<Tensor> tensor - return | |||
| // @return Status - The error code return | |||
| Status ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor); | |||
| // @param const std::vector<int64_t> &keys - keys in ioblock | |||
| // @param std::unique_ptr<DataBuffer> db | |||
| // @return Status - The error code return | |||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||
| // Parse the annotation ids from the annotation json file | |||
| // @return Status - The error code return | |||
| Status ParseAnnotationIds(); | |||
| // @param const std::shared_ptr<Tensor> &sample_ids - sample ids of tensor | |||
| // @param std::vector<int64_t> *keys - image id | |||
| // @return Status - The error code return | |||
| Status TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys); | |||
| // Called first when operator()() is called | |||
| // @return Status - The error code return | |||
| Status LaunchThreadsAndInitOp(); | |||
| // Reset dataset state | |||
| // @return Status - The error code return | |||
| Status Reset() override; | |||
| // @param nlohmann::json image_tree - image tree of json | |||
| // @param std::vector<std::string> *image_vec - image file name list of json | |||
| // @return Status - The error code return | |||
| Status ImageColumnLoad(nlohmann::json image_tree, std::vector<std::string> *image_vec); | |||
| // @param nlohmann::json categories_tree - categories tree of json | |||
| // @return Status - The error code return | |||
| Status CategoriesColumnLoad(nlohmann::json categories_tree); | |||
| // @param nlohmann::json annotation_tree - annotation tree of json | |||
| // @param const std::string &image_file - current image name in annotation | |||
| // @param const int32_t &id - current unique id of annotation | |||
| // @return Status - The error code return | |||
| Status DetectionColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &id); | |||
| // @param nlohmann::json annotation_tree - annotation tree of json | |||
| // @param const std::string &image_file - current image name in annotation | |||
| // @param const int32_t &id - current unique id of annotation | |||
| // @return Status - The error code return | |||
| Status StuffColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &id); | |||
| // @param nlohmann::json annotation_tree - annotation tree of json | |||
| // @param const std::string &image_file - current image name in annotation | |||
| // @param const int32_t &id - current unique id of annotation | |||
| // @return Status - The error code return | |||
| Status KeypointColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &id); | |||
| // @param nlohmann::json annotation_tree - annotation tree of json | |||
| // @param const std::string &image_file - current image name in annotation | |||
| // @param const int32_t &image_id - current unique id of annotation | |||
| // @return Status - The error code return | |||
| Status PanopticColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &image_id); | |||
| template <typename T> | |||
| Status SearchNodeInJson(nlohmann::json input_tree, std::string node_name, T *output_node); | |||
| bool decode_; | |||
| int64_t row_cnt_; | |||
| int64_t buf_cnt_; | |||
| std::string image_folder_path_; | |||
| std::string annotation_path_; | |||
| TaskType task_type_; | |||
| int32_t rows_per_buffer_; | |||
| std::shared_ptr<Sampler> sampler_; | |||
| std::unique_ptr<DataSchema> data_schema_; | |||
| WaitPost wp_; | |||
| std::vector<std::string> image_ids_; | |||
| std::map<int32_t, std::string> image_index_; | |||
| QueueList<std::unique_ptr<IOBlock>> io_block_queues_; | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> label_index_; | |||
| std::map<std::string, CoordinateRow> coordinate_map_; | |||
| std::map<std::string, std::vector<uint32_t>> simple_item_map_; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_ENGINE_DATASETOPS_SOURCE_COCO_OP_H_ | |||
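| The new pybind11 statics can be exercised directly from Python, roughly as below (a hedged sketch; the module path comes from the mindspore._c_dataengine import later in this diff, and the paths are placeholders): | |||
| from mindspore._c_dataengine import CocoOp | |||
| # get_class_indexing returns (name, [id]) pairs ((name, [id, isthing]) for 'Panoptic'); | |||
| # get_num_rows returns the number of images that carry annotations. | |||
| indexing = CocoOp.get_class_indexing("/path/to/images/", "/path/to/annotation.json", "Detection") | |||
| num_rows = CocoOp.get_num_rows("/path/to/images/", "/path/to/annotation.json", "Detection") | |||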
| @@ -20,8 +20,8 @@ can also create samplers with this module to sample data. | |||
| from .core.configuration import config | |||
| from .engine.datasets import TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, \ | |||
| GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CelebADataset, TextFileDataset, \ | |||
| Schema, Shuffle, zip, RandomDataset | |||
| GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CocoDataset, CelebADataset,\ | |||
| TextFileDataset, Schema, Shuffle, zip, RandomDataset | |||
| from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \ | |||
| WeightedRandomSampler, Sampler | |||
| from .engine.serializer_deserializer import serialize, deserialize, show | |||
| @@ -30,5 +30,5 @@ from .engine.graphdata import GraphData | |||
| __all__ = ["config", "ImageFolderDatasetV2", "MnistDataset", | |||
| "MindDataset", "GeneratorDataset", "TFRecordDataset", | |||
| "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", | |||
| "VOCDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", "RandomSampler", | |||
| "VOCDataset", "CocoDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", "RandomSampler", | |||
| "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "zip", "GraphData"] | |||
| @@ -33,5 +33,5 @@ __all__ = ["config", "ConfigurationManager", "zip", | |||
| "ImageFolderDatasetV2", "MnistDataset", | |||
| "MindDataset", "GeneratorDataset", "TFRecordDataset", | |||
| "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", | |||
| "VOCDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", | |||
| "VOCDataset", "CocoDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", | |||
| "RandomSampler", "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler"] | |||
| @@ -33,7 +33,7 @@ import copy | |||
| import numpy as np | |||
| from mindspore._c_dataengine import DataType, TFReaderOp, ImageFolderOp, CifarOp, MnistOp, ManifestOp, \ | |||
| MindRecordOp, TextFileOp, VOCOp, CBatchInfo | |||
| MindRecordOp, TextFileOp, VOCOp, CocoOp, CBatchInfo | |||
| from mindspore._c_expression import typing | |||
| from mindspore import log as logger | |||
| @@ -42,8 +42,9 @@ from .iterators import DictIterator, TupleIterator | |||
| from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \ | |||
| check_rename, \ | |||
| check_take, check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ | |||
| check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \ | |||
| check_sync_wait, check_zip_dataset, check_add_column, check_textfiledataset, check_concat, check_split | |||
| check_tfrecorddataset, check_vocdataset, check_cocodataset, check_celebadataset, check_minddataset,\ | |||
| check_generatordataset, check_sync_wait, check_zip_dataset, check_add_column, check_textfiledataset, check_concat,\ | |||
| check_split | |||
| from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist | |||
| try: | |||
| @@ -3867,10 +3868,14 @@ class VOCDataset(MappableDataset): | |||
| """ | |||
| A source dataset for reading and parsing VOC dataset. | |||
| The generated dataset has two columns ['image', 'target']. | |||
| The shape of both column is [image_size] if decode flag is False, or [H, W, C] | |||
| The generated dataset has two columns: | |||
| task='Detection' : ['image', 'annotation']. | |||
| task='Segmentation' : ['image', 'target'] | |||
| The shape of both column 'image' and 'target' is [image_size] if decode flag is False, or [H, W, C] | |||
| otherwise. | |||
| The type of both tensor is uint8. | |||
| The type of both tensors 'image' and 'target' is uint8. | |||
| The type of tensor 'annotation' is uint32. | |||
| This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table | |||
| below shows what input args are allowed and their expected behavior. | |||
| @@ -4035,6 +4040,163 @@ class VOCDataset(MappableDataset): | |||
| return self.sampler.is_sharded() | |||
| class CocoDataset(MappableDataset): | |||
| """ | |||
| A source dataset for reading and parsing COCO dataset. | |||
| CocoDataset supports four kinds of tasks on the COCO 2017 Train/Val/Test data: | |||
| Detection, Keypoint, Stuff and Panoptic. | |||
| The generated dataset has multiple columns: | |||
| task = 'Detection' : column [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], | |||
| ['iscrowd', dtype=uint32]]. | |||
| task = 'Stuff' : column [['image', dtype=uint8], ['segmentation',dtype=float32], ['iscrowd',dtype=uint32]]. | |||
| task = 'Keypoint' : column [['image', dtype=uint8], ['keypoints', dtype=float32], ['num_keypoints', dtype=uint32]]. | |||
| task = 'Panoptic' : column [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], | |||
| ['iscrowd', dtype=uint32], ['area', dtype=uint32]]. | |||
| This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. The table | |||
| below shows what input args are allowed and their expected behavior. | |||
| .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' | |||
| :widths: 25 25 50 | |||
| :header-rows: 1 | |||
| * - Parameter 'sampler' | |||
| - Parameter 'shuffle' | |||
| - Expected Order Behavior | |||
| * - None | |||
| - None | |||
| - random order | |||
| * - None | |||
| - True | |||
| - random order | |||
| * - None | |||
| - False | |||
| - sequential order | |||
| * - Sampler object | |||
| - None | |||
| - order defined by sampler | |||
| * - Sampler object | |||
| - True | |||
| - not allowed | |||
| * - Sampler object | |||
| - False | |||
| - not allowed | |||
| Args: | |||
| dataset_dir (str): Path to the root directory that contains the dataset. | |||
| annotation_file (str): Path to the annotation json. | |||
| task (str): Set the task type for reading COCO data; currently 'Detection', 'Stuff', 'Panoptic' | |||
| and 'Keypoint' are supported (default='Detection'). | |||
| num_samples (int, optional): The number of images to be included in the dataset | |||
| (default=None, all images). | |||
| num_parallel_workers (int, optional): Number of workers to read the data | |||
| (default=None, number set in the config). | |||
| shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected | |||
| order behavior shown in the table). | |||
| decode (bool, optional): Decode the images after reading (default=False). | |||
| sampler (Sampler, optional): Object used to choose samples from the dataset | |||
| (default=None, expected order behavior shown in the table). | |||
| num_shards (int, optional): Number of shards that the dataset should be divided | |||
| into (default=None). | |||
| shard_id (int, optional): The shard ID within num_shards (default=None). This | |||
| argument should be specified only when num_shards is also specified. | |||
| Raises: | |||
| RuntimeError: If sampler and shuffle are specified at the same time. | |||
| RuntimeError: If sampler and sharding are specified at the same time. | |||
| RuntimeError: If num_shards is specified but shard_id is None. | |||
| RuntimeError: If shard_id is specified but num_shards is None. | |||
| RuntimeError: If parsing the json annotation file failed. | |||
| ValueError: If task is not in ['Detection', 'Stuff', 'Panoptic', 'Keypoint']. | |||
| ValueError: If annotation_file does not exist. | |||
| ValueError: If dataset_dir does not exist. | |||
| ValueError: If shard_id is invalid (< 0 or >= num_shards). | |||
| Examples: | |||
| >>> import mindspore.dataset as ds | |||
| >>> dataset_dir = "/path/to/coco_dataset_directory/image_folder" | |||
| >>> annotation_file = "/path/to/coco_dataset_directory/annotation_folder/annotation.json" | |||
| >>> # 1) read COCO data for Detection task | |||
| >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Detection') | |||
| >>> # 2) read COCO data for Stuff task | |||
| >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Stuff') | |||
| >>> # 3) read COCO data for Panoptic task | |||
| >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Panoptic') | |||
| >>> # 4) read COCO data for Keypoint task | |||
| >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Keypoint') | |||
| >>> # In each COCO dataset row, the available keys depend on the specified task | |||
| """ | |||
| @check_cocodataset | |||
| def __init__(self, dataset_dir, annotation_file, task="Detection", num_samples=None, num_parallel_workers=None, | |||
| shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None): | |||
| super().__init__(num_parallel_workers) | |||
| self.dataset_dir = dataset_dir | |||
| self.annotation_file = annotation_file | |||
| self.task = task | |||
| self.sampler = _select_sampler(num_samples, sampler, shuffle, num_shards, shard_id) | |||
| self.num_samples = num_samples | |||
| self.decode = decode | |||
| self.shuffle_level = shuffle | |||
| self.num_shards = num_shards | |||
| self.shard_id = shard_id | |||
| def get_args(self): | |||
| args = super().get_args() | |||
| args["dataset_dir"] = self.dataset_dir | |||
| args["annotation_file"] = self.annotation_file | |||
| args["task"] = self.task | |||
| args["num_samples"] = self.num_samples | |||
| args["sampler"] = self.sampler | |||
| args["decode"] = self.decode | |||
| args["shuffle"] = self.shuffle_level | |||
| args["num_shards"] = self.num_shards | |||
| args["shard_id"] = self.shard_id | |||
| return args | |||
| def get_dataset_size(self): | |||
| """ | |||
| Get the number of batches in an epoch. | |||
| Returns: | |||
| Number, number of batches. | |||
| """ | |||
| num_rows = CocoOp.get_num_rows(self.dataset_dir, self.annotation_file, self.task) | |||
| rows_per_shard = get_num_rows(num_rows, self.num_shards) | |||
| rows_from_sampler = self._get_sampler_dataset_size() | |||
| if rows_from_sampler is None: | |||
| return rows_per_shard | |||
| return min(rows_from_sampler, rows_per_shard) | |||
| def get_class_indexing(self): | |||
| """ | |||
| Get the class index. | |||
| Returns: | |||
| Dict, a str-to-list<int> mapping from label name to index. | |||
| """ | |||
| if self.task not in {"Detection", "Panoptic"}: | |||
| raise NotImplementedError("Only 'Detection' and 'Panoptic' support get_class_indexing.") | |||
| class_index = CocoOp.get_class_indexing(self.dataset_dir, self.annotation_file, self.task) | |||
| return dict(class_index) | |||
| def is_shuffled(self): | |||
| if self.shuffle_level is None: | |||
| return True | |||
| return self.shuffle_level or self.sampler.is_shuffled() | |||
| def is_sharded(self): | |||
| if self.num_shards is not None: | |||
| return self.num_shards > 1 | |||
| return self.sampler.is_sharded() | |||
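As a side note for review, here is a minimal sketch of how the new public surface is exercised end to end. The paths are placeholders, not files shipped with this change, and the column names follow the task table in the docstring above:

```python
import mindspore.dataset as ds

# Placeholder paths; substitute a real COCO-style layout.
dataset_dir = "/path/to/coco/images"
annotation_file = "/path/to/coco/annotations/instances.json"

data = ds.CocoDataset(dataset_dir, annotation_file=annotation_file,
                      task="Detection", decode=True, shuffle=False)

# get_dataset_size() accounts for num_shards and any attached sampler.
print(data.get_dataset_size())

# get_class_indexing() is only defined for 'Detection' and 'Panoptic';
# for 'Panoptic' each value also carries the isthing flag (see the tests below).
print(data.get_class_indexing())

for row in data.create_dict_iterator():
    # For task='Detection' each row exposes 'image', 'bbox',
    # 'category_id' and 'iscrowd'.
    print(row["image"].shape, row["bbox"])
```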
| class CelebADataset(MappableDataset): | |||
| """ | |||
| A source dataset for reading and parsing the CelebA dataset. Only list_attr_celeba.txt is supported currently. | |||
| @@ -165,6 +165,8 @@ class Iterator: | |||
| op_type = OpName.MANIFEST | |||
| elif isinstance(dataset, de.VOCDataset): | |||
| op_type = OpName.VOC | |||
| elif isinstance(dataset, de.CocoDataset): | |||
| op_type = OpName.COCO | |||
| elif isinstance(dataset, de.Cifar10Dataset): | |||
| op_type = OpName.CIFAR10 | |||
| elif isinstance(dataset, de.Cifar100Dataset): | |||
| @@ -299,6 +299,12 @@ def create_node(node): | |||
| node.get('num_samples'), node.get('num_parallel_workers'), node.get('shuffle'), | |||
| node.get('decode'), sampler, node.get('num_shards'), node.get('shard_id')) | |||
| elif dataset_op == 'CocoDataset': | |||
| sampler = construct_sampler(node.get('sampler')) | |||
| pyobj = pyclass(node['dataset_dir'], node.get('annotation_file'), node.get('task'), node.get('num_samples'), | |||
| node.get('num_parallel_workers'), node.get('shuffle'), node.get('decode'), sampler, | |||
| node.get('num_shards'), node.get('shard_id')) | |||
| elif dataset_op == 'CelebADataset': | |||
| sampler = construct_sampler(node.get('sampler')) | |||
| pyobj = pyclass(node['dataset_dir'], node.get('num_parallel_workers'), node.get('shuffle'), | |||
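The `CocoDataset` branch added to `create_node` above restores the dataset from its serialized form. A hedged sketch of the round trip this enables, assuming the same `ds.serialize`/`ds.deserialize` helpers used for the other dataset types (paths are placeholders):

```python
import mindspore.dataset as ds

# Placeholder paths for illustration only.
data = ds.CocoDataset("/path/to/images",
                      annotation_file="/path/to/annotation.json",
                      task="Detection")

# Serialize the pipeline to a python dict, then rebuild it; the rebuilt
# pipeline should be reconstructed through the new 'CocoDataset' branch.
serialized = ds.serialize(data)
restored = ds.deserialize(input_dict=serialized)
```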
| @@ -522,6 +522,49 @@ def check_vocdataset(method): | |||
| return new_method | |||
| def check_cocodataset(method): | |||
| """A wrapper that wrap a parameter checker to the original Dataset(CocoDataset).""" | |||
| @wraps(method) | |||
| def new_method(*args, **kwargs): | |||
| param_dict = make_param_dict(method, args, kwargs) | |||
| nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] | |||
| nreq_param_bool = ['shuffle', 'decode'] | |||
| # check dataset_dir; required argument | |||
| dataset_dir = param_dict.get('dataset_dir') | |||
| if dataset_dir is None: | |||
| raise ValueError("dataset_dir is not provided.") | |||
| check_dataset_dir(dataset_dir) | |||
| # check annotation_file; required argument | |||
| annotation_file = param_dict.get('annotation_file') | |||
| if annotation_file is None: | |||
| raise ValueError("annotation_file is not provided.") | |||
| check_dataset_file(annotation_file) | |||
| # check task; required argument | |||
| task = param_dict.get('task') | |||
| if task is None: | |||
| raise ValueError("task is not provided.") | |||
| if not isinstance(task, str): | |||
| raise ValueError("task is not str type.") | |||
| if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}: | |||
| raise ValueError("Invalid task type") | |||
| check_param_type(nreq_param_int, param_dict, int) | |||
| check_param_type(nreq_param_bool, param_dict, bool) | |||
| check_sampler_shuffle_shard_options(param_dict) | |||
| return method(*args, **kwargs) | |||
| return new_method | |||
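A quick sketch of what the new validator rejects before any C++ op is built. Since `check_dataset_dir` and `check_dataset_file` run first, this assumes `DATA_DIR` and `ANNOTATION` (placeholders here) point at an existing directory and file:

```python
import mindspore.dataset as ds

DATA_DIR = "/path/to/images"             # placeholder, must exist
ANNOTATION = "/path/to/annotation.json"  # placeholder, must exist

# Rejected by the task check above: not one of
# {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}.
try:
    ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION, task="Segmentation")
except ValueError as e:
    assert "Invalid task type" in str(e)

# Rejected by check_sampler_shuffle_shard_options: per the docstring,
# specifying both sampler and shuffle raises RuntimeError.
try:
    ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION, task="Detection",
                   shuffle=True, sampler=ds.RandomSampler())
except RuntimeError as e:
    print(e)
```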
| def check_celebadataset(method): | |||
| """A wrapper that wrap a parameter checker to the original Dataset(CelebADataset).""" | |||
| @@ -71,6 +71,7 @@ SET(DE_UT_SRCS | |||
| jieba_tokenizer_op_test.cc | |||
| tokenizer_op_test.cc | |||
| gnn_graph_test.cc | |||
| coco_op_test.cc | |||
| ) | |||
| add_executable(de_ut_tests ${DE_UT_SRCS}) | |||
| @@ -0,0 +1,265 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <fstream> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "common/common.h" | |||
| #include "common/utils.h" | |||
| #include "dataset/core/client.h" | |||
| #include "dataset/core/global_context.h" | |||
| #include "dataset/engine/datasetops/source/coco_op.h" | |||
| #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/pk_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/random_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" | |||
| #include "dataset/util/de_error.h" | |||
| #include "dataset/util/path.h" | |||
| #include "dataset/util/status.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "securec.h" | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); | |||
| std::shared_ptr<RepeatOp> Repeat(int repeat_cnt); | |||
| std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); | |||
| class MindDataTestCocoOp : public UT::DatasetOpTesting { | |||
| protected: | |||
| }; | |||
| TEST_F(MindDataTestCocoOp, TestCocoDetection) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| std::string dataset_path, annotation_path; | |||
| dataset_path = datasets_root_path_ + "/testCOCO/train/"; | |||
| annotation_path = datasets_root_path_ + "/testCOCO/annotations/train.json"; | |||
| std::string task("Detection"); | |||
| std::shared_ptr<CocoOp> my_coco_op; | |||
| CocoOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path) | |||
| .SetFile(annotation_path) | |||
| .SetTask(task) | |||
| .Build(&my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(DEBUG) << "Launch tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (size_t i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(DEBUG) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 6); | |||
| } | |||
| TEST_F(MindDataTestCocoOp, TestCocoStuff) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| std::string dataset_path, annotation_path; | |||
| dataset_path = datasets_root_path_ + "/testCOCO/train/"; | |||
| annotation_path = datasets_root_path_ + "/testCOCO/annotations/train.json"; | |||
| std::string task("Stuff"); | |||
| std::shared_ptr<CocoOp> my_coco_op; | |||
| CocoOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path) | |||
| .SetFile(annotation_path) | |||
| .SetTask(task) | |||
| .Build(&my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(DEBUG) << "Launch tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (size_t i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(DEBUG) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 6); | |||
| } | |||
| TEST_F(MindDataTestCocoOp, TestCocoKeypoint) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| std::string dataset_path, annotation_path; | |||
| dataset_path = datasets_root_path_ + "/testCOCO/train/"; | |||
| annotation_path = datasets_root_path_ + "/testCOCO/annotations/key_point.json"; | |||
| std::string task("Keypoint"); | |||
| std::shared_ptr<CocoOp> my_coco_op; | |||
| CocoOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path) | |||
| .SetFile(annotation_path) | |||
| .SetTask(task) | |||
| .Build(&my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(DEBUG) << "Launch tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (size_t i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(DEBUG) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 2); | |||
| } | |||
| TEST_F(MindDataTestCocoOp, TestCocoPanoptic) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| std::string dataset_path, annotation_path; | |||
| dataset_path = datasets_root_path_ + "/testCOCO/train/"; | |||
| annotation_path = datasets_root_path_ + "/testCOCO/annotations/panoptic.json"; | |||
| std::string task("Panoptic"); | |||
| std::shared_ptr<CocoOp> my_coco_op; | |||
| CocoOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path) | |||
| .SetFile(annotation_path) | |||
| .SetTask(task) | |||
| .Build(&my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(DEBUG) << "Launch tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (size_t i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(DEBUG) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 2); | |||
| } | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org","version": "1.0","year": 2017,"contributor": "COCO Consortium", "data_created": "2017/09/01"}, "licenses": [{"url": "http://creativecommons.org/license/by-nc-sa/2.0/","id": 3,"name": "Attribution-Noncommercial License"}], "images": [{"license": 3, "file_name": "000000391895.jpg", "id": 391895}, {"license": 3, "file_name: "000000318219.jpg", "id": 318219}, {"license": 3, "file_name": "000000554625.jpg", "id": 554625}, {"license": 3, "file_name": "000000574769.jpg", "id": 574769}, {"license": 3, "file_name": "000000060623.jpg", "id": 60623}, {"license": 3, "file_name": "000000309022.jpg", "id": 309022}], "annotations": [{"segmentation": [[10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0]], "category_id": 1, "iscrowd": 0, "image_id": 391895, "bbox": [10,10,10,10], "area": 100, "id": 10000}, {"segmentation": [[20.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0]], "category_id": 2, "iscrowd": 0, "image_id": 318219, "bbox": [20,20,20,20], "area": 400, "id": 10001}, {"segmentation": [[40.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,40.0,41.0,42.0]], "category_id": 3, "iscrowd": 0, "image_id": 554625, "bbox": [30,30,30,30], "area": 900, "id": 10002}, {"segmentation": [[50.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0]], "category_id": 4, "iscrowd": 0, "image_id": 574769, "bbox": [40,40,40,40], "area": 1600, "id": 10003}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 5, "iscrowd": 0, "image_id": 60623, "bbox": [50,50,50,50], "area": 2500, "id": 10004}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0],[68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 6, "iscrowd": 0, "image_id": 309022, "bbox": [60,60,60,60], "area": 3600, "id": 10005}, {"segmentation": [[70.0,72.0,73.0,74.0,75.0]], "category_id": 7, "iscrowd": 0, "image_id": 391895, "bbox": [70,70,70,70], "area": 4900, "id": 10006}, {"segmentation": {"counts": [10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0],"size": [200,300]}, "category_id": 8, "iscrowd": 1, "image_id": 318219, "bbox": [80,80,80,80], "area": 6400, "id": 10007}], "categories": [{"supercategory": "person", "id": 1, "name": "person"},{"supercategory": "vehicle", "id": 2, "name": "bicycle"},{"supercategory": "vehicle", "id": 3, "name": "car"},{"supercategory": "vehicle", "id": 4, "name": "cat"},{"supercategory": "vehicle", "id": 5, "name": "dog"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 7, "name": "monkey"}]} | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org", "version": "1.0", "year": 2017, "contributor": "COCO Consortium", "data_created": "2017/09/01"}, "images":[{"license": 3, "file_name": "000000391895.jpg", "id": 391895},{"license": 3, "file_name": "000000318219.jpg", "id": 318219}],"annotations": [{"segmentation": [[10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0]], "num_keypoints": 10,"area": 12345,"iscrowd": 0,"keypoints": [244,139,2,0,0,0,226,118,2,0,0,0,154,159,2,143,261,2,135,312,2,271,423,2,184,530,2,261,280,2,347,592,2,0,0,0,123,596,2,0,0,0,0,0,0,0,0,0,0,0,0],"image_id": 318219,"bbox": [40.65,38.8,418.38,601.2],"category_id": 1, "id": 491276},{"segmentation": [[20.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0]], "num_keypoints": 14,"area": 45678,"iscrowd": 0,"keypoints": [368,61,1,369,52,2,0,0,0,382,48,2,0,0,0,368,84,2,435,81,2,362,125,2,446,125,2,360,153,2,0,0,0,397,167,1,439,166,1,369,193,2,461,234,2,361,246,2,474,287,2],"image_id": 391895,"bbox": [339.88,22.16,153.88,300.73],"category_id": 1, "id": 202758}]} | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org","version": "1.0","year": 2017,"contributor": "COCO Consortium", "data_created": "2017/09/01"}, "licenses": [{"url": "http://creativecommons.org/license/by-nc-sa/2.0/","id": 3,"name": "Attribution-Noncommercial License"}], "image": [{"license": 3, "file_name": "000000391895.jpg", "id": 391895}, {"license": 3, "file_name": "000000318219.jpg", "id": 318219}, {"license": 3, "file_name": "000000554625.jpg", "id": 554625}, {"license": 3, "file_name": "000000574769.jpg", "id": 574769}, {"license": 3, "file_name": "000000060623.jpg", "id": 60623}, {"license": 3, "file_name": "000000309022.jpg", "id": 309022}], "annotations": [{"segmentation": [[10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0]], "category_id": 1, "iscrowd": 0, "image_id": 391895, "bbox": [10,10,10,10], "area": 100, "id": 10000}, {"segmentation": [[20.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0]], "category_id": 2, "iscrowd": 0, "image_id": 318219, "bbox": [20,20,20,20], "area": 400, "id": 10001}, {"segmentation": [[40.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,40.0,41.0,42.0]], "category_id": 3, "iscrowd": 0, "image_id": 554625, "bbox": [30,30,30,30], "area": 900, "id": 10002}, {"segmentation": [[50.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0]], "category_id": 4, "iscrowd": 0, "image_id": 574769, "bbox": [40,40,40,40], "area": 1600, "id": 10003}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 5, "iscrowd": 0, "image_id": 60623, "bbox": [50,50,50,50], "area": 2500, "id": 10004}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0],[68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 6, "iscrowd": 0, "image_id": 309022, "bbox": [60,60,60,60], "area": 3600, "id": 10005}, {"segmentation": [[70.0,72.0,73.0,74.0,75.0]], "category_id": 7, "iscrowd": 0, "image_id": 391895, "bbox": [70,70,70,70], "area": 4900, "id": 10006}, {"segmentation": {"counts": [10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0],"size": [200,300]}, "category_id": 8, "iscrowd": 1, "image_id": 318219, "bbox": [80,80,80,80], "area": 6400, "id": 10007}], "categories": [{"supercategory": "person", "id": 1, "name": "person"},{"supercategory": "vehicle", "id": 2, "name": "bicycle"},{"supercategory": "vehicle", "id": 3, "name": "car"},{"supercategory": "vehicle", "id": 4, "name": "cat"},{"supercategory": "vehicle", "id": 5, "name": "dog"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 7, "name": "monkey"}]} | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org","version": "1.0","year": 2017,"contributor": "COCO Consortium", "data_created": "2017/09/01"},"licenses": [{"url": "http://creativecommons.org/license/by-nc-sa/2.0/","id": 3,"name": "Attribution-Noncommercial License"}],"images": [{"license": 3, "file_name": "000000391895.jpg", "id": 391895}, {"license": 3, "file_name": "000000574769.jpg", "id": 574769}],"annotations": [{"segments_info": [{"id": 10461346, "category_id": 1, "iscrowd": 0, "bbox": [472,173,36,48],"area": 705},{"id": 5263261, "category_id": 1, "iscrowd": 0, "bbox": [340,22,154,301],"area": 14062},{"id": 770861, "category_id": 2, "iscrowd": 0, "bbox": [486, 183, 30, 35],"area": 626}], "file_name": "000000391895", "image_id": 391895}, {"segments_info": [{"id": 5000790, "category_id": 1, "iscrowd": 0, "bbox": [103,133,229,422],"area": 43102},{"id": 35650815, "category_id": 3, "iscrowd": 0, "bbox": [243,175,93,164],"area": 6079}], "file_name": "000000574769.png", "image_id": 574769}],"categories": [{"supercategory": "person","isthing": 1,"id": 1,"name": "person"},{"supercategory": "vehicle","isthing": 1,"id": 2,"name": "bicycle"},{"supercategory": "vehicle","isthing": 1,"id": 3, "name": "car"}]} | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org","version": "1.0","year": 2017,"contributor": "COCO Consortium", "data_created": "2017/09/01"}, "licenses": [{"url": "http://creativecommons.org/license/by-nc-sa/2.0/","id": 3,"name": "Attribution-Noncommercial License"}], "images": [{"license": 3, "file_name": "000000391895.jpg", "id": 391895}, {"license": 3, "file_name": "000000318219.jpg", "id": 318219}, {"license": 3, "file_name": "000000554625.jpg", "id": 554625}, {"license": 3, "file_name": "000000574769.jpg", "id": 574769}, {"license": 3, "file_name": "000000060623.jpg", "id": 60623}, {"license": 3, "file_name": "000000309022.jpg", "id": 309022}], "annotations": [{"segmentation": [[10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0]], "category_id": 1, "iscrowd": 0, "image_id": 391895, "bbox": [10,10,10,10], "area": 100, "id": 10000}, {"segmentation": [[20.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0]], "category_id": 2, "iscrowd": 0, "image_id": 318219, "bbox": [20,20,20,20], "area": 400, "id": 10001}, {"segmentation": [[40.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,40.0,41.0,42.0]], "category_id": 3, "iscrowd": 0, "image_id": 554625, "bbox": [30,30,30,30], "area": 900, "id": 10002}, {"segmentation": [[50.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0]], "category_id": 4, "iscrowd": 0, "image_id": 574769, "bbox": [40,40,40,40], "area": 1600, "id": 10003}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 5, "iscrowd": 0, "image_id": 60623, "bbox": [50,50,50,50], "area": 2500, "id": 10004}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0],[68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 6, "iscrowd": 0, "image_id": 309022, "bbox": [60,60,60,60], "area": 3600, "id": 10005}, {"segmentation": [[70.0,72.0,73.0,74.0,75.0]], "category_id": 7, "iscrowd": 0, "image_id": 391895, "bbox": [70,70,70,70], "area": 4900, "id": 10006}, {"segmentation": {"counts": [10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0],"size": [200,300]}, "category_id": 8, "iscrowd": 1, "image_id": 318219, "bbox": [80,80,80,80], "area": 6400, "id": 10007}], "categories": [{"supercategory": "person", "id": 1, "name": "person"},{"supercategory": "vehicle", "id": 2, "name": "bicycle"},{"supercategory": "vehicle", "id": 3, "name": "car"},{"supercategory": "vehicle", "id": 4, "name": "cat"},{"supercategory": "vehicle", "id": 5, "name": "dog"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 7, "name": "monkey"}]} | |||
| @@ -0,0 +1,254 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| import numpy as np | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.transforms.vision.c_transforms as vision | |||
| DATA_DIR = "../data/dataset/testCOCO/train/" | |||
| ANNOTATION_FILE = "../data/dataset/testCOCO/annotations/train.json" | |||
| KEYPOINT_FILE = "../data/dataset/testCOCO/annotations/key_point.json" | |||
| PANOPTIC_FILE = "../data/dataset/testCOCO/annotations/panoptic.json" | |||
| INVALID_FILE = "../data/dataset/testCOCO/annotations/invalid.json" | |||
| LACKOFIMAGE_FILE = "../data/dataset/testCOCO/annotations/lack_of_images.json" | |||
| def test_coco_detection(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", | |||
| decode=True, shuffle=False) | |||
| num_iter = 0 | |||
| image_shape = [] | |||
| bbox = [] | |||
| category_id = [] | |||
| for data in data1.create_dict_iterator(): | |||
| image_shape.append(data["image"].shape) | |||
| bbox.append(data["bbox"]) | |||
| category_id.append(data["category_id"]) | |||
| num_iter += 1 | |||
| assert num_iter == 6 | |||
| assert image_shape[0] == (2268, 4032, 3) | |||
| assert image_shape[1] == (561, 595, 3) | |||
| assert image_shape[2] == (607, 585, 3) | |||
| assert image_shape[3] == (642, 675, 3) | |||
| assert image_shape[4] == (2268, 4032, 3) | |||
| assert image_shape[5] == (2268, 4032, 3) | |||
| assert np.array_equal(np.array([[10., 10., 10., 10.], [70., 70., 70., 70.]]), bbox[0]) | |||
| assert np.array_equal(np.array([[20., 20., 20., 20.], [80., 80., 80.0, 80.]]), bbox[1]) | |||
| assert np.array_equal(np.array([[30.0, 30.0, 30.0, 30.]]), bbox[2]) | |||
| assert np.array_equal(np.array([[40., 40., 40., 40.]]), bbox[3]) | |||
| assert np.array_equal(np.array([[50., 50., 50., 50.]]), bbox[4]) | |||
| assert np.array_equal(np.array([[60., 60., 60., 60.]]), bbox[5]) | |||
| assert np.array_equal(np.array([[1], [7]]), category_id[0]) | |||
| assert np.array_equal(np.array([[2], [8]]), category_id[1]) | |||
| assert np.array_equal(np.array([[3]]), category_id[2]) | |||
| assert np.array_equal(np.array([[4]]), category_id[3]) | |||
| assert np.array_equal(np.array([[5]]), category_id[4]) | |||
| assert np.array_equal(np.array([[6]]), category_id[5]) | |||
| def test_coco_stuff(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff", | |||
| decode=True, shuffle=False) | |||
| num_iter = 0 | |||
| image_shape = [] | |||
| segmentation = [] | |||
| iscrowd = [] | |||
| for data in data1.create_dict_iterator(): | |||
| image_shape.append(data["image"].shape) | |||
| segmentation.append(data["segmentation"]) | |||
| iscrowd.append(data["iscrowd"]) | |||
| num_iter += 1 | |||
| assert num_iter == 6 | |||
| assert image_shape[0] == (2268, 4032, 3) | |||
| assert image_shape[1] == (561, 595, 3) | |||
| assert image_shape[2] == (607, 585, 3) | |||
| assert image_shape[3] == (642, 675, 3) | |||
| assert image_shape[4] == (2268, 4032, 3) | |||
| assert image_shape[5] == (2268, 4032, 3) | |||
| assert np.array_equal(np.array([[10., 12., 13., 14., 15., 16., 17., 18., 19., 20.], | |||
| [70., 72., 73., 74., 75., -1., -1., -1., -1., -1.]]), | |||
| segmentation[0]) | |||
| assert np.array_equal(np.array([[0], [0]]), iscrowd[0]) | |||
| assert np.array_equal(np.array([[20.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0], | |||
| [10.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, -1.0]]), | |||
| segmentation[1]) | |||
| assert np.array_equal(np.array([[0], [1]]), iscrowd[1]) | |||
| assert np.array_equal(np.array([[40., 42., 43., 44., 45., 46., 47., 48., 49., 40., 41., 42.]]), segmentation[2]) | |||
| assert np.array_equal(np.array([[0]]), iscrowd[2]) | |||
| assert np.array_equal(np.array([[50., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63.]]), | |||
| segmentation[3]) | |||
| assert np.array_equal(np.array([[0]]), iscrowd[3]) | |||
| assert np.array_equal(np.array([[60., 62., 63., 64., 65., 66., 67., 68., 69., 70., 71., 72., 73., 74.]]), | |||
| segmentation[4]) | |||
| assert np.array_equal(np.array([[0]]), iscrowd[4]) | |||
| assert np.array_equal(np.array([[60., 62., 63., 64., 65., 66., 67.], [68., 69., 70., 71., 72., 73., 74.]]), | |||
| segmentation[5]) | |||
| assert np.array_equal(np.array([[0]]), iscrowd[5]) | |||
| def test_coco_keypoint(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint", | |||
| decode=True, shuffle=False) | |||
| num_iter = 0 | |||
| image_shape = [] | |||
| keypoints = [] | |||
| num_keypoints = [] | |||
| for data in data1.create_dict_iterator(): | |||
| image_shape.append(data["image"].shape) | |||
| keypoints.append(data["keypoints"]) | |||
| num_keypoints.append(data["num_keypoints"]) | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| assert image_shape[0] == (2268, 4032, 3) | |||
| assert image_shape[1] == (561, 595, 3) | |||
| assert np.array_equal(np.array([[368., 61., 1., 369., 52., 2., 0., 0., 0., 382., 48., 2., 0., 0., 0., 368., 84., 2., | |||
| 435., 81., 2., 362., 125., 2., 446., 125., 2., 360., 153., 2., 0., 0., 0., 397., | |||
| 167., 1., 439., 166., 1., 369., 193., 2., 461., 234., 2., 361., 246., 2., 474., | |||
| 287., 2.]]), keypoints[0]) | |||
| assert np.array_equal(np.array([[14]]), num_keypoints[0]) | |||
| assert np.array_equal(np.array([[244., 139., 2., 0., 0., 0., 226., 118., 2., 0., 0., 0., 154., 159., 2., 143., 261., | |||
| 2., 135., 312., 2., 271., 423., 2., 184., 530., 2., 261., 280., 2., 347., 592., 2., | |||
| 0., 0., 0., 123., 596., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), | |||
| keypoints[1]) | |||
| assert np.array_equal(np.array([[10]]), num_keypoints[1]) | |||
| def test_coco_panoptic(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True, shuffle=False) | |||
| num_iter = 0 | |||
| image_shape = [] | |||
| bbox = [] | |||
| category_id = [] | |||
| iscrowd = [] | |||
| area = [] | |||
| for data in data1.create_dict_iterator(): | |||
| image_shape.append(data["image"].shape) | |||
| bbox.append(data["bbox"]) | |||
| category_id.append(data["category_id"]) | |||
| iscrowd.append(data["iscrowd"]) | |||
| area.append(data["area"]) | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| assert image_shape[0] == (2268, 4032, 3) | |||
| assert np.array_equal(np.array([[472, 173, 36, 48], [340, 22, 154, 301], [486, 183, 30, 35]]), bbox[0]) | |||
| assert np.array_equal(np.array([[1], [1], [2]]), category_id[0]) | |||
| assert np.array_equal(np.array([[0], [0], [0]]), iscrowd[0]) | |||
| assert np.array_equal(np.array([[705], [14062], [626]]), area[0]) | |||
| assert image_shape[1] == (642, 675, 3) | |||
| assert np.array_equal(np.array([[103, 133, 229, 422], [243, 175, 93, 164]]), bbox[1]) | |||
| assert np.array_equal(np.array([[1], [3]]), category_id[1]) | |||
| assert np.array_equal(np.array([[0], [0]]), iscrowd[1]) | |||
| assert np.array_equal(np.array([[43102], [6079]]), area[1]) | |||
| def test_coco_detection_classindex(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| class_index = data1.get_class_indexing() | |||
| assert class_index == {'person': [1], 'bicycle': [2], 'car': [3], 'cat': [4], 'dog': [5], 'monkey': [7]} | |||
| num_iter = 0 | |||
| for _ in data1.__iter__(): | |||
| num_iter += 1 | |||
| assert num_iter == 6 | |||
| def test_coco_panoptic_classindex(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True) | |||
| class_index = data1.get_class_indexing() | |||
| assert class_index == {'person': [1, 1], 'bicycle': [2, 1], 'car': [3, 1]} | |||
| num_iter = 0 | |||
| for _ in data1.__iter__(): | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| def test_coco_case_0(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| data1 = data1.shuffle(10) | |||
| data1 = data1.batch(3, pad_info={}) | |||
| num_iter = 0 | |||
| for _ in data1.create_dict_iterator(): | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| def test_coco_case_1(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| sizes = [0.5, 0.5] | |||
| randomize = False | |||
| dataset1, dataset2 = data1.split(sizes=sizes, randomize=randomize) | |||
| num_iter = 0 | |||
| for _ in dataset1.create_dict_iterator(): | |||
| num_iter += 1 | |||
| assert num_iter == 3 | |||
| num_iter = 0 | |||
| for _ in dataset2.create_dict_iterator(): | |||
| num_iter += 1 | |||
| assert num_iter == 3 | |||
| def test_coco_case_2(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| resize_op = vision.Resize((224, 224)) | |||
| data1 = data1.map(input_columns=["image"], operations=resize_op) | |||
| data1 = data1.repeat(4) | |||
| num_iter = 0 | |||
| for _ in data1.__iter__(): | |||
| num_iter += 1 | |||
| assert num_iter == 24 | |||
| def test_coco_case_exception(): | |||
| try: | |||
| data1 = ds.CocoDataset("path_not_exist/", annotation_file=ANNOTATION_FILE, task="Detection") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except ValueError as e: | |||
| assert "does not exist or permission denied" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file="./file_not_exist", task="Detection") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except ValueError as e: | |||
| assert "does not exist or permission denied" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Invalid task") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except ValueError as e: | |||
| assert "Invalid task type" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=LACKOFIMAGE_FILE, task="Detection") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "Invalid node found in json" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_FILE, task="Detection") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "json.exception.parse_error" in str(e) | |||
| if __name__ == '__main__': | |||
| test_coco_detection() | |||
| test_coco_stuff() | |||
| test_coco_keypoint() | |||
| test_coco_panoptic() | |||
| test_coco_detection_classindex() | |||
| test_coco_panoptic_classindex() | |||
| test_coco_case_0() | |||
| test_coco_case_1() | |||
| test_coco_case_2() | |||
| test_coco_case_exception() | |||