| @@ -23,6 +23,7 @@ | |||
| #include "dataset/engine/datasetops/source/image_folder_op.h" | |||
| #include "dataset/engine/datasetops/source/mnist_op.h" | |||
| #include "dataset/engine/datasetops/source/voc_op.h" | |||
| #include "dataset/engine/datasetops/source/coco_op.h" | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/engine/dataset_iterator.h" | |||
| #include "dataset/engine/datasetops/source/manifest_op.h" | |||
| @@ -65,6 +66,7 @@ static std::unordered_map<uint32_t, pFunction> g_parse_op_func_ = {{kStorage, &D | |||
| {kMnist, &DEPipeline::ParseMnistOp}, | |||
| {kManifest, &DEPipeline::ParseManifestOp}, | |||
| {kVoc, &DEPipeline::ParseVOCOp}, | |||
| {kCoco, &DEPipeline::ParseCocoOp}, | |||
| {kCifar10, &DEPipeline::ParseCifar10Op}, | |||
| {kCifar100, &DEPipeline::ParseCifar100Op}, | |||
| {kCelebA, &DEPipeline::ParseCelebAOp}, | |||
| @@ -930,6 +932,16 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> * | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| if (args["task"].is_none()) { | |||
| std::string err_msg = "Error: No task specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| if (args["mode"].is_none()) { | |||
| std::string err_msg = "Error: No mode specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| std::shared_ptr<VOCOp::Builder> builder = std::make_shared<VOCOp::Builder>(); | |||
| (void)builder->SetDir(ToString(args["dataset_dir"])); | |||
| (void)builder->SetTask(ToString(args["task"])); | |||
| @@ -957,6 +969,47 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> * | |||
| return Status::OK(); | |||
| } | |||
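| // Parse the Python-side arguments of CocoDataset and build a CocoOp: dataset_dir, | |||
| // annotation_file and task are required; num_parallel_workers, sampler and decode are optional. | |||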
| Status DEPipeline::ParseCocoOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) { | |||
| if (args["dataset_dir"].is_none()) { | |||
| std::string err_msg = "Error: No dataset path specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| if (args["annotation_file"].is_none()) { | |||
| std::string err_msg = "Error: No annotation_file specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| if (args["task"].is_none()) { | |||
| std::string err_msg = "Error: No task specified"; | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| std::shared_ptr<CocoOp::Builder> builder = std::make_shared<CocoOp::Builder>(); | |||
| (void)builder->SetDir(ToString(args["dataset_dir"])); | |||
| (void)builder->SetFile(ToString(args["annotation_file"])); | |||
| (void)builder->SetTask(ToString(args["task"])); | |||
| for (auto arg : args) { | |||
| std::string key = py::str(arg.first); | |||
| py::handle value = arg.second; | |||
| if (!value.is_none()) { | |||
| if (key == "num_parallel_workers") { | |||
| (void)builder->SetNumWorkers(ToInt(value)); | |||
| } else if (key == "sampler") { | |||
| auto create = py::reinterpret_borrow<py::object>(value).attr("create"); | |||
| std::shared_ptr<Sampler> sampler = create().cast<std::shared_ptr<Sampler>>(); | |||
| (void)builder->SetSampler(std::move(sampler)); | |||
| } else if (key == "decode") { | |||
| (void)builder->SetDecode(ToBool(value)); | |||
| } | |||
| } | |||
| } | |||
| std::shared_ptr<CocoOp> op; | |||
| RETURN_IF_NOT_OK(builder->Build(&op)); | |||
| *ptr = op; | |||
| return Status::OK(); | |||
| } | |||
| Status DEPipeline::ParseCifar10Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) { | |||
| // Required arguments | |||
| if (args["dataset_dir"].is_none()) { | |||
| @@ -58,6 +58,7 @@ enum OpName { | |||
| kMnist, | |||
| kManifest, | |||
| kVoc, | |||
| kCoco, | |||
| kCifar10, | |||
| kCifar100, | |||
| kCelebA, | |||
| @@ -142,6 +143,8 @@ class DEPipeline { | |||
| Status ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| Status ParseCocoOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| Status ParseCifar10Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| Status ParseCifar100Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| @@ -56,6 +56,7 @@ | |||
| #include "dataset/engine/jagged_connector.h" | |||
| #include "dataset/engine/datasetops/source/text_file_op.h" | |||
| #include "dataset/engine/datasetops/source/voc_op.h" | |||
| #include "dataset/engine/datasetops/source/coco_op.h" | |||
| #include "dataset/engine/gnn/graph.h" | |||
| #include "dataset/kernels/data/to_float16_op.h" | |||
| #include "dataset/text/kernels/jieba_tokenizer_op.h" | |||
| @@ -214,6 +215,18 @@ void bindDatasetOps(py::module *m) { | |||
| THROW_IF_ERROR(VOCOp::GetClassIndexing(dir, task_type, task_mode, dict, &output_class_indexing)); | |||
| return output_class_indexing; | |||
| }); | |||
| (void)py::class_<CocoOp, DatasetOp, std::shared_ptr<CocoOp>>(*m, "CocoOp") | |||
| .def_static("get_class_indexing", | |||
| [](const std::string &dir, const std::string &file, const std::string &task) { | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> output_class_indexing; | |||
| THROW_IF_ERROR(CocoOp::GetClassIndexing(dir, file, task, &output_class_indexing)); | |||
| return output_class_indexing; | |||
| }) | |||
| .def_static("get_num_rows", [](const std::string &dir, const std::string &file, const std::string &task) { | |||
| int64_t count = 0; | |||
| THROW_IF_ERROR(CocoOp::CountTotalRows(dir, file, task, &count)); | |||
| return count; | |||
| }); | |||
| } | |||
| void bindTensor(py::module *m) { | |||
| (void)py::class_<GlobalContext>(*m, "GlobalContext") | |||
| @@ -576,6 +589,7 @@ PYBIND11_MODULE(_c_dataengine, m) { | |||
| .value("MNIST", OpName::kMnist) | |||
| .value("MANIFEST", OpName::kManifest) | |||
| .value("VOC", OpName::kVoc) | |||
| .value("COCO", OpName::kCoco) | |||
| .value("CIFAR10", OpName::kCifar10) | |||
| .value("CIFAR100", OpName::kCifar100) | |||
| .value("RANDOMDATA", OpName::kRandomData) | |||
| @@ -13,6 +13,7 @@ add_library(engine-datasetops-source OBJECT | |||
| image_folder_op.cc | |||
| mnist_op.cc | |||
| voc_op.cc | |||
| coco_op.cc | |||
| manifest_op.cc | |||
| cifar_op.cc | |||
| random_data_op.cc | |||
| @@ -0,0 +1,632 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "dataset/engine/datasetops/source/coco_op.h" | |||
| #include <algorithm> | |||
| #include <fstream> | |||
| #include <iomanip> | |||
| #include "common/utils.h" | |||
| #include "dataset/core/config_manager.h" | |||
| #include "dataset/core/tensor_shape.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||
| #include "dataset/engine/db_connector.h" | |||
| #include "dataset/engine/execution_tree.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| const char kColumnImage[] = "image"; | |||
| const char kJsonImages[] = "images"; | |||
| const char kJsonImagesFileName[] = "file_name"; | |||
| const char kJsonId[] = "id"; | |||
| const char kJsonAnnotations[] = "annotations"; | |||
| const char kJsonAnnoSegmentation[] = "segmentation"; | |||
| const char kJsonAnnoCounts[] = "counts"; | |||
| const char kJsonAnnoSegmentsInfo[] = "segments_info"; | |||
| const char kJsonAnnoIscrowd[] = "iscrowd"; | |||
| const char kJsonAnnoBbox[] = "bbox"; | |||
| const char kJsonAnnoArea[] = "area"; | |||
| const char kJsonAnnoImageId[] = "image_id"; | |||
| const char kJsonAnnoNumKeypoints[] = "num_keypoints"; | |||
| const char kJsonAnnoKeypoints[] = "keypoints"; | |||
| const char kJsonAnnoCategoryId[] = "category_id"; | |||
| const char kJsonCategories[] = "categories"; | |||
| const char kJsonCategoriesIsthing[] = "isthing"; | |||
| const char kJsonCategoriesName[] = "name"; | |||
| const float kDefaultPadValue = -1.0; | |||
| const unsigned int kPadValueZero = 0; | |||
| CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| builder_num_workers_ = cfg->num_parallel_workers(); | |||
| builder_rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| builder_op_connector_size_ = cfg->op_connector_size(); | |||
| builder_task_type_ = TaskType::Detection; | |||
| } | |||
| Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) { | |||
| RETURN_IF_NOT_OK(SanityCheck()); | |||
| if (builder_sampler_ == nullptr) { | |||
| int64_t num_samples = 0; | |||
| int64_t start_index = 0; | |||
| builder_sampler_ = std::make_shared<SequentialSampler>(start_index, num_samples); | |||
| } | |||
| builder_schema_ = std::make_unique<DataSchema>(); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kColumnImage), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| switch (builder_task_type_) { | |||
| case TaskType::Detection: | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoBbox), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoCategoryId), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoIscrowd), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| break; | |||
| case TaskType::Stuff: | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoSegmentation), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoIscrowd), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| break; | |||
| case TaskType::Keypoint: | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoKeypoints), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoNumKeypoints), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| break; | |||
| case TaskType::Panoptic: | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoBbox), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoCategoryId), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoIscrowd), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK(builder_schema_->AddColumn( | |||
| ColDescriptor(std::string(kJsonAnnoArea), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); | |||
| break; | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Invalid task type"); | |||
| } | |||
| *ptr = std::make_shared<CocoOp>(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_, | |||
| builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_, | |||
| std::move(builder_schema_), std::move(builder_sampler_)); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::Builder::SanityCheck() { | |||
| Path dir(builder_dir_); | |||
| Path file(builder_file_); | |||
| std::string err_msg; | |||
| err_msg += dir.IsDirectory() == false ? "Coco image folder path is invalid or not set\n" : ""; | |||
| err_msg += file.Exists() == false ? "Coco annotation json path is invalid or not set\n" : ""; | |||
| err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers is set to 0 or negative\n" : ""; | |||
| return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, | |||
| int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<Sampler> sampler) | |||
| : ParallelOp(num_workers, queue_size), | |||
| decode_(decode), | |||
| row_cnt_(0), | |||
| buf_cnt_(0), | |||
| task_type_(task_type), | |||
| image_folder_path_(image_folder_path), | |||
| annotation_path_(annotation_path), | |||
| rows_per_buffer_(rows_per_buffer), | |||
| sampler_(std::move(sampler)), | |||
| data_schema_(std::move(data_schema)) { | |||
| // Set the column name map (base class field) | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| } | |||
| io_block_queues_.Init(num_workers_, queue_size); | |||
| } | |||
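| // Pack the sampler's ids into IOBlocks of rows_per_buffer_ ids each and hand them out | |||
| // round-robin to the worker queues; any trailing partial block is flushed by operator()(). | |||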
| Status CocoOp::TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys) { | |||
| for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) { | |||
| if ((*itr) >= num_rows_) continue; | |||
| keys->push_back(*itr); | |||
| row_cnt_++; | |||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||
| std::make_unique<IOBlock>(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); | |||
| keys->clear(); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::operator()() { | |||
| RETURN_IF_NOT_OK(LaunchThreadsAndInitOp()); | |||
| std::unique_ptr<DataBuffer> sampler_buffer; | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| while (true) { | |||
| std::vector<int64_t> keys; | |||
| keys.reserve(rows_per_buffer_); | |||
| while (sampler_buffer->eoe() == false) { | |||
| std::shared_ptr<Tensor> sample_ids; | |||
| RETURN_IF_NOT_OK(sampler_buffer->GetTensor(&sample_ids, 0, 0)); | |||
| if (sample_ids->type() != DataType(DataType::DE_INT64)) { | |||
| RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64"); | |||
| } | |||
| RETURN_IF_NOT_OK(TraverseSampleIds(sample_ids, &keys)); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| } | |||
| if (keys.empty() == false) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| } | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||
| std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); | |||
| for (int32_t i = 0; i < num_workers_; i++) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| } | |||
| return Status::OK(); | |||
| } else { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK(wp_.Wait()); | |||
| wp_.Clear(); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| } | |||
| } | |||
| } | |||
| void CocoOp::Print(std::ostream &out, bool show_all) const { | |||
| // Always show the id and name as the first line, regardless of whether this is a summary or detailed print | |||
| out << "(" << std::setw(2) << operator_id_ << ") <CocoOp>:"; | |||
| if (!show_all) { | |||
| // Call the super class for displaying any common 1-liner info | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal 1-liner info for this op | |||
| out << "\n"; | |||
| } else { | |||
| // Call the super class for displaying any common detailed info | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal stuff | |||
| out << "\nNumber of rows: " << num_rows_ << "\nCOCO Directory: " << image_folder_path_ << "\n\n"; | |||
| } | |||
| } | |||
| Status CocoOp::Reset() { | |||
| RETURN_IF_NOT_OK(sampler_->Reset()); | |||
| row_cnt_ = 0; | |||
| wp_.Set(); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::LoadTensorRow(const std::string &image_id, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> image, coordinate; | |||
| auto itr = coordinate_map_.find(image_id); | |||
| if (itr == coordinate_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); | |||
| std::string kImageFile = image_folder_path_ + image_id; | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); | |||
| auto bboxRow = itr->second; | |||
| std::vector<float> bbox_row; | |||
| dsize_t bbox_row_num = static_cast<dsize_t>(bboxRow.size()); | |||
| dsize_t bbox_column_num = 0; | |||
| for (auto bbox : bboxRow) { | |||
| if (static_cast<dsize_t>(bbox.size()) > bbox_column_num) { | |||
| bbox_column_num = static_cast<dsize_t>(bbox.size()); | |||
| } | |||
| } | |||
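| // Annotations for one image can have different lengths (e.g. a fixed 4-value bbox vs. a | |||
| // variable-length segmentation), so shorter rows are padded with kDefaultPadValue (-1.0) | |||
| // below to keep the [bbox_row_num, bbox_column_num] tensor rectangular. | |||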
| for (auto bbox : bboxRow) { | |||
| bbox_row.insert(bbox_row.end(), bbox.begin(), bbox.end()); | |||
| dsize_t pad_len = bbox_column_num - static_cast<dsize_t>(bbox.size()); | |||
| if (pad_len > 0) { | |||
| for (dsize_t i = 0; i < pad_len; i++) { | |||
| bbox_row.push_back(kDefaultPadValue); | |||
| } | |||
| } | |||
| } | |||
| std::vector<dsize_t> bbox_dim = {bbox_row_num, bbox_column_num}; | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&coordinate, data_schema_->column(1).tensorImpl(), TensorShape(bbox_dim), | |||
| data_schema_->column(1).type(), | |||
| reinterpret_cast<unsigned char *>(&bbox_row[0]))); | |||
| if (task_type_ == TaskType::Detection) { | |||
| RETURN_IF_NOT_OK(LoadDetectionTensorRow(image_id, image, coordinate, trow)); | |||
| } else if (task_type_ == TaskType::Stuff || task_type_ == TaskType::Keypoint) { | |||
| RETURN_IF_NOT_OK(LoadSimpleTensorRow(image_id, image, coordinate, trow)); | |||
| } else if (task_type_ == TaskType::Panoptic) { | |||
| RETURN_IF_NOT_OK(LoadMixTensorRow(image_id, image, coordinate, trow)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid task type."); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| // When task is Detection, user can get data with four columns: | |||
| // column ["image"] with datatype=uint8 | |||
| // column ["bbox"] with datatype=float32 | |||
| // column ["category_id"] with datatype=uint32 | |||
| // column ["iscrowd"] with datatype=uint32 | |||
| // Note: column ["iscrowd"] is used by some testcases, such as FasterRcnn. | |||
| // If "iscrowd" does not exist in the annotation, the user will get the default value 0. | |||
| Status CocoOp::LoadDetectionTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> category_id, iscrowd; | |||
| std::vector<uint32_t> category_id_row; | |||
| std::vector<uint32_t> iscrowd_row; | |||
| auto itr_item = simple_item_map_.find(image_id); | |||
| if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); | |||
| std::vector<uint32_t> annotation = itr_item->second; | |||
| for (int64_t i = 0; i < annotation.size(); i++) { | |||
| if (i % 2 == 0) { | |||
| category_id_row.push_back(annotation[i]); | |||
| } else if (i % 2 == 1) { | |||
| iscrowd_row.push_back(annotation[i]); | |||
| } | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), | |||
| data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&category_id_row[0]))); | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), | |||
| data_schema_->column(3).type(), reinterpret_cast<unsigned char *>(&iscrowd_row[0]))); | |||
| (*trow) = {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd)}; | |||
| return Status::OK(); | |||
| } | |||
| // When task is "Stuff"/"Keypoint", user can get data with three columns: | |||
| // column ["image"] with datatype=uint8 | |||
| // column ["segmentation"]/["keypoints"] with datatype=float32 | |||
| // column ["iscrowd"]/["num_keypoints"] with datatype=uint32 | |||
| Status CocoOp::LoadSimpleTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> item; | |||
| std::vector<uint32_t> item_queue; | |||
| auto itr_item = simple_item_map_.find(image_id); | |||
| if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); | |||
| item_queue = itr_item->second; | |||
| std::vector<dsize_t> bbox_dim = {static_cast<dsize_t>(item_queue.size()), 1}; | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&item, data_schema_->column(2).tensorImpl(), TensorShape(bbox_dim), | |||
| data_schema_->column(2).type(), | |||
| reinterpret_cast<unsigned char *>(&item_queue[0]))); | |||
| (*trow) = {std::move(image), std::move(coordinate), std::move(item)}; | |||
| return Status::OK(); | |||
| } | |||
| // When task is "Panoptic", user can get data with five columns: | |||
| // column ["image"] with datatype=uint8 | |||
| // column ["bbox"] with datatype=float32 | |||
| // column ["category_id"] with datatype=uint32 | |||
| // column ["iscrowd"] with datatype=uint32 | |||
| // column ["area"] with datattype=uint32 | |||
| Status CocoOp::LoadMixTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> category_id, iscrowd, area; | |||
| std::vector<uint32_t> category_id_row; | |||
| std::vector<uint32_t> iscrowd_row; | |||
| std::vector<uint32_t> area_row; | |||
| auto itr_item = simple_item_map_.find(image_id); | |||
| if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); | |||
| std::vector<uint32_t> annotation = itr_item->second; | |||
| for (int64_t i = 0; i < annotation.size(); i++) { | |||
| if (i % 3 == 0) { | |||
| category_id_row.push_back(annotation[i]); | |||
| } else if (i % 3 == 1) { | |||
| iscrowd_row.push_back(annotation[i]); | |||
| } else if (i % 3 == 2) { | |||
| area_row.push_back(annotation[i]); | |||
| } | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), | |||
| data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&category_id_row[0]))); | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), | |||
| data_schema_->column(3).type(), reinterpret_cast<unsigned char *>(&iscrowd_row[0]))); | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor( | |||
| &area, data_schema_->column(4).tensorImpl(), TensorShape({static_cast<dsize_t>(area_row.size()), 1}), | |||
| data_schema_->column(4).type(), reinterpret_cast<unsigned char *>(&area_row[0]))); | |||
| (*trow) = {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd), std::move(area)}; | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||
| TensorRow trow; | |||
| for (const int64_t &key : keys) { | |||
| RETURN_IF_NOT_OK(this->LoadTensorRow(image_ids_[key], &trow)); | |||
| deq->push_back(std::move(trow)); | |||
| } | |||
| (*db)->set_tensor_table(std::move(deq)); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::WorkerEntry(int32_t worker_id) { | |||
| TaskManager::FindMe()->Post(); | |||
| int64_t buffer_id = worker_id; | |||
| std::unique_ptr<IOBlock> io_block; | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||
| while (io_block != nullptr) { | |||
| if (io_block->eoe() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| buffer_id = worker_id; | |||
| } else if (io_block->eof() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| } else { | |||
| std::vector<int64_t> keys; | |||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||
| if (keys.empty() == true) return Status::OK(); | |||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||
| buffer_id += num_workers_; | |||
| } | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||
| } | |||
| RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); | |||
| } | |||
| template <typename T> | |||
| Status CocoOp::SearchNodeInJson(nlohmann::json input_tree, std::string node_name, T *output_node) { | |||
| auto node = input_tree.find(node_name); | |||
| if (node == input_tree.end()) RETURN_STATUS_UNEXPECTED("Invalid node found in json : " + node_name); | |||
| (*output_node) = *node; | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::ParseAnnotationIds() { | |||
| std::ifstream in(annotation_path_); | |||
| nlohmann::json js; | |||
| in >> js; | |||
| std::vector<std::string> image_que; | |||
| nlohmann::json image_list; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(js, std::string(kJsonImages), &image_list)); | |||
| RETURN_IF_NOT_OK(ImageColumnLoad(image_list, &image_que)); | |||
| nlohmann::json annotations_list; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(js, std::string(kJsonAnnotations), &annotations_list)); | |||
| for (auto annotation : annotations_list) { | |||
| int32_t image_id = 0, id = 0; | |||
| std::string file_name; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonAnnoImageId), &image_id)); | |||
| auto itr_file = image_index_.find(image_id); | |||
| if (itr_file == image_index_.end()) | |||
| RETURN_STATUS_UNEXPECTED("Invalid image id of annotations : " + std::to_string(image_id)); | |||
| file_name = itr_file->second; | |||
| switch (task_type_) { | |||
| case TaskType::Detection: | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id)); | |||
| RETURN_IF_NOT_OK(DetectionColumnLoad(annotation, file_name, id)); | |||
| break; | |||
| case TaskType::Stuff: | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id)); | |||
| RETURN_IF_NOT_OK(StuffColumnLoad(annotation, file_name, id)); | |||
| break; | |||
| case TaskType::Keypoint: | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id)); | |||
| RETURN_IF_NOT_OK(KeypointColumnLoad(annotation, file_name, id)); | |||
| break; | |||
| case TaskType::Panoptic: | |||
| RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id)); | |||
| break; | |||
| default: | |||
| RETURN_STATUS_UNEXPECTED("Invalid task type"); | |||
| } | |||
| } | |||
| for (auto img : image_que) { | |||
| if (coordinate_map_.find(img) != coordinate_map_.end()) image_ids_.push_back(img); | |||
| } | |||
| if (task_type_ == TaskType::Detection || task_type_ == TaskType::Panoptic) { | |||
| nlohmann::json node_categories; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(js, std::string(kJsonCategories), &node_categories)); | |||
| RETURN_IF_NOT_OK(CategoriesColumnLoad(node_categories)); | |||
| } | |||
| num_rows_ = image_ids_.size(); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::ImageColumnLoad(nlohmann::json image_tree, std::vector<std::string> *image_vec) { | |||
| if (image_tree.size() == 0) { | |||
| RETURN_STATUS_UNEXPECTED("No images found in " + annotation_path_); | |||
| } | |||
| for (auto img : image_tree) { | |||
| std::string file_name; | |||
| int32_t id = 0; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(img, std::string(kJsonImagesFileName), &file_name)); | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(img, std::string(kJsonId), &id)); | |||
| image_index_[id] = file_name; | |||
| image_vec->push_back(file_name); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::DetectionColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, | |||
| const int32_t &unique_id) { | |||
| std::vector<float> bbox; | |||
| nlohmann::json node_bbox; | |||
| uint32_t category_id = 0, iscrowd = 0; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoBbox), &node_bbox)); | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoCategoryId), &category_id)); | |||
| auto node_iscrowd = annotation_tree.find(kJsonAnnoIscrowd); | |||
| if (node_iscrowd != annotation_tree.end()) iscrowd = *node_iscrowd; | |||
| bbox.insert(bbox.end(), node_bbox.begin(), node_bbox.end()); | |||
| coordinate_map_[image_file].push_back(bbox); | |||
| simple_item_map_[image_file].push_back(category_id); | |||
| simple_item_map_[image_file].push_back(iscrowd); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::StuffColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, | |||
| const int32_t &unique_id) { | |||
| uint32_t iscrowd = 0; | |||
| std::vector<float> bbox; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoIscrowd), &iscrowd)); | |||
| simple_item_map_[image_file].push_back(iscrowd); | |||
| nlohmann::json segmentation; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoSegmentation), &segmentation)); | |||
| if (iscrowd == 0) { | |||
| for (auto item : segmentation) { | |||
| if (bbox.size() > 0) bbox.clear(); | |||
| bbox.insert(bbox.end(), item.begin(), item.end()); | |||
| coordinate_map_[image_file].push_back(bbox); | |||
| } | |||
| } else if (iscrowd == 1) { | |||
| nlohmann::json segmentation_count; | |||
| RETURN_IF_NOT_OK(SearchNodeInJson(segmentation, std::string(kJsonAnnoCounts), &segmentation_count)); | |||
| bbox.insert(bbox.end(), segmentation_count.begin(), segmentation_count.end()); | |||
| coordinate_map_[image_file].push_back(bbox); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::KeypointColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, | |||
| const int32_t &unique_id) { | |||
| auto itr_num_keypoint = annotation_tree.find(kJsonAnnoNumKeypoints); | |||
| if (itr_num_keypoint == annotation_tree.end()) | |||
| RETURN_STATUS_UNEXPECTED("No num_keypoint found in annotations where id: " + std::to_string(unique_id)); | |||
| simple_item_map_[image_file].push_back(*itr_num_keypoint); | |||
| auto itr_keypoint = annotation_tree.find(kJsonAnnoKeypoints); | |||
| if (itr_keypoint == annotation_tree.end()) | |||
| RETURN_STATUS_UNEXPECTED("No keypoint found in annotations where id: " + std::to_string(unique_id)); | |||
| coordinate_map_[image_file].push_back(*itr_keypoint); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::PanopticColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, | |||
| const int32_t &image_id) { | |||
| auto itr_segments = annotation_tree.find(kJsonAnnoSegmentsInfo); | |||
| if (itr_segments == annotation_tree.end()) | |||
| RETURN_STATUS_UNEXPECTED("No segments_info found in annotations where image_id: " + std::to_string(image_id)); | |||
| for (auto info : *itr_segments) { | |||
| std::vector<float> bbox; | |||
| auto itr_bbox = info.find(kJsonAnnoBbox); | |||
| if (itr_bbox == info.end()) | |||
| RETURN_STATUS_UNEXPECTED("No bbox found in segments_info where image_id: " + std::to_string(image_id)); | |||
| bbox.insert(bbox.end(), itr_bbox->begin(), itr_bbox->end()); | |||
| coordinate_map_[image_file].push_back(bbox); | |||
| auto itr_category_id = info.find(kJsonAnnoCategoryId); | |||
| if (itr_category_id == info.end()) | |||
| RETURN_STATUS_UNEXPECTED("No category_id found in segments_info where image_id: " + std::to_string(image_id)); | |||
| auto itr_iscrowd = info.find(kJsonAnnoIscrowd); | |||
| if (itr_iscrowd == info.end()) | |||
| RETURN_STATUS_UNEXPECTED("No iscrowd found in segments_info where image_id: " + std::to_string(image_id)); | |||
| auto itr_area = info.find(kJsonAnnoArea); | |||
| if (itr_area == info.end()) | |||
| RETURN_STATUS_UNEXPECTED("No area found in segments_info where image_id: " + std::to_string(image_id)); | |||
| simple_item_map_[image_file].push_back(*itr_category_id); | |||
| simple_item_map_[image_file].push_back(*itr_iscrowd); | |||
| simple_item_map_[image_file].push_back(*itr_area); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::CategoriesColumnLoad(nlohmann::json categories_tree) { | |||
| if (categories_tree.size() == 0) RETURN_STATUS_UNEXPECTED("No categories found in " + annotation_path_); | |||
| for (auto category : categories_tree) { | |||
| int32_t id = 0; | |||
| std::string name; | |||
| std::vector<int32_t> label_info; | |||
| auto itr_id = category.find(kJsonId); | |||
| if (itr_id == category.end()) RETURN_STATUS_UNEXPECTED("No id found in categories of " + annotation_path_); | |||
| id = *itr_id; | |||
| label_info.push_back(id); | |||
| auto itr_name = category.find(kJsonCategoriesName); | |||
| if (itr_name == category.end()) | |||
| RETURN_STATUS_UNEXPECTED("No name found in categories where id: " + std::to_string(id)); | |||
| name = *itr_name; | |||
| if (task_type_ == TaskType::Panoptic) { | |||
| auto itr_isthing = category.find(kJsonCategoriesIsthing); | |||
| if (itr_isthing == category.end()) | |||
| RETURN_STATUS_UNEXPECTED("No isthing found in categories of " + annotation_path_); | |||
| label_info.push_back(*itr_isthing); | |||
| } | |||
| label_index_.emplace_back(std::make_pair(name, label_info)); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::InitSampler() { | |||
| RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this)); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::LaunchThreadsAndInitOp() { | |||
| if (tree_ == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("tree_ not set"); | |||
| } | |||
| RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); | |||
| RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); | |||
| RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CocoOp::WorkerEntry, this, std::placeholders::_1))); | |||
| TaskManager::FindMe()->Post(); | |||
| RETURN_IF_NOT_OK(this->ParseAnnotationIds()); | |||
| RETURN_IF_NOT_OK(this->InitSampler()); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) { | |||
| std::ifstream fs; | |||
| fs.open(path, std::ios::binary | std::ios::in); | |||
| if (fs.fail()) { | |||
| RETURN_STATUS_UNEXPECTED("Fail to open file: " + path); | |||
| } | |||
| int64_t num_elements = fs.seekg(0, std::ios::end).tellg(); | |||
| (void)fs.seekg(0, std::ios::beg); | |||
| RETURN_IF_NOT_OK( | |||
| Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type())); | |||
| (void)fs.read(reinterpret_cast<char *>((*tensor)->GetMutableBuffer()), num_elements); | |||
| fs.close(); | |||
| if (decode_ == true) { | |||
| Status rc = Decode(*tensor, tensor); | |||
| if (rc.IsError()) { | |||
| RETURN_STATUS_UNEXPECTED("fail to decode file: " + path); | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::CountTotalRows(const std::string &dir, const std::string &file, const std::string &task, | |||
| int64_t *count) { | |||
| std::shared_ptr<CocoOp> op; | |||
| RETURN_IF_NOT_OK(Builder().SetDir(dir).SetFile(file).SetTask(task).Build(&op)); | |||
| RETURN_IF_NOT_OK(op->ParseAnnotationIds()); | |||
| *count = static_cast<int64_t>(op->image_ids_.size()); | |||
| return Status::OK(); | |||
| } | |||
| Status CocoOp::GetClassIndexing(const std::string &dir, const std::string &file, const std::string &task, | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) { | |||
| std::shared_ptr<CocoOp> op; | |||
| RETURN_IF_NOT_OK(Builder().SetDir(dir).SetFile(file).SetTask(task).Build(&op)); | |||
| RETURN_IF_NOT_OK(op->ParseAnnotationIds()); | |||
| *output_class_indexing = op->label_index_; | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
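| For reference, ParseAnnotationIds() above expects a COCO-style annotation json. A minimal sketch for the Detection task follows (the key names come from the kJson* constants above; the file name and concrete values are made-up examples): | |||
| import json | |||
| annotation = { | |||
|     "images": [{"id": 1, "file_name": "example.jpg"}], | |||
|     "annotations": [{"id": 10, "image_id": 1, "bbox": [10.0, 10.0, 100.0, 100.0], | |||
|                      "category_id": 1, "iscrowd": 0}], | |||
|     "categories": [{"id": 1, "name": "person"}], | |||
| } | |||
| with open("annotation.json", "w") as f:  # write a toy annotation file for testing | |||
|     json.dump(annotation, f) | |||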
| @@ -0,0 +1,324 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_ENGINE_DATASETOPS_SOURCE_COCO_OP_H_ | |||
| #define DATASET_ENGINE_DATASETOPS_SOURCE_COCO_OP_H_ | |||
| #include <map> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/engine/data_buffer.h" | |||
| #include "dataset/engine/data_schema.h" | |||
| #include "dataset/engine/datasetops/parallel_op.h" | |||
| #include "dataset/engine/datasetops/source/io_block.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sampler.h" | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include "dataset/util/path.h" | |||
| #include "dataset/util/queue.h" | |||
| #include "dataset/util/status.h" | |||
| #include "dataset/util/wait_post.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Forward declares | |||
| template <typename T> | |||
| class Queue; | |||
| using CoordinateRow = std::vector<std::vector<float>>; | |||
| class CocoOp : public ParallelOp, public RandomAccessOp { | |||
| public: | |||
| enum class TaskType { Detection = 0, Stuff = 1, Panoptic = 2, Keypoint = 3 }; | |||
| class Builder { | |||
| public: | |||
| // Constructor for Builder class of CocoOp | |||
| Builder(); | |||
| // Destructor. | |||
| ~Builder() = default; | |||
| // Setter method. | |||
| // @param const std::string & build_dir | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetDir(const std::string &build_dir) { | |||
| builder_dir_ = build_dir; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param const std::string & build_file | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetFile(const std::string &build_file) { | |||
| builder_file_ = build_file; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param const std::string & task_type | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetTask(const std::string &task_type) { | |||
| if (task_type == "Detection") { | |||
| builder_task_type_ = TaskType::Detection; | |||
| } else if (task_type == "Stuff") { | |||
| builder_task_type_ = TaskType::Stuff; | |||
| } else if (task_type == "Panoptic") { | |||
| builder_task_type_ = TaskType::Panoptic; | |||
| } else if (task_type == "Keypoint") { | |||
| builder_task_type_ = TaskType::Keypoint; | |||
| } | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param int32_t num_workers | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetNumWorkers(int32_t num_workers) { | |||
| builder_num_workers_ = num_workers; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param int32_t op_connector_size | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetOpConnectorSize(int32_t op_connector_size) { | |||
| builder_op_connector_size_ = op_connector_size; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param int32_t rows_per_buffer | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { | |||
| builder_rows_per_buffer_ = rows_per_buffer; | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param std::shared_ptr<Sampler> sampler | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetSampler(std::shared_ptr<Sampler> sampler) { | |||
| builder_sampler_ = std::move(sampler); | |||
| return *this; | |||
| } | |||
| // Setter method. | |||
| // @param bool do_decode | |||
| // @return Builder setter method returns reference to the builder. | |||
| Builder &SetDecode(bool do_decode) { | |||
| builder_decode_ = do_decode; | |||
| return *this; | |||
| } | |||
| // Check validity of input args | |||
| // @return - The error code return | |||
| Status SanityCheck(); | |||
| // The builder "Build" method creates the final object. | |||
| // @param std::shared_ptr<CocoOp> *op - DatasetOp | |||
| // @return - The error code return | |||
| Status Build(std::shared_ptr<CocoOp> *op); | |||
| private: | |||
| bool builder_decode_; | |||
| std::string builder_dir_; | |||
| std::string builder_file_; | |||
| TaskType builder_task_type_; | |||
| int32_t builder_num_workers_; | |||
| int32_t builder_op_connector_size_; | |||
| int32_t builder_rows_per_buffer_; | |||
| std::shared_ptr<Sampler> builder_sampler_; | |||
| std::unique_ptr<DataSchema> builder_schema_; | |||
| }; | |||
| // Constructor | |||
| // @param TaskType task_type - task type of Coco | |||
| // @param std::string image_folder_path - image folder path of Coco | |||
| // @param std::string annotation_path - annotation json path of Coco | |||
| // @param int32_t num_workers - number of workers reading images in parallel | |||
| // @param int32_t rows_per_buffer - number of images (rows) in each buffer | |||
| // @param int32_t queue_size - connector queue size | |||
| // @param bool decode - whether to decode images | |||
| // @param std::unique_ptr<DataSchema> data_schema - the schema of the Coco dataset | |||
| // @param std::shared_ptr<Sampler> sampler - sampler tells CocoOp what to read | |||
| CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, | |||
| int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, | |||
| std::unique_ptr<DataSchema> data_schema, std::shared_ptr<Sampler> sampler); | |||
| // Destructor | |||
| ~CocoOp() = default; | |||
| // Worker thread pulls a number of IOBlocks from the IOBlock queue, makes a buffer and pushes it to the Connector | |||
| // @param int32_t workerId - id of each worker | |||
| // @return Status - The error code return | |||
| Status WorkerEntry(int32_t worker_id) override; | |||
| // Main Loop of CocoOp | |||
| // Master thread: fills the IOBlockQueue, then goes to sleep | |||
| // Worker thread: pulls an IOBlock from the IOBlockQueue, works on it, then puts the buffer to the output Connector | |||
| // @return Status - The error code return | |||
| Status operator()() override; | |||
| // A print method typically used for debugging | |||
| // @param out | |||
| // @param show_all | |||
| void Print(std::ostream &out, bool show_all) const override; | |||
| // @param const std::string &dir - Coco image dir path | |||
| // @param const std::string &file - Coco json file path | |||
| // @param const std::string &task - task mode of Coco task | |||
| // @param int64_t *count - output the number of rows of CocoDataset | |||
| static Status CountTotalRows(const std::string &dir, const std::string &file, const std::string &task, | |||
| int64_t *count); | |||
| // @param const std::string &dir - Coco image dir path | |||
| // @param const std::string &file - Coco json file path | |||
| // @param const std::string &task - task mode of Coco task | |||
| // @param std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing - output class indexing of CocoDataset | |||
| static Status GetClassIndexing(const std::string &dir, const std::string &file, const std::string &task, | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing); | |||
| private: | |||
| // Initialize Sampler, calls sampler->Init() within | |||
| // @return Status - The error code return | |||
| Status InitSampler(); | |||
| // Load a tensor row according to image id | |||
| // @param std::string image_id - image id | |||
| // @param TensorRow row - image & target read into this tensor row | |||
| // @return Status - The error code return | |||
| Status LoadTensorRow(const std::string &image_id, TensorRow *row); | |||
| // Load a tensor row for the Detection task, converting the annotation vectors to tensors | |||
| // @param const std::string &image_id - image id | |||
| // @param std::shared_ptr<Tensor> image - image tensor | |||
| // @param std::shared_ptr<Tensor> coordinate - coordinate tensor | |||
| // @param TensorRow row - image & target read into this tensor row | |||
| // @return Status - The error code return | |||
| Status LoadDetectionTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow); | |||
| // Load a tensor row for the Stuff or Keypoint task, converting the annotation vector to a tensor | |||
| // @param const std::string &image_id - image id | |||
| // @param std::shared_ptr<Tensor> image - image tensor | |||
| // @param std::shared_ptr<Tensor> coordinate - coordinate tensor | |||
| // @param TensorRow row - image & target read into this tensor row | |||
| // @return Status - The error code return | |||
| Status LoadSimpleTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow); | |||
| // Load a tensor row for the Panoptic task, converting the annotation vectors to multiple tensors | |||
| // @param const std::string &image_id - image id | |||
| // @param std::shared_ptr<Tensor> image - image tensor | |||
| // @param std::shared_ptr<Tensor> coordinate - coordinate tensor | |||
| // @param TensorRow row - image & target read into this tensor row | |||
| // @return Status - The error code return | |||
| Status LoadMixTensorRow(const std::string &image_id, std::shared_ptr<Tensor> image, | |||
| std::shared_ptr<Tensor> coordinate, TensorRow *trow); | |||
| // @param const std::string &path - path to the image file | |||
| // @param const ColDescriptor &col - contains tensor implementation and datatype | |||
| // @param std::shared_ptr<Tensor> tensor - return | |||
| // @return Status - The error code return | |||
| Status ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor); | |||
| // @param const std::vector<int64_t> &keys - keys in ioblock | |||
| // @param std::unique_ptr<DataBuffer> db | |||
| // @return Status - The error code return | |||
| Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db); | |||
| // Parse the annotation ids from the annotation json file | |||
| // @return Status - The error code return | |||
| Status ParseAnnotationIds(); | |||
| // @param const std::shared_ptr<Tensor> &sample_ids - sample ids of tensor | |||
| // @param std::vector<int64_t> *keys - image id | |||
| // @return Status - The error code return | |||
| Status TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys); | |||
| // Called first when operator()() is called | |||
| // @return Status - The error code return | |||
| Status LaunchThreadsAndInitOp(); | |||
| // Reset dataset state | |||
| // @return Status - The error code return | |||
| Status Reset() override; | |||
| // @param nlohmann::json image_tree - image tree of json | |||
| // @param std::vector<std::string> *image_vec - image file name list of json | |||
| // @return Status - The error code return | |||
| Status ImageColumnLoad(nlohmann::json image_tree, std::vector<std::string> *image_vec); | |||
| // @param nlohmann::json categories_tree - categories tree of json | |||
| // @return Status - The error code return | |||
| Status CategoriesColumnLoad(nlohmann::json categories_tree); | |||
| // @param nlohmann::json annotation_tree - annotation tree of json | |||
| // @param const std::string &image_file - current image name in annotation | |||
| // @param const int32_t &id - current unique id of annotation | |||
| // @return Status - The error code return | |||
| Status DetectionColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &id); | |||
| // @param nlohmann::json annotation_tree - annotation tree of json | |||
| // @param const std::string &image_file - current image name in annotation | |||
| // @param const int32_t &id - current unique id of annotation | |||
| // @return Status - The error code return | |||
| Status StuffColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &id); | |||
| // @param nlohmann::json annotation_tree - annotation tree of json | |||
| // @param const std::string &image_file - current image name in annotation | |||
| // @param const int32_t &id - current unique id of annotation | |||
| // @return Status - The error code return | |||
| Status KeypointColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &id); | |||
| // @param nlohmann::json annotation_tree - annotation tree of json | |||
| // @param const std::string &image_file - current image name in annotation | |||
| // @param const int32_t &image_id - current unique id of annotation | |||
| // @return Status - The error code return | |||
| Status PanopticColumnLoad(nlohmann::json annotation_tree, const std::string &image_file, const int32_t &image_id); | |||
| template <typename T> | |||
| Status SearchNodeInJson(nlohmann::json input_tree, std::string node_name, T *output_node); | |||
| bool decode_; | |||
| int64_t row_cnt_; | |||
| int64_t buf_cnt_; | |||
| std::string image_folder_path_; | |||
| std::string annotation_path_; | |||
| TaskType task_type_; | |||
| int32_t rows_per_buffer_; | |||
| std::shared_ptr<Sampler> sampler_; | |||
| std::unique_ptr<DataSchema> data_schema_; | |||
| WaitPost wp_; | |||
| std::vector<std::string> image_ids_; | |||
| std::map<int32_t, std::string> image_index_; | |||
| QueueList<std::unique_ptr<IOBlock>> io_block_queues_; | |||
| std::vector<std::pair<std::string, std::vector<int32_t>>> label_index_; | |||
| std::map<std::string, CoordinateRow> coordinate_map_; | |||
| std::map<std::string, std::vector<uint32_t>> simple_item_map_; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_ENGINE_DATASETOPS_SOURCE_COCO_OP_H_ | |||
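| The new pybind11 statics can be exercised directly from Python, roughly as below (a hedged sketch; the module path comes from the mindspore._c_dataengine import later in this diff, and the paths are placeholders): | |||
| from mindspore._c_dataengine import CocoOp | |||
| # get_class_indexing returns (name, [id]) pairs ((name, [id, isthing]) for 'Panoptic'); | |||
| # get_num_rows returns the number of images that carry annotations. | |||
| indexing = CocoOp.get_class_indexing("/path/to/images/", "/path/to/annotation.json", "Detection") | |||
| num_rows = CocoOp.get_num_rows("/path/to/images/", "/path/to/annotation.json", "Detection") | |||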
| @@ -20,8 +20,8 @@ can also create samplers with this module to sample data. | |||
| from .core.configuration import config | |||
| from .engine.datasets import TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, \ | |||
| GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CelebADataset, TextFileDataset, \ | |||
| Schema, Shuffle, zip, RandomDataset | |||
| GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CocoDataset, CelebADataset,\ | |||
| TextFileDataset, Schema, Shuffle, zip, RandomDataset | |||
| from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \ | |||
| WeightedRandomSampler, Sampler | |||
| from .engine.serializer_deserializer import serialize, deserialize, show | |||
| @@ -30,5 +30,5 @@ from .engine.graphdata import GraphData | |||
| __all__ = ["config", "ImageFolderDatasetV2", "MnistDataset", | |||
| "MindDataset", "GeneratorDataset", "TFRecordDataset", | |||
| "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", | |||
| "VOCDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", "RandomSampler", | |||
| "VOCDataset", "CocoDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", "RandomSampler", | |||
| "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "zip", "GraphData"] | |||
| @@ -33,5 +33,5 @@ __all__ = ["config", "ConfigurationManager", "zip", | |||
| "ImageFolderDatasetV2", "MnistDataset", | |||
| "MindDataset", "GeneratorDataset", "TFRecordDataset", | |||
| "ManifestDataset", "Cifar10Dataset", "Cifar100Dataset", "CelebADataset", | |||
| "VOCDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", | |||
| "VOCDataset", "CocoDataset", "TextFileDataset", "Schema", "DistributedSampler", "PKSampler", | |||
| "RandomSampler", "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler"] | |||
| @@ -33,7 +33,7 @@ import copy | |||
| import numpy as np | |||
| from mindspore._c_dataengine import DataType, TFReaderOp, ImageFolderOp, CifarOp, MnistOp, ManifestOp, \ | |||
| MindRecordOp, TextFileOp, VOCOp, CBatchInfo | |||
| MindRecordOp, TextFileOp, VOCOp, CocoOp, CBatchInfo | |||
| from mindspore._c_expression import typing | |||
| from mindspore import log as logger | |||
| @@ -42,8 +42,9 @@ from .iterators import DictIterator, TupleIterator | |||
| from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \ | |||
| check_rename, \ | |||
| check_take, check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \ | |||
| check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \ | |||
| check_sync_wait, check_zip_dataset, check_add_column, check_textfiledataset, check_concat, check_split | |||
| check_tfrecorddataset, check_vocdataset, check_cocodataset, check_celebadataset, check_minddataset,\ | |||
| check_generatordataset, check_sync_wait, check_zip_dataset, check_add_column, check_textfiledataset, check_concat,\ | |||
| check_split | |||
| from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist | |||
| try: | |||
| @@ -3867,10 +3868,14 @@ class VOCDataset(MappableDataset): | |||
| """ | |||
| A source dataset for reading and parsing VOC dataset. | |||
| The generated dataset has two columns ['image', 'target']. | |||
| The shape of both column is [image_size] if decode flag is False, or [H, W, C] | |||
| The generated dataset has two columns: | |||
| task='Detection' : ['image', 'annotation']. | |||
| task='Segmentation' : ['image', 'target'] | |||
| The shape of both column 'image' and 'target' is [image_size] if decode flag is False, or [H, W, C] | |||
| otherwise. | |||
| The type of both tensor is uint8. | |||
| The type of both tensors 'image' and 'target' is uint8. | |||
| The type of tensor 'annotation' is uint32. | |||
| This dataset can take in a sampler. sampler and shuffle are mutually exclusive. Table | |||
| below shows what input args are allowed and their expected behavior. | |||
| @@ -4035,6 +4040,163 @@ class VOCDataset(MappableDataset): | |||
| return self.sampler.is_sharded() | |||
| class CocoDataset(MappableDataset): | |||
| """ | |||
| A source dataset for reading and parsing COCO dataset. | |||
| CocoDataset supports four kinds of tasks on the COCO 2017 Train/Val/Test data: | |||
| Detection, Keypoint, Stuff and Panoptic. | |||
| The generated dataset has multiple columns: | |||
| task = 'Detection' : column [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], | |||
| ['iscrowd', dtype=uint32]]. | |||
| task = 'Stuff' : column [['image', dtype=uint8], ['segmentation',dtype=float32], ['iscrowd',dtype=uint32]]. | |||
| task = 'Keypoint' : column [['image', dtype=uint8], ['keypoints', dtype=float32], ['num_keypoints', dtype=uint32]]. | |||
| task = 'Panoptic' : column [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], | |||
| ['iscrowd', dtype=uint32], ['area', dtype=uint32]]. | |||
| This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive. The table | |||
| below shows what input args are allowed and their expected behavior. | |||
| .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle' | |||
| :widths: 25 25 50 | |||
| :header-rows: 1 | |||
| * - Parameter 'sampler' | |||
| - Parameter 'shuffle' | |||
| - Expected Order Behavior | |||
| * - None | |||
| - None | |||
| - random order | |||
| * - None | |||
| - True | |||
| - random order | |||
| * - None | |||
| - False | |||
| - sequential order | |||
| * - Sampler object | |||
| - None | |||
| - order defined by sampler | |||
| * - Sampler object | |||
| - True | |||
| - not allowed | |||
| * - Sampler object | |||
| - False | |||
| - not allowed | |||
| Args: | |||
| dataset_dir (str): Path to the root directory that contains the dataset. | |||
| annotation_file (str): Path to the annotation json. | |||
| task (str): Set the task type for reading COCO data; currently 'Detection', 'Stuff', 'Panoptic' | |||
| and 'Keypoint' are supported (default='Detection'). | |||
| num_samples (int, optional): The number of images to be included in the dataset | |||
| (default=None, all images). | |||
| num_parallel_workers (int, optional): Number of workers to read the data | |||
| (default=None, number set in the config). | |||
| shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected | |||
| order behavior shown in the table). | |||
| decode (bool, optional): Decode the images after reading (default=False). | |||
| sampler (Sampler, optional): Object used to choose samples from the dataset | |||
| (default=None, expected order behavior shown in the table). | |||
| num_shards (int, optional): Number of shards that the dataset should be divided | |||
| into (default=None). | |||
| shard_id (int, optional): The shard ID within num_shards (default=None). This | |||
| argument should be specified only when num_shards is also specified. | |||
| Raises: | |||
| RuntimeError: If sampler and shuffle are specified at the same time. | |||
| RuntimeError: If sampler and sharding are specified at the same time. | |||
| RuntimeError: If num_shards is specified but shard_id is None. | |||
| RuntimeError: If shard_id is specified but num_shards is None. | |||
| RuntimeError: If parsing the json annotation file failed. | |||
| ValueError: If task is not in ['Detection', 'Stuff', 'Panoptic', 'Keypoint']. | |||
| ValueError: If annotation_file does not exist. | |||
| ValueError: If dataset_dir does not exist. | |||
| ValueError: If shard_id is invalid (< 0 or >= num_shards). | |||
| Examples: | |||
| >>> import mindspore.dataset as ds | |||
| >>> dataset_dir = "/path/to/coco_dataset_directory/image_folder" | |||
| >>> annotation_file = "/path/to/coco_dataset_directory/annotation_folder/annotation.json" | |||
| >>> # 1) read COCO data for Detection task | |||
| >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Detection') | |||
| >>> # 2) read COCO data for Stuff task | |||
| >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Stuff') | |||
| >>> # 3) read COCO data for Panoptic task | |||
| >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Panoptic') | |||
| >>> # 4) read COCO data for Keypoint task | |||
| >>> coco_dataset = ds.CocoDataset(dataset_dir, annotation_file=annotation_file, task='Keypoint') | |||
| >>> # In each COCO dataset row, the available keys depend on the specified task | |||
| """ | |||
| @check_cocodataset | |||
| def __init__(self, dataset_dir, annotation_file, task="Detection", num_samples=None, num_parallel_workers=None, | |||
| shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None): | |||
| super().__init__(num_parallel_workers) | |||
| self.dataset_dir = dataset_dir | |||
| self.annotation_file = annotation_file | |||
| self.task = task | |||
| self.sampler = _select_sampler(num_samples, sampler, shuffle, num_shards, shard_id) | |||
| self.num_samples = num_samples | |||
| self.decode = decode | |||
| self.shuffle_level = shuffle | |||
| self.num_shards = num_shards | |||
| self.shard_id = shard_id | |||
| def get_args(self): | |||
| args = super().get_args() | |||
| args["dataset_dir"] = self.dataset_dir | |||
| args["annotation_file"] = self.annotation_file | |||
| args["task"] = self.task | |||
| args["num_samples"] = self.num_samples | |||
| args["sampler"] = self.sampler | |||
| args["decode"] = self.decode | |||
| args["shuffle"] = self.shuffle_level | |||
| args["num_shards"] = self.num_shards | |||
| args["shard_id"] = self.shard_id | |||
| return args | |||
| def get_dataset_size(self): | |||
| """ | |||
| Get the number of batches in an epoch. | |||
| Returns: | |||
| Number, number of batches. | |||
| """ | |||
| num_rows = CocoOp.get_num_rows(self.dataset_dir, self.annotation_file, self.task) | |||
| rows_per_shard = get_num_rows(num_rows, self.num_shards) | |||
| rows_from_sampler = self._get_sampler_dataset_size() | |||
| if rows_from_sampler is None: | |||
| return rows_per_shard | |||
| return min(rows_from_sampler, rows_per_shard) | |||
| def get_class_indexing(self): | |||
| """ | |||
| Get the class index. | |||
| Returns: | |||
| Dict, a str-to-list<int> mapping from label name to index. | |||
| """ | |||
| if self.task not in {"Detection", "Panoptic"}: | |||
| raise NotImplementedError("Only 'Detection' and 'Panoptic' support get_class_indexing.") | |||
| class_index = CocoOp.get_class_indexing(self.dataset_dir, self.annotation_file, self.task) | |||
| return dict(class_index) | |||
| def is_shuffled(self): | |||
| if self.shuffle_level is None: | |||
| return True | |||
| return self.shuffle_level or self.sampler.is_shuffled() | |||
| def is_sharded(self): | |||
| if self.num_shards is not None: | |||
| return self.num_shards > 1 | |||
| return self.sampler.is_sharded() | |||
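As a side note for review, here is a minimal sketch of how the new public surface is exercised end to end. The paths are placeholders, not files shipped with this change, and the column names follow the task table in the docstring above:

```python
import mindspore.dataset as ds

# Placeholder paths; substitute a real COCO-style layout.
dataset_dir = "/path/to/coco/images"
annotation_file = "/path/to/coco/annotations/instances.json"

data = ds.CocoDataset(dataset_dir, annotation_file=annotation_file,
                      task="Detection", decode=True, shuffle=False)

# get_dataset_size() accounts for num_shards and any attached sampler.
print(data.get_dataset_size())

# get_class_indexing() is only defined for 'Detection' and 'Panoptic';
# for 'Panoptic' each value also carries the isthing flag (see the tests below).
print(data.get_class_indexing())

for row in data.create_dict_iterator():
    # For task='Detection' each row exposes 'image', 'bbox',
    # 'category_id' and 'iscrowd'.
    print(row["image"].shape, row["bbox"])
```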
| class CelebADataset(MappableDataset): | |||
| """ | |||
| A source dataset for reading and parsing the CelebA dataset. Only list_attr_celeba.txt is supported currently. | |||
| @@ -165,6 +165,8 @@ class Iterator: | |||
| op_type = OpName.MANIFEST | |||
| elif isinstance(dataset, de.VOCDataset): | |||
| op_type = OpName.VOC | |||
| elif isinstance(dataset, de.CocoDataset): | |||
| op_type = OpName.COCO | |||
| elif isinstance(dataset, de.Cifar10Dataset): | |||
| op_type = OpName.CIFAR10 | |||
| elif isinstance(dataset, de.Cifar100Dataset): | |||
| @@ -299,6 +299,12 @@ def create_node(node): | |||
| node.get('num_samples'), node.get('num_parallel_workers'), node.get('shuffle'), | |||
| node.get('decode'), sampler, node.get('num_shards'), node.get('shard_id')) | |||
| elif dataset_op == 'CocoDataset': | |||
| sampler = construct_sampler(node.get('sampler')) | |||
| pyobj = pyclass(node['dataset_dir'], node.get('annotation_file'), node.get('task'), node.get('num_samples'), | |||
| node.get('num_parallel_workers'), node.get('shuffle'), node.get('decode'), sampler, | |||
| node.get('num_shards'), node.get('shard_id')) | |||
| elif dataset_op == 'CelebADataset': | |||
| sampler = construct_sampler(node.get('sampler')) | |||
| pyobj = pyclass(node['dataset_dir'], node.get('num_parallel_workers'), node.get('shuffle'), | |||
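The `CocoDataset` branch added to `create_node` above restores the dataset from its serialized form. A hedged sketch of the round trip this enables, assuming the same `ds.serialize`/`ds.deserialize` helpers used for the other dataset types (paths are placeholders):

```python
import mindspore.dataset as ds

# Placeholder paths for illustration only.
data = ds.CocoDataset("/path/to/images",
                      annotation_file="/path/to/annotation.json",
                      task="Detection")

# Serialize the pipeline to a python dict, then rebuild it; the rebuilt
# pipeline should be reconstructed through the new 'CocoDataset' branch.
serialized = ds.serialize(data)
restored = ds.deserialize(input_dict=serialized)
```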
| @@ -522,6 +522,49 @@ def check_vocdataset(method): | |||
| return new_method | |||
| def check_cocodataset(method): | |||
| """A wrapper that wrap a parameter checker to the original Dataset(CocoDataset).""" | |||
| @wraps(method) | |||
| def new_method(*args, **kwargs): | |||
| param_dict = make_param_dict(method, args, kwargs) | |||
| nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'] | |||
| nreq_param_bool = ['shuffle', 'decode'] | |||
| # check dataset_dir; required argument | |||
| dataset_dir = param_dict.get('dataset_dir') | |||
| if dataset_dir is None: | |||
| raise ValueError("dataset_dir is not provided.") | |||
| check_dataset_dir(dataset_dir) | |||
| # check annotation_file; required argument | |||
| annotation_file = param_dict.get('annotation_file') | |||
| if annotation_file is None: | |||
| raise ValueError("annotation_file is not provided.") | |||
| check_dataset_file(annotation_file) | |||
| # check task; required argument | |||
| task = param_dict.get('task') | |||
| if task is None: | |||
| raise ValueError("task is not provided.") | |||
| if not isinstance(task, str): | |||
| raise ValueError("task is not str type.") | |||
| if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}: | |||
| raise ValueError("Invalid task type") | |||
| check_param_type(nreq_param_int, param_dict, int) | |||
| check_param_type(nreq_param_bool, param_dict, bool) | |||
| check_sampler_shuffle_shard_options(param_dict) | |||
| return method(*args, **kwargs) | |||
| return new_method | |||
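A quick sketch of what the new validator rejects before any C++ op is built. Since `check_dataset_dir` and `check_dataset_file` run first, this assumes `DATA_DIR` and `ANNOTATION` (placeholders here) point at an existing directory and file:

```python
import mindspore.dataset as ds

DATA_DIR = "/path/to/images"             # placeholder, must exist
ANNOTATION = "/path/to/annotation.json"  # placeholder, must exist

# Rejected by the task check above: not one of
# {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}.
try:
    ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION, task="Segmentation")
except ValueError as e:
    assert "Invalid task type" in str(e)

# Rejected by check_sampler_shuffle_shard_options: per the docstring,
# specifying both sampler and shuffle raises RuntimeError.
try:
    ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION, task="Detection",
                   shuffle=True, sampler=ds.RandomSampler())
except RuntimeError as e:
    print(e)
```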
| def check_celebadataset(method): | |||
| """A wrapper that wrap a parameter checker to the original Dataset(CelebADataset).""" | |||
| @@ -71,6 +71,7 @@ SET(DE_UT_SRCS | |||
| jieba_tokenizer_op_test.cc | |||
| tokenizer_op_test.cc | |||
| gnn_graph_test.cc | |||
| coco_op_test.cc | |||
| ) | |||
| add_executable(de_ut_tests ${DE_UT_SRCS}) | |||
| @@ -0,0 +1,265 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <fstream> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "common/common.h" | |||
| #include "common/utils.h" | |||
| #include "dataset/core/client.h" | |||
| #include "dataset/core/global_context.h" | |||
| #include "dataset/engine/datasetops/source/coco_op.h" | |||
| #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/pk_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/random_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" | |||
| #include "dataset/util/de_error.h" | |||
| #include "dataset/util/path.h" | |||
| #include "dataset/util/status.h" | |||
| #include "gtest/gtest.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "securec.h" | |||
| namespace common = mindspore::common; | |||
| using namespace mindspore::dataset; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); | |||
| std::shared_ptr<RepeatOp> Repeat(int repeat_cnt); | |||
| std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); | |||
| class MindDataTestCocoOp : public UT::DatasetOpTesting { | |||
| protected: | |||
| }; | |||
| TEST_F(MindDataTestCocoOp, TestCocoDetection) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| std::string dataset_path, annotation_path; | |||
| dataset_path = datasets_root_path_ + "/testCOCO/train/"; | |||
| annotation_path = datasets_root_path_ + "/testCOCO/annotations/train.json"; | |||
| std::string task("Detection"); | |||
| std::shared_ptr<CocoOp> my_coco_op; | |||
| CocoOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path) | |||
| .SetFile(annotation_path) | |||
| .SetTask(task) | |||
| .Build(&my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(DEBUG) << "Launch tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (size_t i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(DEBUG) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 6); | |||
| } | |||
| TEST_F(MindDataTestCocoOp, TestCocoStuff) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| std::string dataset_path, annotation_path; | |||
| dataset_path = datasets_root_path_ + "/testCOCO/train/"; | |||
| annotation_path = datasets_root_path_ + "/testCOCO/annotations/train.json"; | |||
| std::string task("Stuff"); | |||
| std::shared_ptr<CocoOp> my_coco_op; | |||
| CocoOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path) | |||
| .SetFile(annotation_path) | |||
| .SetTask(task) | |||
| .Build(&my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(DEBUG) << "Launch tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (size_t i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(DEBUG) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 6); | |||
| } | |||
| TEST_F(MindDataTestCocoOp, TestCocoKeypoint) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| std::string dataset_path, annotation_path; | |||
| dataset_path = datasets_root_path_ + "/testCOCO/train/"; | |||
| annotation_path = datasets_root_path_ + "/testCOCO/annotations/key_point.json"; | |||
| std::string task("Keypoint"); | |||
| std::shared_ptr<CocoOp> my_coco_op; | |||
| CocoOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path) | |||
| .SetFile(annotation_path) | |||
| .SetTask(task) | |||
| .Build(&my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(DEBUG) << "Launch tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (size_t i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(DEBUG) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 2); | |||
| } | |||
| TEST_F(MindDataTestCocoOp, TestCocoPanoptic) { | |||
| // Start with an empty execution tree | |||
| auto my_tree = std::make_shared<ExecutionTree>(); | |||
| std::string dataset_path, annotation_path; | |||
| dataset_path = datasets_root_path_ + "/testCOCO/train/"; | |||
| annotation_path = datasets_root_path_ + "/testCOCO/annotations/panoptic.json"; | |||
| std::string task("Panoptic"); | |||
| std::shared_ptr<CocoOp> my_coco_op; | |||
| CocoOp::Builder builder; | |||
| Status rc = builder.SetDir(dataset_path) | |||
| .SetFile(annotation_path) | |||
| .SetTask(task) | |||
| .Build(&my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssociateNode(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->AssignRoot(my_coco_op); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| MS_LOG(DEBUG) << "Launch tree and begin iteration."; | |||
| rc = my_tree->Prepare(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| rc = my_tree->Launch(); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| // Start the loop of reading tensors from our pipeline | |||
| DatasetIterator di(my_tree); | |||
| TensorRow tensor_list; | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| int row_count = 0; | |||
| while (!tensor_list.empty()) { | |||
| MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; | |||
| // Display the tensor by calling the printer on it | |||
| for (size_t i = 0; i < tensor_list.size(); i++) { | |||
| std::ostringstream ss; | |||
| ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; | |||
| MS_LOG(DEBUG) << "Tensor print: " << ss.str() << "."; | |||
| } | |||
| rc = di.FetchNextTensorRow(&tensor_list); | |||
| ASSERT_TRUE(rc.IsOk()); | |||
| row_count++; | |||
| } | |||
| ASSERT_EQ(row_count, 2); | |||
| } | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org","version": "1.0","year": 2017,"contributor": "COCO Consortium", "data_created": "2017/09/01"}, "licenses": [{"url": "http://creativecommons.org/license/by-nc-sa/2.0/","id": 3,"name": "Attribution-Noncommercial License"}], "images": [{"license": 3, "file_name": "000000391895.jpg", "id": 391895}, {"license": 3, "file_name: "000000318219.jpg", "id": 318219}, {"license": 3, "file_name": "000000554625.jpg", "id": 554625}, {"license": 3, "file_name": "000000574769.jpg", "id": 574769}, {"license": 3, "file_name": "000000060623.jpg", "id": 60623}, {"license": 3, "file_name": "000000309022.jpg", "id": 309022}], "annotations": [{"segmentation": [[10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0]], "category_id": 1, "iscrowd": 0, "image_id": 391895, "bbox": [10,10,10,10], "area": 100, "id": 10000}, {"segmentation": [[20.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0]], "category_id": 2, "iscrowd": 0, "image_id": 318219, "bbox": [20,20,20,20], "area": 400, "id": 10001}, {"segmentation": [[40.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,40.0,41.0,42.0]], "category_id": 3, "iscrowd": 0, "image_id": 554625, "bbox": [30,30,30,30], "area": 900, "id": 10002}, {"segmentation": [[50.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0]], "category_id": 4, "iscrowd": 0, "image_id": 574769, "bbox": [40,40,40,40], "area": 1600, "id": 10003}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 5, "iscrowd": 0, "image_id": 60623, "bbox": [50,50,50,50], "area": 2500, "id": 10004}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0],[68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 6, "iscrowd": 0, "image_id": 309022, "bbox": [60,60,60,60], "area": 3600, "id": 10005}, {"segmentation": [[70.0,72.0,73.0,74.0,75.0]], "category_id": 7, "iscrowd": 0, "image_id": 391895, "bbox": [70,70,70,70], "area": 4900, "id": 10006}, {"segmentation": {"counts": [10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0],"size": [200,300]}, "category_id": 8, "iscrowd": 1, "image_id": 318219, "bbox": [80,80,80,80], "area": 6400, "id": 10007}], "categories": [{"supercategory": "person", "id": 1, "name": "person"},{"supercategory": "vehicle", "id": 2, "name": "bicycle"},{"supercategory": "vehicle", "id": 3, "name": "car"},{"supercategory": "vehicle", "id": 4, "name": "cat"},{"supercategory": "vehicle", "id": 5, "name": "dog"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 7, "name": "monkey"}]} | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org", "version": "1.0", "year": 2017, "contributor": "COCO Consortium", "data_created": "2017/09/01"}, "images":[{"license": 3, "file_name": "000000391895.jpg", "id": 391895},{"license": 3, "file_name": "000000318219.jpg", "id": 318219}],"annotations": [{"segmentation": [[10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0]], "num_keypoints": 10,"area": 12345,"iscrowd": 0,"keypoints": [244,139,2,0,0,0,226,118,2,0,0,0,154,159,2,143,261,2,135,312,2,271,423,2,184,530,2,261,280,2,347,592,2,0,0,0,123,596,2,0,0,0,0,0,0,0,0,0,0,0,0],"image_id": 318219,"bbox": [40.65,38.8,418.38,601.2],"category_id": 1, "id": 491276},{"segmentation": [[20.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0]], "num_keypoints": 14,"area": 45678,"iscrowd": 0,"keypoints": [368,61,1,369,52,2,0,0,0,382,48,2,0,0,0,368,84,2,435,81,2,362,125,2,446,125,2,360,153,2,0,0,0,397,167,1,439,166,1,369,193,2,461,234,2,361,246,2,474,287,2],"image_id": 391895,"bbox": [339.88,22.16,153.88,300.73],"category_id": 1, "id": 202758}]} | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org","version": "1.0","year": 2017,"contributor": "COCO Consortium", "data_created": "2017/09/01"}, "licenses": [{"url": "http://creativecommons.org/license/by-nc-sa/2.0/","id": 3,"name": "Attribution-Noncommercial License"}], "image": [{"license": 3, "file_name": "000000391895.jpg", "id": 391895}, {"license": 3, "file_name": "000000318219.jpg", "id": 318219}, {"license": 3, "file_name": "000000554625.jpg", "id": 554625}, {"license": 3, "file_name": "000000574769.jpg", "id": 574769}, {"license": 3, "file_name": "000000060623.jpg", "id": 60623}, {"license": 3, "file_name": "000000309022.jpg", "id": 309022}], "annotations": [{"segmentation": [[10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0]], "category_id": 1, "iscrowd": 0, "image_id": 391895, "bbox": [10,10,10,10], "area": 100, "id": 10000}, {"segmentation": [[20.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0]], "category_id": 2, "iscrowd": 0, "image_id": 318219, "bbox": [20,20,20,20], "area": 400, "id": 10001}, {"segmentation": [[40.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,40.0,41.0,42.0]], "category_id": 3, "iscrowd": 0, "image_id": 554625, "bbox": [30,30,30,30], "area": 900, "id": 10002}, {"segmentation": [[50.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0]], "category_id": 4, "iscrowd": 0, "image_id": 574769, "bbox": [40,40,40,40], "area": 1600, "id": 10003}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 5, "iscrowd": 0, "image_id": 60623, "bbox": [50,50,50,50], "area": 2500, "id": 10004}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0],[68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 6, "iscrowd": 0, "image_id": 309022, "bbox": [60,60,60,60], "area": 3600, "id": 10005}, {"segmentation": [[70.0,72.0,73.0,74.0,75.0]], "category_id": 7, "iscrowd": 0, "image_id": 391895, "bbox": [70,70,70,70], "area": 4900, "id": 10006}, {"segmentation": {"counts": [10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0],"size": [200,300]}, "category_id": 8, "iscrowd": 1, "image_id": 318219, "bbox": [80,80,80,80], "area": 6400, "id": 10007}], "categories": [{"supercategory": "person", "id": 1, "name": "person"},{"supercategory": "vehicle", "id": 2, "name": "bicycle"},{"supercategory": "vehicle", "id": 3, "name": "car"},{"supercategory": "vehicle", "id": 4, "name": "cat"},{"supercategory": "vehicle", "id": 5, "name": "dog"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 7, "name": "monkey"}]} | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org","version": "1.0","year": 2017,"contributor": "COCO Consortium", "data_created": "2017/09/01"},"licenses": [{"url": "http://creativecommons.org/license/by-nc-sa/2.0/","id": 3,"name": "Attribution-Noncommercial License"}],"images": [{"license": 3, "file_name": "000000391895.jpg", "id": 391895}, {"license": 3, "file_name": "000000574769.jpg", "id": 574769}],"annotations": [{"segments_info": [{"id": 10461346, "category_id": 1, "iscrowd": 0, "bbox": [472,173,36,48],"area": 705},{"id": 5263261, "category_id": 1, "iscrowd": 0, "bbox": [340,22,154,301],"area": 14062},{"id": 770861, "category_id": 2, "iscrowd": 0, "bbox": [486, 183, 30, 35],"area": 626}], "file_name": "000000391895", "image_id": 391895}, {"segments_info": [{"id": 5000790, "category_id": 1, "iscrowd": 0, "bbox": [103,133,229,422],"area": 43102},{"id": 35650815, "category_id": 3, "iscrowd": 0, "bbox": [243,175,93,164],"area": 6079}], "file_name": "000000574769.png", "image_id": 574769}],"categories": [{"supercategory": "person","isthing": 1,"id": 1,"name": "person"},{"supercategory": "vehicle","isthing": 1,"id": 2,"name": "bicycle"},{"supercategory": "vehicle","isthing": 1,"id": 3, "name": "car"}]} | |||
| @@ -0,0 +1 @@ | |||
| {"info": {"description": "COCO 2017 Dataset", "url": "http://cocodataset.org","version": "1.0","year": 2017,"contributor": "COCO Consortium", "data_created": "2017/09/01"}, "licenses": [{"url": "http://creativecommons.org/license/by-nc-sa/2.0/","id": 3,"name": "Attribution-Noncommercial License"}], "images": [{"license": 3, "file_name": "000000391895.jpg", "id": 391895}, {"license": 3, "file_name": "000000318219.jpg", "id": 318219}, {"license": 3, "file_name": "000000554625.jpg", "id": 554625}, {"license": 3, "file_name": "000000574769.jpg", "id": 574769}, {"license": 3, "file_name": "000000060623.jpg", "id": 60623}, {"license": 3, "file_name": "000000309022.jpg", "id": 309022}], "annotations": [{"segmentation": [[10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0]], "category_id": 1, "iscrowd": 0, "image_id": 391895, "bbox": [10,10,10,10], "area": 100, "id": 10000}, {"segmentation": [[20.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0]], "category_id": 2, "iscrowd": 0, "image_id": 318219, "bbox": [20,20,20,20], "area": 400, "id": 10001}, {"segmentation": [[40.0,42.0,43.0,44.0,45.0,46.0,47.0,48.0,49.0,40.0,41.0,42.0]], "category_id": 3, "iscrowd": 0, "image_id": 554625, "bbox": [30,30,30,30], "area": 900, "id": 10002}, {"segmentation": [[50.0,52.0,53.0,54.0,55.0,56.0,57.0,58.0,59.0,60.0,61.0,62.0,63.0]], "category_id": 4, "iscrowd": 0, "image_id": 574769, "bbox": [40,40,40,40], "area": 1600, "id": 10003}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0,68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 5, "iscrowd": 0, "image_id": 60623, "bbox": [50,50,50,50], "area": 2500, "id": 10004}, {"segmentation": [[60.0,62.0,63.0,64.0,65.0,66.0,67.0],[68.0,69.0,70.0,71.0,72.0,73.0,74.0]], "category_id": 6, "iscrowd": 0, "image_id": 309022, "bbox": [60,60,60,60], "area": 3600, "id": 10005}, {"segmentation": [[70.0,72.0,73.0,74.0,75.0]], "category_id": 7, "iscrowd": 0, "image_id": 391895, "bbox": [70,70,70,70], "area": 4900, "id": 10006}, {"segmentation": {"counts": [10.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0],"size": [200,300]}, "category_id": 8, "iscrowd": 1, "image_id": 318219, "bbox": [80,80,80,80], "area": 6400, "id": 10007}], "categories": [{"supercategory": "person", "id": 1, "name": "person"},{"supercategory": "vehicle", "id": 2, "name": "bicycle"},{"supercategory": "vehicle", "id": 3, "name": "car"},{"supercategory": "vehicle", "id": 4, "name": "cat"},{"supercategory": "vehicle", "id": 5, "name": "dog"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 6, "name": "monkey"},{"supercategory": "vehicle", "id": 7, "name": "monkey"}]} | |||
| @@ -0,0 +1,254 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| import numpy as np | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.transforms.vision.c_transforms as vision | |||
| DATA_DIR = "../data/dataset/testCOCO/train/" | |||
| ANNOTATION_FILE = "../data/dataset/testCOCO/annotations/train.json" | |||
| KEYPOINT_FILE = "../data/dataset/testCOCO/annotations/key_point.json" | |||
| PANOPTIC_FILE = "../data/dataset/testCOCO/annotations/panoptic.json" | |||
| INVALID_FILE = "../data/dataset/testCOCO/annotations/invalid.json" | |||
| LACKOFIMAGE_FILE = "../data/dataset/testCOCO/annotations/lack_of_images.json" | |||
| def test_coco_detection(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", | |||
| decode=True, shuffle=False) | |||
| num_iter = 0 | |||
| image_shape = [] | |||
| bbox = [] | |||
| category_id = [] | |||
| for data in data1.create_dict_iterator(): | |||
| image_shape.append(data["image"].shape) | |||
| bbox.append(data["bbox"]) | |||
| category_id.append(data["category_id"]) | |||
| num_iter += 1 | |||
| assert num_iter == 6 | |||
| assert image_shape[0] == (2268, 4032, 3) | |||
| assert image_shape[1] == (561, 595, 3) | |||
| assert image_shape[2] == (607, 585, 3) | |||
| assert image_shape[3] == (642, 675, 3) | |||
| assert image_shape[4] == (2268, 4032, 3) | |||
| assert image_shape[5] == (2268, 4032, 3) | |||
| assert np.array_equal(np.array([[10., 10., 10., 10.], [70., 70., 70., 70.]]), bbox[0]) | |||
| assert np.array_equal(np.array([[20., 20., 20., 20.], [80., 80., 80.0, 80.]]), bbox[1]) | |||
| assert np.array_equal(np.array([[30.0, 30.0, 30.0, 30.]]), bbox[2]) | |||
| assert np.array_equal(np.array([[40., 40., 40., 40.]]), bbox[3]) | |||
| assert np.array_equal(np.array([[50., 50., 50., 50.]]), bbox[4]) | |||
| assert np.array_equal(np.array([[60., 60., 60., 60.]]), bbox[5]) | |||
| assert np.array_equal(np.array([[1], [7]]), category_id[0]) | |||
| assert np.array_equal(np.array([[2], [8]]), category_id[1]) | |||
| assert np.array_equal(np.array([[3]]), category_id[2]) | |||
| assert np.array_equal(np.array([[4]]), category_id[3]) | |||
| assert np.array_equal(np.array([[5]]), category_id[4]) | |||
| assert np.array_equal(np.array([[6]]), category_id[5]) | |||
| def test_coco_stuff(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff", | |||
| decode=True, shuffle=False) | |||
| num_iter = 0 | |||
| image_shape = [] | |||
| segmentation = [] | |||
| iscrowd = [] | |||
| for data in data1.create_dict_iterator(): | |||
| image_shape.append(data["image"].shape) | |||
| segmentation.append(data["segmentation"]) | |||
| iscrowd.append(data["iscrowd"]) | |||
| num_iter += 1 | |||
| assert num_iter == 6 | |||
| assert image_shape[0] == (2268, 4032, 3) | |||
| assert image_shape[1] == (561, 595, 3) | |||
| assert image_shape[2] == (607, 585, 3) | |||
| assert image_shape[3] == (642, 675, 3) | |||
| assert image_shape[4] == (2268, 4032, 3) | |||
| assert image_shape[5] == (2268, 4032, 3) | |||
| assert np.array_equal(np.array([[10., 12., 13., 14., 15., 16., 17., 18., 19., 20.], | |||
| [70., 72., 73., 74., 75., -1., -1., -1., -1., -1.]]), | |||
| segmentation[0]) | |||
| assert np.array_equal(np.array([[0], [0]]), iscrowd[0]) | |||
| assert np.array_equal(np.array([[20.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0], | |||
| [10.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, -1.0]]), | |||
| segmentation[1]) | |||
| assert np.array_equal(np.array([[0], [1]]), iscrowd[1]) | |||
| assert np.array_equal(np.array([[40., 42., 43., 44., 45., 46., 47., 48., 49., 40., 41., 42.]]), segmentation[2]) | |||
| assert np.array_equal(np.array([[0]]), iscrowd[2]) | |||
| assert np.array_equal(np.array([[50., 52., 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63.]]), | |||
| segmentation[3]) | |||
| assert np.array_equal(np.array([[0]]), iscrowd[3]) | |||
| assert np.array_equal(np.array([[60., 62., 63., 64., 65., 66., 67., 68., 69., 70., 71., 72., 73., 74.]]), | |||
| segmentation[4]) | |||
| assert np.array_equal(np.array([[0]]), iscrowd[4]) | |||
| assert np.array_equal(np.array([[60., 62., 63., 64., 65., 66., 67.], [68., 69., 70., 71., 72., 73., 74.]]), | |||
| segmentation[5]) | |||
| assert np.array_equal(np.array([[0]]), iscrowd[5]) | |||
| def test_coco_keypoint(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint", | |||
| decode=True, shuffle=False) | |||
| num_iter = 0 | |||
| image_shape = [] | |||
| keypoints = [] | |||
| num_keypoints = [] | |||
| for data in data1.create_dict_iterator(): | |||
| image_shape.append(data["image"].shape) | |||
| keypoints.append(data["keypoints"]) | |||
| num_keypoints.append(data["num_keypoints"]) | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| assert image_shape[0] == (2268, 4032, 3) | |||
| assert image_shape[1] == (561, 595, 3) | |||
| assert np.array_equal(np.array([[368., 61., 1., 369., 52., 2., 0., 0., 0., 382., 48., 2., 0., 0., 0., 368., 84., 2., | |||
| 435., 81., 2., 362., 125., 2., 446., 125., 2., 360., 153., 2., 0., 0., 0., 397., | |||
| 167., 1., 439., 166., 1., 369., 193., 2., 461., 234., 2., 361., 246., 2., 474., | |||
| 287., 2.]]), keypoints[0]) | |||
| assert np.array_equal(np.array([[14]]), num_keypoints[0]) | |||
| assert np.array_equal(np.array([[244., 139., 2., 0., 0., 0., 226., 118., 2., 0., 0., 0., 154., 159., 2., 143., 261., | |||
| 2., 135., 312., 2., 271., 423., 2., 184., 530., 2., 261., 280., 2., 347., 592., 2., | |||
| 0., 0., 0., 123., 596., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), | |||
| keypoints[1]) | |||
| assert np.array_equal(np.array([[10]]), num_keypoints[1]) | |||
| def test_coco_panoptic(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True, shuffle=False) | |||
| num_iter = 0 | |||
| image_shape = [] | |||
| bbox = [] | |||
| category_id = [] | |||
| iscrowd = [] | |||
| area = [] | |||
| for data in data1.create_dict_iterator(): | |||
| image_shape.append(data["image"].shape) | |||
| bbox.append(data["bbox"]) | |||
| category_id.append(data["category_id"]) | |||
| iscrowd.append(data["iscrowd"]) | |||
| area.append(data["area"]) | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| assert image_shape[0] == (2268, 4032, 3) | |||
| assert np.array_equal(np.array([[472, 173, 36, 48], [340, 22, 154, 301], [486, 183, 30, 35]]), bbox[0]) | |||
| assert np.array_equal(np.array([[1], [1], [2]]), category_id[0]) | |||
| assert np.array_equal(np.array([[0], [0], [0]]), iscrowd[0]) | |||
| assert np.array_equal(np.array([[705], [14062], [626]]), area[0]) | |||
| assert image_shape[1] == (642, 675, 3) | |||
| assert np.array_equal(np.array([[103, 133, 229, 422], [243, 175, 93, 164]]), bbox[1]) | |||
| assert np.array_equal(np.array([[1], [3]]), category_id[1]) | |||
| assert np.array_equal(np.array([[0], [0]]), iscrowd[1]) | |||
| assert np.array_equal(np.array([[43102], [6079]]), area[1]) | |||
| def test_coco_detection_classindex(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| class_index = data1.get_class_indexing() | |||
| assert class_index == {'person': [1], 'bicycle': [2], 'car': [3], 'cat': [4], 'dog': [5], 'monkey': [7]} | |||
| num_iter = 0 | |||
| for _ in data1.__iter__(): | |||
| num_iter += 1 | |||
| assert num_iter == 6 | |||
| def test_coco_panoptic_classindex(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True) | |||
| class_index = data1.get_class_indexing() | |||
| assert class_index == {'person': [1, 1], 'bicycle': [2, 1], 'car': [3, 1]} | |||
| num_iter = 0 | |||
| for _ in data1.__iter__(): | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| def test_coco_case_0(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| data1 = data1.shuffle(10) | |||
| data1 = data1.batch(3, pad_info={}) | |||
| num_iter = 0 | |||
| for _ in data1.create_dict_iterator(): | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| def test_coco_case_1(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| sizes = [0.5, 0.5] | |||
| randomize = False | |||
| dataset1, dataset2 = data1.split(sizes=sizes, randomize=randomize) | |||
| num_iter = 0 | |||
| for _ in dataset1.create_dict_iterator(): | |||
| num_iter += 1 | |||
| assert num_iter == 3 | |||
| num_iter = 0 | |||
| for _ in dataset2.create_dict_iterator(): | |||
| num_iter += 1 | |||
| assert num_iter == 3 | |||
| def test_coco_case_2(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| resize_op = vision.Resize((224, 224)) | |||
| data1 = data1.map(input_columns=["image"], operations=resize_op) | |||
| data1 = data1.repeat(4) | |||
| num_iter = 0 | |||
| for _ in data1.__iter__(): | |||
| num_iter += 1 | |||
| assert num_iter == 24 | |||
| def test_coco_case_exception(): | |||
| try: | |||
| data1 = ds.CocoDataset("path_not_exist/", annotation_file=ANNOTATION_FILE, task="Detection") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except ValueError as e: | |||
| assert "does not exist or permission denied" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file="./file_not_exist", task="Detection") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except ValueError as e: | |||
| assert "does not exist or permission denied" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Invalid task") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except ValueError as e: | |||
| assert "Invalid task type" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=LACKOFIMAGE_FILE, task="Detection") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "Invalid node found in json" in str(e) | |||
| try: | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_FILE, task="Detection") | |||
| for _ in data1.__iter__(): | |||
| pass | |||
| assert False | |||
| except RuntimeError as e: | |||
| assert "json.exception.parse_error" in str(e) | |||
| if __name__ == '__main__': | |||
| test_coco_detection() | |||
| test_coco_stuff() | |||
| test_coco_keypoint() | |||
| test_coco_panoptic() | |||
| test_coco_detection_classindex() | |||
| test_coco_panoptic_classindex() | |||
| test_coco_case_0() | |||
| test_coco_case_1() | |||
| test_coco_case_2() | |||
| test_coco_case_exception() | |||