Browse Source

[feat] [assistant] [I40GYJ] add Captioning task of CocoDataset

feature/build-system-rewrite
donrichnx 4 years ago
parent
commit
eee07047a0
10 changed files with 425 additions and 179 deletions
  1. +93
    -19
      mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc
  2. +93
    -76
      mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h
  3. +9
    -2
      mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc
  4. +32
    -31
      mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h
  5. +9
    -3
      mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h
  6. +10
    -4
      mindspore/python/mindspore/dataset/engine/datasets_vision.py
  7. +1
    -1
      mindspore/python/mindspore/dataset/engine/validators.py
  8. +124
    -42
      tests/ut/cpp/dataset/c_api_dataset_coco_test.cc
  9. +1
    -0
      tests/ut/data/dataset/testCOCO/annotations/captions.json
  10. +53
    -1
      tests/ut/python/dataset/test_datasets_coco.py

+ 93
- 19
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,6 +32,7 @@ const char kJsonImagesFileName[] = "file_name";
const char kJsonId[] = "id";
const char kJsonAnnotations[] = "annotations";
const char kJsonAnnoSegmentation[] = "segmentation";
const char kJsonAnnoCaption[] = "caption";
const char kJsonAnnoCounts[] = "counts";
const char kJsonAnnoSegmentsInfo[] = "segments_info";
const char kJsonAnnoIscrowd[] = "iscrowd";
@@ -74,17 +75,37 @@ void CocoOp::Print(std::ostream &out, bool show_all) const {
}

Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
RETURN_UNEXPECTED_IF_NULL(trow);
std::string image_id = image_ids_[row_id];
std::shared_ptr<Tensor> image, coordinate;
std::shared_ptr<Tensor> image;
auto real_path = FileUtils::GetRealPath(image_folder_path_.data());
if (!real_path.has_value()) {
RETURN_STATUS_UNEXPECTED("Invalid file path, COCO dataset image folder: " + image_folder_path_ +
" does not exist.");
}
Path image_folder(real_path.value());
Path kImageFile = image_folder / image_id;
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile.ToString(), data_schema_->Column(0), &image));
if (task_type_ == TaskType::Captioning) {
std::shared_ptr<Tensor> captions;
auto itr = captions_map_.find(image_id);
if (itr == captions_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id +
" is missing from image node in annotation file: " + annotation_path_);
}
auto captions_str = itr->second;
RETURN_IF_NOT_OK(
Tensor::CreateFromVector(captions_str, TensorShape({static_cast<dsize_t>(captions_str.size()), 1}), &captions));
RETURN_IF_NOT_OK(LoadCaptioningTensorRow(row_id, image_id, image, captions, trow));
return Status::OK();
}
std::shared_ptr<Tensor> coordinate;
auto itr = coordinate_map_.find(image_id);
if (itr == coordinate_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id +
" is missing from image node in annotation file: " + annotation_path_);
}

std::string kImageFile = image_folder_path_ + std::string("/") + image_id;
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));

auto bboxRow = itr->second;
std::vector<float> bbox_row;
dsize_t bbox_row_num = static_cast<dsize_t>(bboxRow.size());
@@ -115,7 +136,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
} else if (task_type_ == TaskType::Panoptic) {
RETURN_IF_NOT_OK(LoadMixTensorRow(row_id, image_id, image, coordinate, trow));
} else {
RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic.");
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Panoptic or Captioning.");
}

return Status::OK();
@@ -123,6 +144,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {

Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image,
std::shared_ptr<Tensor> coordinate, TensorRow *trow) {
RETURN_UNEXPECTED_IF_NULL(trow);
std::shared_ptr<Tensor> category_id, iscrowd;
std::vector<uint32_t> category_id_row;
std::vector<uint32_t> iscrowd_row;
@@ -147,8 +169,10 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima
Tensor::CreateFromVector(iscrowd_row, TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), &iscrowd));

(*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd)});
std::string image_full_path = image_folder_path_ + std::string("/") + image_id;
std::vector<std::string> path_list = {image_full_path, annotation_path_, annotation_path_, annotation_path_};
Path image_folder(image_folder_path_);
Path image_full_path = image_folder / image_id;
std::vector<std::string> path_list = {image_full_path.ToString(), annotation_path_, annotation_path_,
annotation_path_};
if (extra_metadata_) {
std::string img_id;
size_t pos = image_id.find(".");
@@ -159,7 +183,7 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima
std::shared_ptr<Tensor> filename;
RETURN_IF_NOT_OK(Tensor::CreateScalar(img_id, &filename));
trow->push_back(std::move(filename));
path_list.push_back(image_full_path);
path_list.push_back(image_full_path.ToString());
}
trow->setPath(path_list);
return Status::OK();
@@ -167,8 +191,10 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima

Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image,
std::shared_ptr<Tensor> coordinate, TensorRow *trow) {
RETURN_UNEXPECTED_IF_NULL(trow);
std::shared_ptr<Tensor> item;
std::vector<uint32_t> item_queue;
Path image_folder(image_folder_path_);
auto itr_item = simple_item_map_.find(image_id);
if (itr_item == simple_item_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid image_id, the attribute of 'image_id': " + image_id +
@@ -180,8 +206,8 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_
RETURN_IF_NOT_OK(Tensor::CreateFromVector(item_queue, TensorShape(bbox_dim), &item));

(*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(item)});
std::string image_full_path = image_folder_path_ + std::string("/") + image_id;
std::vector<std::string> path_list = {image_full_path, annotation_path_, annotation_path_};
Path image_full_path = image_folder / image_id;
std::vector<std::string> path_list = {image_full_path.ToString(), annotation_path_, annotation_path_};
if (extra_metadata_) {
std::string img_id;
size_t pos = image_id.find(".");
@@ -192,7 +218,30 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_
std::shared_ptr<Tensor> filename;
RETURN_IF_NOT_OK(Tensor::CreateScalar(img_id, &filename));
trow->push_back(std::move(filename));
path_list.push_back(image_full_path);
path_list.push_back(image_full_path.ToString());
}
trow->setPath(path_list);
return Status::OK();
}

Status CocoOp::LoadCaptioningTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image,
std::shared_ptr<Tensor> captions, TensorRow *trow) {
RETURN_UNEXPECTED_IF_NULL(trow);
(*trow) = TensorRow(row_id, {std::move(image), std::move(captions)});
Path image_folder(image_folder_path_);
Path image_full_path = image_folder / image_id;
std::vector<std::string> path_list = {image_full_path.ToString(), annotation_path_};
if (extra_metadata_) {
std::string img_id;
size_t pos = image_id.find(".");
if (pos == image_id.npos) {
RETURN_STATUS_UNEXPECTED("Invalid image, 'image_id': " + image_id + " should be with suffix like \".jpg\".");
}
std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id));
std::shared_ptr<Tensor> filename;
RETURN_IF_NOT_OK(Tensor::CreateScalar(img_id, &filename));
trow->push_back(std::move(filename));
path_list.push_back(image_full_path.ToString());
}
trow->setPath(path_list);
return Status::OK();
@@ -200,6 +249,7 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_

Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image,
std::shared_ptr<Tensor> coordinate, TensorRow *trow) {
RETURN_UNEXPECTED_IF_NULL(trow);
std::shared_ptr<Tensor> category_id, iscrowd, area;
std::vector<uint32_t> category_id_row;
std::vector<uint32_t> iscrowd_row;
@@ -230,9 +280,10 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,

(*trow) = TensorRow(
row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd), std::move(area)});
std::string image_full_path = image_folder_path_ + std::string("/") + image_id;
std::vector<std::string> path_list = {image_full_path, annotation_path_, annotation_path_, annotation_path_,
annotation_path_};
Path image_folder(image_folder_path_);
Path image_full_path = image_folder / image_id;
std::vector<std::string> path_list = {image_full_path.ToString(), annotation_path_, annotation_path_,
annotation_path_, annotation_path_};
if (extra_metadata_) {
std::string img_id;
size_t pos = image_id.find(".");
@@ -243,7 +294,7 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
std::shared_ptr<Tensor> filename;
RETURN_IF_NOT_OK(Tensor::CreateScalar(img_id, &filename));
trow->push_back(std::move(filename));
path_list.push_back(image_full_path);
path_list.push_back(image_full_path.ToString());
}
trow->setPath(path_list);
return Status::OK();
@@ -316,12 +367,27 @@ Status CocoOp::PrepareData() {
case TaskType::Panoptic:
RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id));
break;
case TaskType::Captioning:
RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonId), &id));
RETURN_IF_NOT_OK(CaptionColumnLoad(annotation, file_name, id));
break;
default:
RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic.");
RETURN_STATUS_UNEXPECTED(
"Invalid parameter, task type should be Detection, Stuff, Keypoint, Panoptic or Captioning.");
}
}
for (auto img : image_que) {
if (coordinate_map_.find(img) != coordinate_map_.end()) image_ids_.push_back(img);
if (task_type_ == TaskType::Captioning) {
for (auto img : image_que) {
if (captions_map_.find(img) != captions_map_.end()) {
image_ids_.push_back(img);
}
}
} else {
for (auto img : image_que) {
if (coordinate_map_.find(img) != coordinate_map_.end()) {
image_ids_.push_back(img);
}
}
}
num_rows_ = image_ids_.size();
if (num_rows_ == 0) {
@@ -447,6 +513,14 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s
return Status::OK();
}

Status CocoOp::CaptionColumnLoad(const nlohmann::json &annotation_tree, const std::string &image_file,
const int32_t &unique_id) {
std::string caption = "";
RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoCaption), &caption));
captions_map_[image_file].push_back(caption);
return Status::OK();
}

Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) {
if (categories_tree.size() == 0) {
RETURN_STATUS_UNEXPECTED(


+ 93
- 76
mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -49,7 +49,7 @@ using CoordinateRow = std::vector<std::vector<float>>;

class CocoOp : public MappableLeafOp {
public:
enum class TaskType { Detection = 0, Stuff = 1, Panoptic = 2, Keypoint = 3 };
enum class TaskType { Detection = 0, Stuff = 1, Panoptic = 2, Keypoint = 3, Captioning = 4 };

class Builder {
public:
@@ -146,126 +146,142 @@ class CocoOp : public MappableLeafOp {
std::unique_ptr<DataSchema> builder_schema_;
};

// Constructor
// @param TaskType task_type - task type of Coco
// @param std::string image_folder_path - image folder path of Coco
// @param std::string annotation_path - annotation json path of Coco
// @param int32_t num_workers - number of workers reading images in parallel
// @param int32_t queue_size - connector queue size
// @param int64_t num_samples - number of samples to read
// @param bool decode - whether to decode images
// @param std::unique_ptr<DataSchema> data_schema - the schema of the Coco dataset
// @param std::shared_ptr<Sampler> sampler - sampler tells CocoOp what to read
/// \brief Constructor.
/// \param[in] task_type Task type of Coco.
/// \param[in] image_folder_path Image folder path of Coco.
/// \param[in] annotation_path Annotation json path of Coco.
/// \param[in] num_workers Number of workers reading images in parallel.
/// \param[in] queue_size Connector queue size.
/// \param[in] num_samples Number of samples to read.
/// \param[in] decode Whether to decode images.
/// \param[in] data_schema The schema of the Coco dataset.
/// \param[in] sampler Sampler tells CocoOp what to read.
CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler, bool extra_metadata);

// Destructor
/// \brief Destructor.
~CocoOp() = default;

// A print method typically used for debugging
// @param out
// @param show_all
/// \brief A print method typically used for debugging.
/// \param[out] out The output stream to write output to.
/// \param[in] show_all A bool to control if you want to show all info or just a summary.
void Print(std::ostream &out, bool show_all) const override;

// @param int64_t *count - output rows number of CocoDataset
/// \param[out] count Output rows number of CocoDataset.
Status CountTotalRows(int64_t *count);

// Op name getter
// @return Name of the current Op
/// \brief Op name getter.
/// \return Name of the current Op.
std::string Name() const override { return "CocoOp"; }

/// \brief Gets the class indexing
/// \return Status The status code returned
/// \brief Gets the class indexing.
/// \return Status The status code returned.
Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) override;

private:
// Load a tensor row according to image id
// @param row_id_type row_id - id for this tensor row
// @param std::string image_id - image id
// @param TensorRow row - image & target read into this tensor row
// @return Status The status code returned
/// \brief Load a tensor row according to image id.
/// \param[in] row_id Id for this tensor row.
/// \param[out] row Image & target read into this tensor row.
/// \return Status The status code returned.
Status LoadTensorRow(row_id_type row_id, TensorRow *row) override;

// Load a tensor row with vector which a vector to a tensor, for "Detection" task
// @param row_id_type row_id - id for this tensor row
// @param const std::string &image_id - image is
// @param std::shared_ptr<Tensor> image - image tensor
// @param std::shared_ptr<Tensor> coordinate - coordinate tensor
// @param TensorRow row - image & target read into this tensor row
// @return Status The status code returned
/// \brief Load a tensor row with vector which a vector to a tensor, for "Detection" task.
/// \param[in] row_id Id for this tensor row.
/// \param[in] image_id Image id.
/// \param[in] image Image tensor.
/// \param[in] coordinate Coordinate tensor.
/// \param[out] row Image & target read into this tensor row.
/// \return Status The status code returned.
Status LoadDetectionTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image,
std::shared_ptr<Tensor> coordinate, TensorRow *trow);

// Load a tensor row with vector which a vector to a tensor, for "Stuff/Keypoint" task
// @param row_id_type row_id - id for this tensor row
// @param const std::string &image_id - image is
// @param std::shared_ptr<Tensor> image - image tensor
// @param std::shared_ptr<Tensor> coordinate - coordinate tensor
// @param TensorRow row - image & target read into this tensor row
// @return Status The status code returned
/// \brief Load a tensor row with vector which a vector to a tensor, for "Stuff/Keypoint" task.
/// \param[in] row_id Id for this tensor row.
/// \param[in] image_id Image id.
/// \param[in] image Image tensor.
/// \param[in] coordinate Coordinate tensor.
/// \param[out] row Image & target read into this tensor row.
/// \return Status The status code returned.
Status LoadSimpleTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image,
std::shared_ptr<Tensor> coordinate, TensorRow *trow);

// Load a tensor row with vector which a vector to multi-tensor, for "Panoptic" task
// @param row_id_type row_id - id for this tensor row
// @param const std::string &image_id - image is
// @param std::shared_ptr<Tensor> image - image tensor
// @param std::shared_ptr<Tensor> coordinate - coordinate tensor
// @param TensorRow row - image & target read into this tensor row
// @return Status The status code returned
/// \brief Load a tensor row with vector which a vector to multi-tensor, for "Panoptic" task.
/// \param[in] row_id Id for this tensor row.
/// \param[in] image_id Image id.
/// \param[in] image Image tensor.
/// \param[in] coordinate Coordinate tensor.
/// \param[out] row Image & target read into this tensor row.
/// \return Status The status code returned.
Status LoadMixTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image,
std::shared_ptr<Tensor> coordinate, TensorRow *trow);

// @param const std::string &path - path to the image file
// @param const ColDescriptor &col - contains tensor implementation and datatype
// @param std::shared_ptr<Tensor> tensor - return
// @return Status The status code returned
/// \brief Load a tensor row with vector which a vector to multi-tensor, for "Captioning" task.
/// \param[in] row_id Id for this tensor row.
/// \param[in] image_id Image id.
/// \param[in] image Image tensor.
/// \param[in] captions Captions tensor.
/// \param[out] trow Image & target read into this tensor row.
/// \return Status The status code returned.
Status LoadCaptioningTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image,
std::shared_ptr<Tensor> captions, TensorRow *trow);

/// \param[in] path Path to the image file.
/// \param[in] col Contains tensor implementation and datatype.
/// \param[out] tensor Returned tensor.
/// \return Status The status code returned.
Status ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor);

// Read annotation from Annotation folder
// @return Status The status code returned
/// \brief Read annotation from Annotation folder.
/// \return Status The status code returned.
Status PrepareData() override;

// @param nlohmann::json image_tree - image tree of json
// @param std::vector<std::string> *image_vec - image id list of json
// @return Status The status code returned
/// \param[in] image_tree Image tree of json.
/// \param[out] image_vec Image id list of json.
/// \return Status The status code returned.
Status ImageColumnLoad(const nlohmann::json &image_tree, std::vector<std::string> *image_vec);

// @param nlohmann::json categories_tree - categories tree of json
// @return Status The status code returned
/// \param[in] categories_tree Categories tree of json.
/// \return Status The status code returned.
Status CategoriesColumnLoad(const nlohmann::json &categories_tree);

// @param nlohmann::json categories_tree - categories tree of json
// @param const std::string &image_file - current image name in annotation
// @param const int32_t &id - current unique id of annotation
// @return Status The status code returned
/// \param[in] categories_tree Categories tree of json.
/// \param[in] image_file Current image name in annotation.
/// \param[in] id Current unique id of annotation.
/// \return Status The status code returned.
Status DetectionColumnLoad(const nlohmann::json &annotation_tree, const std::string &image_file, const int32_t &id);

// @param nlohmann::json categories_tree - categories tree of json
// @param const std::string &image_file - current image name in annotation
// @param const int32_t &id - current unique id of annotation
// @return Status The status code returned
/// \param[in] categories_tree Categories tree of json.
/// \param[in] image_file Current image name in annotation.
/// \param[in] id Current unique id of annotation.
/// \return Status The status code returned.
Status StuffColumnLoad(const nlohmann::json &annotation_tree, const std::string &image_file, const int32_t &id);

// @param nlohmann::json categories_tree - categories tree of json
// @param const std::string &image_file - current image name in annotation
// @param const int32_t &id - current unique id of annotation
// @return Status The status code returned
/// \param[in] categories_tree Categories tree of json.
/// \param[in] image_file Current image name in annotation.
/// \param[in] id Current unique id of annotation.
/// \return Status The status code returned.
Status KeypointColumnLoad(const nlohmann::json &annotation_tree, const std::string &image_file, const int32_t &id);

// @param nlohmann::json categories_tree - categories tree of json
// @param const std::string &image_file - current image name in annotation
// @param const int32_t &image_id - current unique id of annotation
// @return Status The status code returned
/// \param[in] categories_tree Categories tree of json.
/// \param[in] image_file Current image name in annotation.
/// \param[in] image_id Current unique id of annotation.
/// \return Status The status code returned.
Status PanopticColumnLoad(const nlohmann::json &annotation_tree, const std::string &image_file,
const int32_t &image_id);

/// \brief Function for finding a caption in annotation_tree.
/// \param[in] annotation_tree Annotation tree of json.
/// \param[in] image_file Current image name in annotation.
/// \param[in] id Current unique id of annotation.
/// \return Status The status code returned.
Status CaptionColumnLoad(const nlohmann::json &annotation_tree, const std::string &image_file, const int32_t &id);

template <typename T>
Status SearchNodeInJson(const nlohmann::json &input_tree, std::string node_name, T *output_node);

// Private function for computing the assignment of the column name map.
// @return - Status
/// \brief Private function for computing the assignment of the column name map.
/// \return Status The status code returned.
Status ComputeColMap() override;

bool decode_;
@@ -280,6 +296,7 @@ class CocoOp : public MappableLeafOp {
std::vector<std::pair<std::string, std::vector<int32_t>>> label_index_;
std::map<std::string, CoordinateRow> coordinate_map_;
std::map<std::string, std::vector<uint32_t>> simple_item_map_;
std::map<std::string, std::vector<std::string>> captions_map_;
std::set<uint32_t> category_set_;
};
} // namespace dataset


+ 9
- 2
mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -56,7 +56,8 @@ Status CocoNode::ValidateParams() {
RETURN_IF_NOT_OK(ValidateDatasetDirParam("CocoDataset", dataset_dir_));
RETURN_IF_NOT_OK(ValidateDatasetSampler("CocoDataset", sampler_));
RETURN_IF_NOT_OK(ValidateDatasetFilesParam("CocoDataset", {annotation_file_}, "annotation file"));
RETURN_IF_NOT_OK(ValidateStringValue("CocoDataset", task_, {"Detection", "Stuff", "Panoptic", "Keypoint"}));
RETURN_IF_NOT_OK(
ValidateStringValue("CocoDataset", task_, {"Detection", "Stuff", "Panoptic", "Keypoint", "Captioning"}));

return Status::OK();
}
@@ -72,6 +73,8 @@ Status CocoNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
task_type = CocoOp::TaskType::Keypoint;
} else if (task_ == "Panoptic") {
task_type = CocoOp::TaskType::Panoptic;
} else if (task_ == "Captioning") {
task_type = CocoOp::TaskType::Captioning;
} else {
std::string err_msg = "Task type:'" + task_ + "' is not supported.";
MS_LOG(ERROR) << err_msg;
@@ -112,6 +115,10 @@ Status CocoNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
RETURN_IF_NOT_OK(
schema->AddColumn(ColDescriptor(std::string("area"), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
break;
case CocoOp::TaskType::Captioning:
RETURN_IF_NOT_OK(schema->AddColumn(
ColDescriptor(std::string("captions"), DataType(DataType::DE_STRING), TensorImpl::kFlexible, 1)));
break;
default:
std::string err_msg = "CocoNode::Build : Invalid task type";
MS_LOG(ERROR) << err_msg;


+ 32
- 31
mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,72 +28,73 @@ namespace dataset {

class CocoNode : public MappableSourceNode {
public:
/// \brief Constructor
/// \brief Constructor.
CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
const bool &decode, const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache,
const bool &extra_metadata);

/// \brief Destructor
/// \brief Destructor.
~CocoNode() = default;

/// \brief Node name getter
/// \return Name of the current node
/// \brief Node name getter.
/// \return Name of the current node.
std::string Name() const override { return kCocoNode; }

/// \brief Print the description
/// \param out - The output stream to write output to
/// \brief Print the description.
/// \param[out] out The output stream to write output to.
void Print(std::ostream &out) const override;

/// \brief Copy the node to a new object
/// \return A shared pointer to the new copy
/// \brief Copy the node to a new object.
/// \return A shared pointer to the new copy.
std::shared_ptr<DatasetNode> Copy() override;

/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create
/// \return Status Status::OK() if build successfully
/// \brief A base class override function to create the required runtime dataset op objects for this class.
/// \param[out] node_ops A vector containing shared pointer to the Dataset Ops that this object will create.
/// \return Status Status::OK() if build successfully.
Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override;

/// \brief Parameters validation
/// \return Status Status::OK() if all the parameters are valid
/// \brief Parameters validation.
/// \return Status Status::OK() if all the parameters are valid.
Status ValidateParams() override;

/// \brief Get the shard id of node
/// \return Status Status::OK() if get shard id successfully
/// \brief Get the shard id of node.
/// \param[in] shard_id shard id.
/// \return Status Status::OK() if get shard id successfully.
Status GetShardId(int32_t *shard_id) override;

/// \brief Base-class override for GetDatasetSize
/// \param[in] size_getter Shared pointer to DatasetSizeGetter
/// \brief Base-class override for GetDatasetSize.
/// \param[in] size_getter Shared pointer to DatasetSizeGetter.
/// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
/// dataset size at the expense of accuracy.
/// \param[out] dataset_size the size of the dataset
/// \return Status of the function
/// \param[out] dataset_size the size of the dataset.
/// \return Status of the function.
Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
int64_t *dataset_size) override;

/// \brief Getter functions
/// \brief Getter functions.
const std::string &DatasetDir() const { return dataset_dir_; }
const std::string &AnnotationFile() const { return annotation_file_; }
const std::string &Task() const { return task_; }
bool Decode() const { return decode_; }

/// \brief Get the arguments of node
/// \param[out] out_json JSON string of all attributes
/// \return Status of the function
/// \brief Get the arguments of node.
/// \param[out] out_json JSON string of all attributes.
/// \return Status of the function.
Status to_json(nlohmann::json *out_json) override;

#ifndef ENABLE_ANDROID
/// \brief Function to read dataset in json
/// \param[in] json_obj The JSON object to be deserialized
/// \param[out] ds Deserialized dataset
/// \return Status The status code returned
/// \brief Function to read dataset in json.
/// \param[in] json_obj The JSON object to be deserialized.
/// \param[out] ds Deserialized dataset.
/// \return Status The status code returned.
static Status from_json(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
#endif

/// \brief Sampler getter
/// \return SamplerObj of the current node
/// \brief Sampler getter.
/// \return SamplerObj of the current node.
std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }

/// \brief Sampler setter
/// \brief Sampler setter.
void SetSampler(std::shared_ptr<SamplerObj> sampler) override { sampler_ = sampler; }

private:


+ 9
- 3
mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h View File

@@ -1940,9 +1940,11 @@ class MS_API CocoDataset : public Dataset {
/// ['num_keypoints', dtype=uint32]].
/// - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32],
/// ['iscrowd', dtype=uint32], ['area', dtype=uint32]].
/// - task='Captioning', column: [['image', dtype=uint8], ['captions', dtype=string]].
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] annotation_file Path to the annotation json.
/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'.
/// \param[in] task Set the task type of reading coco data. Supported task types are "Detection", "Stuff", "Panoptic",
/// "Keypoint" and "Captioning".
/// \param[in] decode Decode the images after reading.
/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
@@ -1981,9 +1983,11 @@ Coco(const std::string &dataset_dir, const std::string &annotation_file, const s
/// ['num_keypoints', dtype=uint32]].
/// - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32],
/// ['iscrowd', dtype=uint32], ['area', dtype=uint32]].
/// - task='Captioning', column: [['image', dtype=uint8], ['captions', dtype=string]].
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] annotation_file Path to the annotation json.
/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'.
/// \param[in] task Set the task type of reading coco data. Supported task types are "Detection", "Stuff", "Panoptic",
/// "Keypoint" and "Captioning".
/// \param[in] decode Decode the images after reading.
/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
@@ -2006,9 +2010,11 @@ inline std::shared_ptr<CocoDataset> MS_API Coco(const std::string &dataset_dir,
/// ['num_keypoints', dtype=uint32]].
/// - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32],
///     ['iscrowd', dtype=uint32], ['area', dtype=uint32]].
/// - task='Captioning', column: [['image', dtype=uint8], ['captions', dtype=string]].
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] annotation_file Path to the annotation json.
/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'.
/// \param[in] task Set the task type of reading coco data. Supported task types are "Detection", "Stuff", "Panoptic",
/// "Keypoint" and "Captioning".
/// \param[in] decode Decode the images after reading.
/// \param[in] sampler Sampler object used to choose samples from the dataset.
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).


+ 10
- 4
mindspore/python/mindspore/dataset/engine/datasets_vision.py View File

@@ -992,8 +992,8 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
"""
A source dataset that reads and parses COCO dataset.

CocoDataset supports four kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation and
Panoptic Segmentation of 2017 Train/Val/Test dataset.
CocoDataset supports five kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation,
Panoptic Segmentation and Captioning of 2017 Train/Val/Test dataset.

The generated dataset with different task setting has different output columns:

@@ -1005,12 +1005,13 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
:py:obj:`[keypoints, dtype=float32]`, :py:obj:`[num_keypoints, dtype=uint32]`.
- task = :py:obj:`Panoptic`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \
:py:obj:`[category_id, dtype=uint32]`, :py:obj:`[iscrowd, dtype=uint32]`, :py:obj:`[area, dtype=uint32]`.
- task = :py:obj:`Captioning`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[captions, dtype=string]`.

Args:
dataset_dir (str): Path to the root directory that contains the dataset.
annotation_file (str): Path to the annotation JSON file.
task (str, optional): Set the task type for reading COCO data. Supported task types:
`Detection`, `Stuff`, `Panoptic` and `Keypoint` (default= `Detection`).
`Detection`, `Stuff`, `Panoptic`, `Keypoint` and `Captioning` (default=`Detection`).
num_samples (int, optional): The number of images to be included in the dataset
(default=None, all images).
num_parallel_workers (int, optional): Number of workers to read the data
@@ -1038,7 +1039,7 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
RuntimeError: If num_shards is specified but shard_id is None.
RuntimeError: If shard_id is specified but num_shards is None.
RuntimeError: If parse JSON file failed.
ValueError: If task is not in [`Detection`, `Stuff`, `Panoptic`, `Keypoint`].
ValueError: If task is not in [`Detection`, `Stuff`, `Panoptic`, `Keypoint`, `Captioning`].
ValueError: If annotation_file is not exist.
ValueError: If dataset_dir is not exist.
ValueError: If shard_id is invalid (< 0 or >= num_shards).
@@ -1100,6 +1101,11 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
... annotation_file=coco_annotation_file,
... task='Keypoint')
>>>
>>> # 5) Read COCO data for Captioning task
>>> dataset = ds.CocoDataset(dataset_dir=coco_dataset_dir,
... annotation_file=coco_annotation_file,
... task='Captioning')
>>>
>>> # In COCO dataset, each dictionary has keys "image" and "annotation"

About COCO dataset:


+ 1
- 1
mindspore/python/mindspore/dataset/engine/validators.py View File

@@ -670,7 +670,7 @@ def check_cocodataset(method):
task = param_dict.get('task')
type_check(task, (str,), "task")

if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}:
if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint', 'Captioning'}:
raise ValueError("Invalid task type: " + task + ".")

validate_dataset_param_value(nreq_param_int, param_dict, int)


+ 124
- 42
tests/ut/cpp/dataset/c_api_dataset_coco_test.cc View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,21 +25,24 @@ class MindDataTestPipeline : public UT::DatasetOpTesting {
protected:
};

/// Feature: CocoDataset
/// Description: default test of CocoDataset
/// Expectation: the data is processed successfully
TEST_F(MindDataTestPipeline, TestCocoDefault) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDefault.";
// Create a Coco Dataset
// Create a Coco Dataset.
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";

std::shared_ptr<Dataset> ds = Coco(folder_path, annotation_file);
EXPECT_NE(ds, nullptr);

// Create an iterator over the result of the above dataset
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));

@@ -57,13 +60,16 @@ TEST_F(MindDataTestPipeline, TestCocoDefault) {

EXPECT_EQ(i, 6);

// Manually terminate the pipeline
// Manually terminate the pipeline.
iter->Stop();
}

/// Feature: CocoDataset
/// Description: default pipeline test of CocoDataset
/// Expectation: the data is processed successfully
TEST_F(MindDataTestPipeline, TestCocoDefaultWithPipeline) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDefaultWithPipeline.";
// Create two Coco Dataset
// Create two Coco Dataset.
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";

@@ -72,7 +78,7 @@ TEST_F(MindDataTestPipeline, TestCocoDefaultWithPipeline) {
EXPECT_NE(ds1, nullptr);
EXPECT_NE(ds2, nullptr);

// Create two Repeat operation on ds
// Create two Repeat operation on ds.
int32_t repeat_num = 2;
ds1 = ds1->Repeat(repeat_num);
EXPECT_NE(ds1, nullptr);
@@ -80,23 +86,23 @@ TEST_F(MindDataTestPipeline, TestCocoDefaultWithPipeline) {
ds2 = ds2->Repeat(repeat_num);
EXPECT_NE(ds2, nullptr);

// Create two Project operation on ds
// Create two Project operation on ds.
std::vector<std::string> column_project = {"image", "bbox", "category_id"};
ds1 = ds1->Project(column_project);
EXPECT_NE(ds1, nullptr);
ds2 = ds2->Project(column_project);
EXPECT_NE(ds2, nullptr);

// Create a Concat operation on the ds
// Create a Concat operation on the ds.
ds1 = ds1->Concat({ds2});
EXPECT_NE(ds1, nullptr);

// Create an iterator over the result of the above dataset
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds1->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));

@@ -114,13 +120,16 @@ TEST_F(MindDataTestPipeline, TestCocoDefaultWithPipeline) {

EXPECT_EQ(i, 30);

// Manually terminate the pipeline
// Manually terminate the pipeline.
iter->Stop();
}

/// Feature: CocoDataset
/// Description: test getters of CocoDataset
/// Expectation: the data is processed successfully
TEST_F(MindDataTestPipeline, TestCocoGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoGetters.";
// Create a Coco Dataset
// Create a Coco Dataset.
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";

@@ -132,9 +141,12 @@ TEST_F(MindDataTestPipeline, TestCocoGetters) {
EXPECT_EQ(ds->GetColumnNames(), column_names);
}

/// Feature: CocoDataset
/// Description: test detection task of CocoDataset
/// Expectation: the data is processed successfully
TEST_F(MindDataTestPipeline, TestCocoDetection) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoDetection.";
// Create a Coco Dataset
// Create a Coco Dataset.
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";

@@ -142,12 +154,12 @@ TEST_F(MindDataTestPipeline, TestCocoDetection) {
Coco(folder_path, annotation_file, "Detection", false, std::make_shared<SequentialSampler>(0, 6));
EXPECT_NE(ds, nullptr);

// Create an iterator over the result of the above dataset
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));

@@ -188,13 +200,16 @@ TEST_F(MindDataTestPipeline, TestCocoDetection) {

EXPECT_EQ(i, 6);

// Manually terminate the pipeline
// Manually terminate the pipeline.
iter->Stop();
}

/// Feature: CocoDataset
/// Description: test fail of CocoDataset
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestCocoFail) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoFail.";
// Create a Coco Dataset
// Create a Coco Dataset.
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";
std::string invalid_folder_path = "./NotExist";
@@ -202,29 +217,32 @@ TEST_F(MindDataTestPipeline, TestCocoFail) {

std::shared_ptr<Dataset> ds0 = Coco(invalid_folder_path, annotation_file);
EXPECT_NE(ds0, nullptr);
// Create an iterator over the result of the above dataset
// Create an iterator over the result of the above dataset.
std::shared_ptr<Iterator> iter0 = ds0->CreateIterator();
// Expect failure: invalid COCO input
// Expect failure: invalid COCO input.
EXPECT_EQ(iter0, nullptr);

std::shared_ptr<Dataset> ds1 = Coco(folder_path, invalid_annotation_file);
EXPECT_NE(ds1, nullptr);
// Create an iterator over the result of the above dataset
// Create an iterator over the result of the above dataset.
std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
// Expect failure: invalid COCO input
// Expect failure: invalid COCO input.
EXPECT_EQ(iter1, nullptr);

std::shared_ptr<Dataset> ds2 = Coco(folder_path, annotation_file, "valid_mode");
EXPECT_NE(ds2, nullptr);
// Create an iterator over the result of the above dataset
// Create an iterator over the result of the above dataset.
std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
// Expect failure: invalid COCO input
// Expect failure: invalid COCO input.
EXPECT_EQ(iter2, nullptr);
}

/// Feature: CocoDataset
/// Description: test keypoint task of CocoDataset
/// Expectation: the data is processed successfully
TEST_F(MindDataTestPipeline, TestCocoKeypoint) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoKeypoint.";
// Create a Coco Dataset
// Create a Coco Dataset.
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/key_point.json";

@@ -232,12 +250,12 @@ TEST_F(MindDataTestPipeline, TestCocoKeypoint) {
Coco(folder_path, annotation_file, "Keypoint", false, std::make_shared<SequentialSampler>(0, 2));
EXPECT_NE(ds, nullptr);

// Create an iterator over the result of the above dataset
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));

@@ -280,13 +298,16 @@ TEST_F(MindDataTestPipeline, TestCocoKeypoint) {

EXPECT_EQ(i, 2);

// Manually terminate the pipeline
// Manually terminate the pipeline.
iter->Stop();
}

/// Feature: CocoDataset
/// Description: test panoptic task of CocoDataset
/// Expectation: the data is processed successfully
TEST_F(MindDataTestPipeline, TestCocoPanoptic) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoPanoptic.";
// Create a Coco Dataset
// Create a Coco Dataset.
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/panoptic.json";

@@ -294,12 +315,12 @@ TEST_F(MindDataTestPipeline, TestCocoPanoptic) {
Coco(folder_path, annotation_file, "Panoptic", false, std::make_shared<SequentialSampler>(0, 2));
EXPECT_NE(ds, nullptr);

// Create an iterator over the result of the above dataset
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));

@@ -329,7 +350,8 @@ TEST_F(MindDataTestPipeline, TestCocoPanoptic) {
EXPECT_MSTENSOR_EQ(bbox, expect_bbox);

std::shared_ptr<Tensor> de_expect_categoryid;
ASSERT_OK(Tensor::CreateFromVector(expect_categoryid_vector[i], TensorShape({bbox_size, 1}), &de_expect_categoryid));
ASSERT_OK(
Tensor::CreateFromVector(expect_categoryid_vector[i], TensorShape({bbox_size, 1}), &de_expect_categoryid));
mindspore::MSTensor expect_categoryid =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_categoryid));
EXPECT_MSTENSOR_EQ(category_id, expect_categoryid);
@@ -352,13 +374,16 @@ TEST_F(MindDataTestPipeline, TestCocoPanoptic) {

EXPECT_EQ(i, 2);

// Manually terminate the pipeline
// Manually terminate the pipeline.
iter->Stop();
}

/// Feature: CocoDataset
/// Description: test get class index of panoptic task
/// Expectation: the data is processed successfully
TEST_F(MindDataTestPipeline, TestCocoPanopticGetClassIndex) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoPanopticGetClassIndex.";
// Create a Coco Dataset
// Create a Coco Dataset.
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/panoptic.json";

@@ -379,9 +404,12 @@ TEST_F(MindDataTestPipeline, TestCocoPanopticGetClassIndex) {
EXPECT_EQ(class_index1[2].second[1], 1);
}

/// Feature: CocoDataset
/// Description: test stuff task of CocoDataset
/// Expectation: the data is processed successfully
TEST_F(MindDataTestPipeline, TestCocoStuff) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoStuff.";
// Create a Coco Dataset
// Create a Coco Dataset.
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";

@@ -389,12 +417,12 @@ TEST_F(MindDataTestPipeline, TestCocoStuff) {
Coco(folder_path, annotation_file, "Stuff", false, std::make_shared<SequentialSampler>(0, 6));
EXPECT_NE(ds, nullptr);

// Create an iterator over the result of the above dataset
// Create an iterator over the result of the above dataset.
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);

// Iterate the dataset and get each row
// Iterate the dataset and get each row.
std::unordered_map<std::string, mindspore::MSTensor> row;
ASSERT_OK(iter->GetNextRow(&row));

@@ -419,7 +447,8 @@ TEST_F(MindDataTestPipeline, TestCocoStuff) {
EXPECT_MSTENSOR_EQ(image, expect_image);

std::shared_ptr<Tensor> de_expect_segmentation;
ASSERT_OK(Tensor::CreateFromVector(expect_segmentation_vector[i], TensorShape(expect_size[i]), &de_expect_segmentation));
ASSERT_OK(
Tensor::CreateFromVector(expect_segmentation_vector[i], TensorShape(expect_size[i]), &de_expect_segmentation));
mindspore::MSTensor expect_segmentation =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_segmentation));
EXPECT_MSTENSOR_EQ(segmentation, expect_segmentation);
@@ -430,21 +459,74 @@ TEST_F(MindDataTestPipeline, TestCocoStuff) {

EXPECT_EQ(i, 6);

// Manually terminate the pipeline
// Manually terminate the pipeline.
iter->Stop();
}

/// Feature: CocoDataset
/// Description: test captioning task of CocoDataset
/// Expectation: the data is processed successfully
TEST_F(MindDataTestPipeline, TestCocoCaptioning) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoCaptioning.";
  // Build a Coco dataset in "Captioning" mode over the captions fixture,
  // reading the first two samples in annotation order (no decode).
  std::string folder_path = datasets_root_path_ + "/testCOCO/train";
  std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/captions.json";
  std::shared_ptr<Dataset> ds =
    Coco(folder_path, annotation_file, "Captioning", false, std::make_shared<SequentialSampler>(0, 2));
  EXPECT_NE(ds, nullptr);

  // Creating the iterator triggers the creation of the Execution Tree and launches it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Expected image file stems and, per image, the five caption strings in a (5, 1) tensor.
  std::string expect_file[] = {"000000391895", "000000318219"};
  std::vector<std::vector<std::string>> expect_captions_vector = {
    {"This is a banana", "This banana is yellow", "This banana is on a white table",
     "The tail of this banana is facing up", "This banana has spots"},
    {"This is an orange", "This orange is orange", "This orange is on a dark cloth",
     "The head of this orange is facing up", "This orange has spots"}};
  std::vector<std::vector<dsize_t>> expect_size = {{5, 1}, {5, 1}};

  uint64_t row_index = 0;
  for (; !row.empty(); ++row_index) {
    auto image = row["image"];
    auto captions = row["captions"];

    // With decode=false the "image" column must hold the raw bytes of the jpg on disk.
    mindspore::MSTensor expect_image = ReadFileToTensor(folder_path + "/" + expect_file[row_index] + ".jpg");
    EXPECT_MSTENSOR_EQ(image, expect_image);

    // The "captions" column must equal the expected (5, 1) string tensor for this image.
    std::shared_ptr<Tensor> de_expect_captions;
    ASSERT_OK(Tensor::CreateFromVector(expect_captions_vector[row_index], TensorShape(expect_size[row_index]),
                                       &de_expect_captions));
    mindspore::MSTensor expect_captions =
      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_captions));
    EXPECT_MSTENSOR_EQ(captions, expect_captions);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Exactly the two sequentially sampled rows must have been produced.
  EXPECT_EQ(row_index, 2);

  // Manually terminate the pipeline.
  iter->Stop();
}

/// Feature: CocoDataset
/// Description: test CocoDataset with the null sampler
/// Expectation: throw correct error and message
TEST_F(MindDataTestPipeline, TestCocoWithNullSamplerFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoWithNullSamplerFail.";
  // Create a Coco Dataset, deliberately passing a null sampler.
  std::string folder_path = datasets_root_path_ + "/testCOCO/train";
  std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";

  std::shared_ptr<Dataset> ds = Coco(folder_path, annotation_file, "Detection", false, nullptr);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid COCO input, sampler cannot be nullptr.
  EXPECT_EQ(iter, nullptr);
}

+ 1
- 0
tests/ut/data/dataset/testCOCO/annotations/captions.json View File

@@ -0,0 +1 @@
{ "info": { "description": "COCO 2017 Dataset", "url": "http://cocodataset.org", "version": "1.0", "year": 2017, "contributor": "COCO Consortium", "date_created": "2017/09/01" }, "images": [{ "license": 3, "file_name": "000000391895.jpg", "id": 391895 }, { "license": 3, "file_name": "000000318219.jpg", "id": 318219 }], "annotations": [{ "id": 1, "image_id": 391895, "caption": "This is a banana" }, { "id": 2, "image_id": 391895, "caption": "This banana is yellow" }, { "id": 3, "image_id": 391895, "caption": "This banana is on a white table" }, { "id": 4, "image_id": 391895, "caption": "The tail of this banana is facing up" }, { "id": 5, "image_id": 391895, "caption": "This banana has spots" }, { "id": 6, "image_id": 318219, "caption": "This is an orange" }, { "id": 7, "image_id": 318219, "caption": "This orange is orange" }, { "id": 8, "image_id": 318219, "caption": "This orange is on a dark cloth" }, { "id": 9, "image_id": 318219, "caption": "The head of this orange is facing up" }, { "id": 10, "image_id": 318219, "caption": "This orange has spots" }] }

+ 53
- 1
tests/ut/python/dataset/test_datasets_coco.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@ DATA_DIR_2 = "../data/dataset/testCOCO/train"
ANNOTATION_FILE = "../data/dataset/testCOCO/annotations/train.json"
KEYPOINT_FILE = "../data/dataset/testCOCO/annotations/key_point.json"
PANOPTIC_FILE = "../data/dataset/testCOCO/annotations/panoptic.json"
CAPTIONS_FILE = "../data/dataset/testCOCO/annotations/captions.json"
INVALID_FILE = "../data/dataset/testCOCO/annotations/invalid.json"
LACKOFIMAGE_FILE = "../data/dataset/testCOCO/annotations/lack_of_images.json"
INVALID_CATEGORY_ID_FILE = "../data/dataset/testCOCO/annotations/invalid_category_id.json"
@@ -176,6 +177,38 @@ def test_coco_panoptic():
np.testing.assert_array_equal(np.array([[43102], [6079]]), area[1])


def test_coco_captioning():
    """
    Feature: CocoDataset
    Description: test the captioning task of CocoDataset
    Expectation: the data is processed successfully
    """
    # Build the dataset sequentially and expose the "_meta-filename" metadata
    # column under the plain name "filename".
    dataset = ds.CocoDataset(DATA_DIR, annotation_file=CAPTIONS_FILE, task="Captioning",
                             decode=True, shuffle=False, extra_metadata=True)
    dataset = dataset.rename("_meta-filename", "filename")

    names = []
    shapes = []
    captions = []
    for sample in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        names.append(text.to_str(sample["filename"]))
        shapes.append(sample["image"].shape)
        captions.append(sample["captions"])

    # Exactly two images are produced, in annotation order.
    assert len(names) == 2
    assert names == ["000000391895", "000000318219"]

    # First sample: banana image plus its five captions as a (5, 1) byte-string array.
    assert shapes[0] == (2268, 4032, 3)
    np.testing.assert_array_equal(np.array([[b"This is a banana"], [b"This banana is yellow"],
                                            [b"This banana is on a white table"],
                                            [b"The tail of this banana is facing up"],
                                            [b"This banana has spots"]]), captions[0])

    # Second sample: orange image plus its five captions.
    assert shapes[1] == (561, 595, 3)
    np.testing.assert_array_equal(np.array([[b"This is an orange"], [b"This orange is orange"],
                                            [b"This orange is on a dark cloth"],
                                            [b"The head of this orange is facing up"],
                                            [b"This orange has spots"]]), captions[1])


def test_coco_meta_column():
data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection",
decode=True, shuffle=False, extra_metadata=True)
@@ -487,8 +520,27 @@ def test_coco_case_exception():
except RuntimeError as e:
assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

try:
data1 = ds.CocoDataset(DATA_DIR, annotation_file=CAPTIONS_FILE, task="Captioning")
data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert False
except RuntimeError as e:
assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

try:
data1 = ds.CocoDataset(DATA_DIR, annotation_file=CAPTIONS_FILE, task="Captioning")
data1 = data1.map(operations=exception_func, input_columns=["captions"], num_parallel_workers=1)
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert False
except RuntimeError as e:
assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)


if __name__ == '__main__':
test_coco_captioning()
test_coco_detection()
test_coco_stuff()
test_coco_keypoint()


Loading…
Cancel
Save