Merge pull request !5409 from luoyang/son_r0.7 tags/v0.7.0-beta
| @@ -191,8 +191,8 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir, | |||
| } | |||
| // Function to create a ManifestDataset. | |||
| std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string usage, | |||
| std::shared_ptr<SamplerObj> sampler, | |||
| std::shared_ptr<ManifestDataset> Manifest(const std::string &dataset_file, const std::string &usage, | |||
| const std::shared_ptr<SamplerObj> &sampler, | |||
| const std::map<std::string, int32_t> &class_indexing, bool decode) { | |||
| auto ds = std::make_shared<ManifestDataset>(dataset_file, usage, sampler, class_indexing, decode); | |||
| @@ -211,7 +211,7 @@ std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::s | |||
| // Function to overload "+" operator to concat two datasets | |||
| std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1, | |||
| const std::shared_ptr<Dataset> &datasets2) { | |||
| std::shared_ptr<ConcatDataset> ds = std::make_shared<ConcatDataset>(std::vector({datasets1, datasets2})); | |||
| std::shared_ptr<ConcatDataset> ds = std::make_shared<ConcatDataset>(std::vector({datasets2, datasets1})); | |||
| // Call derived class validation method. | |||
| return ds->ValidateParams() ? ds : nullptr; | |||
| @@ -580,13 +580,6 @@ bool SchemaObj::from_json(nlohmann::json json_obj) { | |||
| // OTHER FUNCTIONS | |||
| // Helper function to create default RandomSampler. | |||
| std::shared_ptr<SamplerObj> CreateDefaultSampler() { | |||
| const int32_t num_samples = 0; // 0 means to sample all ids. | |||
| bool replacement = false; | |||
| return std::make_shared<RandomSamplerObj>(replacement, num_samples); | |||
| } | |||
| // Helper function to compute a default shuffle size | |||
| Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, | |||
| int64_t *shuffle_size) { | |||
| @@ -682,6 +675,36 @@ bool ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_sha | |||
| return true; | |||
| } | |||
| // Helper function to validate dataset sampler parameter | |||
| bool ValidateDatasetSampler(const std::string &dataset_name, const std::shared_ptr<SamplerObj> &sampler) { | |||
| if (sampler == nullptr) { | |||
| MS_LOG(ERROR) << dataset_name << ": Sampler is not constructed correctly, sampler: nullptr"; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| // Helper function to validate dataset input/output column parameter | |||
| bool ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param, | |||
| const std::vector<std::string> &columns) { | |||
| if (columns.empty()) { | |||
| MS_LOG(ERROR) << dataset_name << ":" << column_param << " should not be empty"; | |||
| return false; | |||
| } | |||
| for (uint32_t i = 0; i < columns.size(); ++i) { | |||
| if (columns[i].empty()) { | |||
| MS_LOG(ERROR) << dataset_name << ":" << column_param << "[" << i << "] should not be empty"; | |||
| return false; | |||
| } | |||
| } | |||
| std::set<std::string> columns_set(columns.begin(), columns.end()); | |||
| if (columns_set.size() != columns.size()) { | |||
| MS_LOG(ERROR) << dataset_name << ":" << column_param << ": Every column name should not be same with others"; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| /* ####################################### Derived Dataset classes ################################# */ | |||
| // DERIVED DATASET CLASSES LEAF-NODE DATASETS | |||
| @@ -701,6 +724,9 @@ bool CelebADataset::ValidateParams() { | |||
| if (!ValidateDatasetDirParam("CelebADataset", dataset_dir_)) { | |||
| return false; | |||
| } | |||
| if (!ValidateDatasetSampler("CelebADataset", sampler_)) { | |||
| return false; | |||
| } | |||
| std::set<std::string> dataset_type_list = {"all", "train", "valid", "test"}; | |||
| auto iter = dataset_type_list.find(dataset_type_); | |||
| if (iter == dataset_type_list.end()) { | |||
| @@ -715,11 +741,6 @@ std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| // If user does not specify Sampler, create a default sampler based on the shuffle variable. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| RETURN_EMPTY_IF_ERROR( | |||
| schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| @@ -736,18 +757,15 @@ std::vector<std::shared_ptr<DatasetOp>> CelebADataset::Build() { | |||
| Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) | |||
| : dataset_dir_(dataset_dir), sampler_(sampler) {} | |||
| bool Cifar10Dataset::ValidateParams() { return ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_); } | |||
| bool Cifar10Dataset::ValidateParams() { | |||
| return ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_) && ValidateDatasetSampler("Cifar10Dataset", sampler_); | |||
| } | |||
| // Function to build CifarOp for Cifar10 | |||
| std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| // If user does not specify Sampler, create a default sampler based on the shuffle variable. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| // Do internal Schema generation. | |||
| auto schema = std::make_unique<DataSchema>(); | |||
| RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||
| @@ -765,18 +783,16 @@ std::vector<std::shared_ptr<DatasetOp>> Cifar10Dataset::Build() { | |||
| Cifar100Dataset::Cifar100Dataset(const std::string &dataset_dir, std::shared_ptr<SamplerObj> sampler) | |||
| : dataset_dir_(dataset_dir), sampler_(sampler) {} | |||
| bool Cifar100Dataset::ValidateParams() { return ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_); } | |||
| bool Cifar100Dataset::ValidateParams() { | |||
| return ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_) && | |||
| ValidateDatasetSampler("Cifar100Dataset", sampler_); | |||
| } | |||
| // Function to build CifarOp for Cifar100 | |||
| std::vector<std::shared_ptr<DatasetOp>> Cifar100Dataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| // If user does not specify Sampler, create a default sampler based on the shuffle variable. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| // Do internal Schema generation. | |||
| auto schema = std::make_unique<DataSchema>(); | |||
| RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||
| @@ -987,6 +1003,9 @@ bool CocoDataset::ValidateParams() { | |||
| if (!ValidateDatasetDirParam("CocoDataset", dataset_dir_)) { | |||
| return false; | |||
| } | |||
| if (!ValidateDatasetSampler("CocoDataset", sampler_)) { | |||
| return false; | |||
| } | |||
| Path annotation_file(annotation_file_); | |||
| if (!annotation_file.Exists()) { | |||
| MS_LOG(ERROR) << "annotation_file is invalid or not exist"; | |||
| @@ -1006,11 +1025,6 @@ std::vector<std::shared_ptr<DatasetOp>> CocoDataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| // If user does not specify Sampler, create a default sampler based on the shuffle variable. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| CocoOp::TaskType task_type; | |||
| if (task_ == "Detection") { | |||
| task_type = CocoOp::TaskType::Detection; | |||
| @@ -1100,6 +1114,12 @@ bool CSVDataset::ValidateParams() { | |||
| return false; | |||
| } | |||
| if (!column_names_.empty()) { | |||
| if (!ValidateDatasetColumnParam("CSVDataset", "column_names", column_names_)) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| @@ -1155,17 +1175,15 @@ ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std | |||
| class_indexing_(class_indexing), | |||
| exts_(extensions) {} | |||
| bool ImageFolderDataset::ValidateParams() { return ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_); } | |||
| bool ImageFolderDataset::ValidateParams() { | |||
| return ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_) && | |||
| ValidateDatasetSampler("ImageFolderDataset", sampler_); | |||
| } | |||
| std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| // Do internal Schema generation. | |||
| // This arg exists in ImageFolderOp, but is not externalized (in the Python API). | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| @@ -1180,7 +1198,8 @@ std::vector<std::shared_ptr<DatasetOp>> ImageFolderDataset::Build() { | |||
| return node_ops; | |||
| } | |||
| ManifestDataset::ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr<SamplerObj> sampler, | |||
| ManifestDataset::ManifestDataset(const std::string &dataset_file, const std::string &usage, | |||
| const std::shared_ptr<SamplerObj> &sampler, | |||
| const std::map<std::string, int32_t> &class_indexing, bool decode) | |||
| : dataset_file_(dataset_file), usage_(usage), decode_(decode), class_index_(class_indexing), sampler_(sampler) {} | |||
| @@ -1190,6 +1209,9 @@ bool ManifestDataset::ValidateParams() { | |||
| MS_LOG(ERROR) << "dataset file: [" << dataset_file_ << "] is invalid or not exist"; | |||
| return false; | |||
| } | |||
| if (!ValidateDatasetSampler("ManifestDataset", sampler_)) { | |||
| return false; | |||
| } | |||
| std::vector<std::string> usage_list = {"train", "eval", "inference"}; | |||
| if (find(usage_list.begin(), usage_list.end(), usage_) == usage_list.end()) { | |||
| @@ -1204,11 +1226,6 @@ std::vector<std::shared_ptr<DatasetOp>> ManifestDataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| // If user does not specify Sampler, create a default sampler based on the shuffle variable. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| // Do internal Schema generation. | |||
| auto schema = std::make_unique<DataSchema>(); | |||
| RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||
| @@ -1228,17 +1245,14 @@ std::vector<std::shared_ptr<DatasetOp>> ManifestDataset::Build() { | |||
| MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler) | |||
| : dataset_dir_(dataset_dir), sampler_(sampler) {} | |||
| bool MnistDataset::ValidateParams() { return ValidateDatasetDirParam("MnistDataset", dataset_dir_); } | |||
| bool MnistDataset::ValidateParams() { | |||
| return ValidateDatasetDirParam("MnistDataset", dataset_dir_) && ValidateDatasetSampler("MnistDataset", sampler_); | |||
| } | |||
| std::vector<std::shared_ptr<DatasetOp>> MnistDataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| // Do internal Schema generation. | |||
| auto schema = std::make_unique<DataSchema>(); | |||
| RETURN_EMPTY_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||
| @@ -1257,6 +1271,14 @@ bool RandomDataset::ValidateParams() { | |||
| MS_LOG(ERROR) << "RandomDataset: total_rows must be greater than 0, now get " << total_rows_; | |||
| return false; | |||
| } | |||
| if (!ValidateDatasetSampler("RandomDataset", sampler_)) { | |||
| return false; | |||
| } | |||
| if (!columns_list_.empty()) { | |||
| if (!ValidateDatasetColumnParam("RandomDataset", "columns_list", columns_list_)) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| @@ -1279,11 +1301,6 @@ std::vector<std::shared_ptr<DatasetOp>> RandomDataset::Build() { | |||
| total_rows_ = schema_obj->get_num_rows(); | |||
| } | |||
| // If user does not specify Sampler, create a default sampler based on the shuffle variable. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| std::string schema_json_string, schema_file_path; | |||
| if (schema_ != nullptr) { | |||
| schema_->set_dataset_type("Random"); | |||
| @@ -1392,6 +1409,9 @@ bool VOCDataset::ValidateParams() { | |||
| MS_LOG(ERROR) << "Invalid dataset path or no dataset path is specified."; | |||
| return false; | |||
| } | |||
| if (!ValidateDatasetSampler("VOCDataset", sampler_)) { | |||
| return false; | |||
| } | |||
| if (task_ == "Segmentation") { | |||
| if (!class_index_.empty()) { | |||
| MS_LOG(ERROR) << "class_indexing is invalid in Segmentation task."; | |||
| @@ -1420,11 +1440,6 @@ std::vector<std::shared_ptr<DatasetOp>> VOCDataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| // If user does not specify Sampler, create a default sampler based on the shuffle variable. | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = CreateDefaultSampler(); | |||
| } | |||
| auto schema = std::make_unique<DataSchema>(); | |||
| VOCOp::TaskType task_type_; | |||
| @@ -1539,6 +1554,10 @@ bool ConcatDataset::ValidateParams() { | |||
| MS_LOG(ERROR) << "Concat: concatenated datasets are not specified."; | |||
| return false; | |||
| } | |||
| if (find(datasets_.begin(), datasets_.end(), nullptr) != datasets_.end()) { | |||
| MS_LOG(ERROR) << "Concat: concatenated dataset should not be null."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| @@ -1586,6 +1605,21 @@ bool MapDataset::ValidateParams() { | |||
| MS_LOG(ERROR) << "Map: No operation is specified."; | |||
| return false; | |||
| } | |||
| if (!input_columns_.empty()) { | |||
| if (!ValidateDatasetColumnParam("MapDataset", "input_columns", input_columns_)) { | |||
| return false; | |||
| } | |||
| } | |||
| if (!output_columns_.empty()) { | |||
| if (!ValidateDatasetColumnParam("MapDataset", "output_columns", output_columns_)) { | |||
| return false; | |||
| } | |||
| } | |||
| if (!project_columns_.empty()) { | |||
| if (!ValidateDatasetColumnParam("MapDataset", "project_columns", project_columns_)) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| @@ -1615,12 +1649,12 @@ RenameDataset::RenameDataset(const std::vector<std::string> &input_columns, | |||
| : input_columns_(input_columns), output_columns_(output_columns) {} | |||
| bool RenameDataset::ValidateParams() { | |||
| if (input_columns_.empty() || output_columns_.empty()) { | |||
| MS_LOG(ERROR) << "input and output columns must be specified"; | |||
| if (input_columns_.size() != output_columns_.size()) { | |||
| MS_LOG(ERROR) << "RenameDataset: input and output columns must be the same size"; | |||
| return false; | |||
| } | |||
| if (input_columns_.size() != output_columns_.size()) { | |||
| MS_LOG(ERROR) << "input and output columns must be the same size"; | |||
| if (!ValidateDatasetColumnParam("RenameDataset", "input_columns", input_columns_) || | |||
| !ValidateDatasetColumnParam("RenameDataset", "output_columns", output_columns_)) { | |||
| return false; | |||
| } | |||
| return true; | |||
| @@ -1713,7 +1747,7 @@ std::vector<std::shared_ptr<DatasetOp>> TakeDataset::Build() { | |||
| // Function to validate the parameters for TakeDataset | |||
| bool TakeDataset::ValidateParams() { | |||
| if (take_count_ < 0 && take_count_ != -1) { | |||
| if (take_count_ <= 0 && take_count_ != -1) { | |||
| MS_LOG(ERROR) << "Take: take_count should be either -1 or positive integer, take_count: " << take_count_; | |||
| return false; | |||
| } | |||
| @@ -84,32 +84,32 @@ std::shared_ptr<SchemaObj> Schema(const std::string &schema_file = ""); | |||
| // The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset. | |||
| /// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'. | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` | |||
| /// will be used to randomly iterate the entire dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||
| /// \param[in] decode Decode the images after reading (default=false). | |||
| /// \param[in] extensions Set of file extensions to be included in the dataset (default={}). | |||
| /// \return Shared pointer to the current Dataset | |||
| std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type = "all", | |||
| const std::shared_ptr<SamplerObj> &sampler = nullptr, bool decode = false, | |||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), bool decode = false, | |||
| const std::set<std::string> &extensions = {}); | |||
| /// \brief Function to create a Cifar10 Dataset | |||
| /// \notes The generated dataset has two columns ['image', 'label'] | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` | |||
| /// will be used to randomly iterate the entire dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||
| /// \return Shared pointer to the current Dataset | |||
| std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, | |||
| const std::shared_ptr<SamplerObj> &sampler = nullptr); | |||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler()); | |||
| /// \brief Function to create a Cifar100 Dataset | |||
| /// \notes The generated dataset has three columns ['image', 'coarse_label', 'fine_label'] | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` | |||
| /// will be used to randomly iterate the entire dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||
| /// \return Shared pointer to the current Dataset | |||
| std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir, | |||
| const std::shared_ptr<SamplerObj> &sampler = nullptr); | |||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler()); | |||
| /// \brief Function to create a CLUEDataset | |||
| /// \notes The generated dataset has a variable number of columns depending on the task and usage | |||
| @@ -146,12 +146,12 @@ std::shared_ptr<CLUEDataset> CLUE(const std::vector<std::string> &dataset_files, | |||
| /// \param[in] annotation_file Path to the annotation json | |||
| /// \param[in] task Set the task type for reading coco data; currently supports 'Detection'/'Stuff'/'Panoptic'/'Keypoint' | |||
| /// \param[in] decode Decode the images after reading | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` | |||
| /// will be used to randomly iterate the entire dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||
| /// \return Shared pointer to the current Dataset | |||
| std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file, | |||
| const std::string &task = "Detection", const bool &decode = false, | |||
| const std::shared_ptr<SamplerObj> &sampler = nullptr); | |||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler()); | |||
| /// \brief Function to create a CSVDataset | |||
| /// \notes The generated dataset has a variable number of columns | |||
| @@ -185,13 +185,13 @@ std::shared_ptr<CSVDataset> CSV(const std::vector<std::string> &dataset_files, c | |||
| /// The generated dataset has two columns ['image', 'label'] | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||
| /// \param[in] decode A flag to decode in ImageFolder | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, | |||
| /// A `RandomSampler` will be used to randomly iterate the entire dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||
| /// \param[in] extensions File extensions to be read | |||
| /// \param[in] class_indexing a class name to label map | |||
| /// \return Shared pointer to the current ImageFolderDataset | |||
| std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir, bool decode = false, | |||
| const std::shared_ptr<SamplerObj> &sampler = nullptr, | |||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | |||
| const std::set<std::string> &extensions = {}, | |||
| const std::map<std::string, int32_t> &class_indexing = {}); | |||
| @@ -199,25 +199,25 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir, | |||
| /// \notes The generated dataset has two columns ['image', 'label'] | |||
| /// \param[in] dataset_file The dataset file to be read | |||
| /// \param[in] usage Need "train", "eval" or "inference" data (default="train") | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, | |||
| /// A `RandomSampler` will be used to randomly iterate the entire dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||
| /// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder | |||
| /// names will be sorted alphabetically and each class will be given a unique index starting from 0). | |||
| /// \param[in] decode Decode the images after reading (default=false). | |||
| /// \return Shared pointer to the current ManifestDataset | |||
| std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string usage = "train", | |||
| std::shared_ptr<SamplerObj> sampler = nullptr, | |||
| std::shared_ptr<ManifestDataset> Manifest(const std::string &dataset_file, const std::string &usage = "train", | |||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | |||
| const std::map<std::string, int32_t> &class_indexing = {}, | |||
| bool decode = false); | |||
| /// \brief Function to create a MnistDataset | |||
| /// \notes The generated dataset has two columns ['image', 'label'] | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, | |||
| /// A `RandomSampler` will be used to randomly iterate the entire dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||
| /// \return Shared pointer to the current MnistDataset | |||
| std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, | |||
| const std::shared_ptr<SamplerObj> &sampler = nullptr); | |||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler()); | |||
| /// \brief Function to create a ConcatDataset | |||
| /// \notes Overload the "+" operator to concat two datasets | |||
| @@ -231,14 +231,14 @@ std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &dataset | |||
| /// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random) | |||
| /// \param[in] schema SchemaObj to set column type, data type and data shape | |||
| /// \param[in] columns_list List of columns to be read (default={}, read all columns) | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` | |||
| /// will be used to randomly iterate the entire dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||
| /// \return Shared pointer to the current Dataset | |||
| template <typename T = std::shared_ptr<SchemaObj>> | |||
| std::shared_ptr<RandomDataset> RandomData(const int32_t &total_rows = 0, T schema = nullptr, | |||
| const std::vector<std::string> &columns_list = {}, | |||
| std::shared_ptr<SamplerObj> sampler = nullptr) { | |||
| auto ds = std::make_shared<RandomDataset>(total_rows, schema, std::move(columns_list), std::move(sampler)); | |||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler()) { | |||
| auto ds = std::make_shared<RandomDataset>(total_rows, schema, columns_list, std::move(sampler)); | |||
| return ds->ValidateParams() ? ds : nullptr; | |||
| } | |||
| @@ -271,13 +271,13 @@ std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &datase | |||
| /// \param[in] mode Set the data list txt file to be read | |||
| /// \param[in] class_indexing A str-to-int mapping from label name to index | |||
| /// \param[in] decode Decode the images after reading | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` | |||
| /// will be used to randomly iterate the entire dataset | |||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||
| /// \return Shared pointer to the current Dataset | |||
| std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task = "Segmentation", | |||
| const std::string &mode = "train", | |||
| const std::map<std::string, int32_t> &class_indexing = {}, bool decode = false, | |||
| const std::shared_ptr<SamplerObj> &sampler = nullptr); | |||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler()); | |||
| /// \brief Function to create a ZipDataset | |||
| /// \notes Applies zip to the dataset | |||
| @@ -716,7 +716,7 @@ class ImageFolderDataset : public Dataset { | |||
| class ManifestDataset : public Dataset { | |||
| public: | |||
| /// \brief Constructor | |||
| ManifestDataset(std::string dataset_file, std::string usage, std::shared_ptr<SamplerObj> sampler, | |||
| ManifestDataset(const std::string &dataset_file, const std::string &usage, const std::shared_ptr<SamplerObj> &sampler, | |||
| const std::map<std::string, int32_t> &class_indexing, bool decode); | |||
| /// \brief Destructor | |||
| @@ -768,7 +768,7 @@ class RandomDataset : public Dataset { | |||
| /// \brief Constructor | |||
| RandomDataset(const int32_t &total_rows, std::shared_ptr<SchemaObj> schema, | |||
| const std::vector<std::string> &columns_list, std::shared_ptr<SamplerObj> sampler) | |||
| const std::vector<std::string> &columns_list, const std::shared_ptr<SamplerObj> &sampler) | |||
| : total_rows_(total_rows), | |||
| schema_path_(""), | |||
| schema_(std::move(schema)), | |||
| @@ -776,8 +776,8 @@ class RandomDataset : public Dataset { | |||
| sampler_(std::move(sampler)) {} | |||
| /// \brief Constructor | |||
| RandomDataset(const int32_t &total_rows, std::string schema_path, std::vector<std::string> columns_list, | |||
| std::shared_ptr<SamplerObj> sampler) | |||
| RandomDataset(const int32_t &total_rows, std::string schema_path, const std::vector<std::string> &columns_list, | |||
| const std::shared_ptr<SamplerObj> &sampler) | |||
| : total_rows_(total_rows), schema_path_(schema_path), columns_list_(columns_list), sampler_(std::move(sampler)) {} | |||
| /// \brief Destructor | |||
| @@ -32,7 +32,7 @@ class OneHot(cde.OneHotOp): | |||
| Args: | |||
| num_classes (int): Number of classes of the label | |||
| it should be bigger than or equal to label class number. | |||
| it should be larger than the largest label number in the dataset. | |||
| Raises: | |||
| RuntimeError: feature size is bigger than num_classes. | |||
| @@ -107,3 +107,33 @@ TEST_F(MindDataTestPipeline, TestCifar10DatasetFail1) { | |||
| std::shared_ptr<Dataset> ds = Cifar10("", RandomSampler(false, 10)); | |||
| EXPECT_EQ(ds, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestCifar10DatasetWithNullSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar10DatasetWithNullSampler."; | |||
| // Create a Cifar10 Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; | |||
| std::shared_ptr<Dataset> ds = Cifar10(folder_path, nullptr); | |||
| // Expect failure: sampler can not be nullptr | |||
| EXPECT_EQ(ds, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestCifar100DatasetWithNullSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100DatasetWithNullSampler."; | |||
| // Create a Cifar100 Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; | |||
| std::shared_ptr<Dataset> ds = Cifar100(folder_path, nullptr); | |||
| // Expect failure: sampler can not be nullptr | |||
| EXPECT_EQ(ds, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestCifar100DatasetWithWrongSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100DatasetWithWrongSampler."; | |||
| // Create a Cifar100 Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; | |||
| std::shared_ptr<Dataset> ds = Cifar100(folder_path, RandomSampler(false, -10)); | |||
| // Expect failure: sampler is not constructed correctly | |||
| EXPECT_EQ(ds, nullptr); | |||
| } | |||
| @@ -290,3 +290,14 @@ TEST_F(MindDataTestPipeline, TestCocoStuff) { | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| // Verifies that Coco() returns nullptr when it is handed a null sampler. | |||
| TEST_F(MindDataTestPipeline, TestCocoWithNullSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoWithNullSampler."; | |||
| // Locate the Coco test images and their annotation file | |||
| const std::string image_dir = datasets_root_path_ + "/testCOCO/train"; | |||
| const std::string annotation_path = datasets_root_path_ + "/testCOCO/annotations/train.json"; | |||
| // Request the "Detection" task with a null sampler | |||
| const std::shared_ptr<Dataset> dataset = Coco(image_dir, annotation_path, "Detection", false, nullptr); | |||
| // Creation is expected to be rejected because the sampler is nullptr | |||
| EXPECT_EQ(dataset, nullptr); | |||
| } | |||
| @@ -473,3 +473,14 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetShuffleGlobal) { | |||
| GlobalContext::config_manager()->set_seed(original_seed); | |||
| GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers); | |||
| } | |||
| // Verifies that CSV() returns nullptr when the requested column names contain a duplicate. | |||
| TEST_F(MindDataTestPipeline, TestCSVDatasetDuplicateColumnName) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCSVDatasetDuplicateColumnName."; | |||
| // Use a single CSV file as input | |||
| const std::string csv_path = datasets_root_path_ + "/testCSV/1.csv"; | |||
| // "col1" appears twice on purpose | |||
| const std::vector<std::string> duplicated_names = {"col1", "col1", "col3", "col4"}; | |||
| const std::shared_ptr<Dataset> dataset = CSV({csv_path}, ',', {}, duplicated_names, -1, ShuffleMode::kFalse); | |||
| // Creation is expected to be rejected because of the duplicate column names | |||
| EXPECT_EQ(dataset, nullptr); | |||
| } | |||
| @@ -59,7 +59,7 @@ TEST_F(MindDataTestPipeline, TestManifestDecode) { | |||
| std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json"; | |||
| // Create a Manifest Dataset | |||
| std::shared_ptr<Dataset> ds = Manifest(file_path, "train", nullptr, {}, true); | |||
| std::shared_ptr<Dataset> ds = Manifest(file_path, "train", RandomSampler(), {}, true); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| @@ -130,7 +130,7 @@ TEST_F(MindDataTestPipeline, TestManifestClassIndex) { | |||
| std::vector<int> expected_label = {111, 222}; | |||
| // Create a Manifest Dataset | |||
| std::shared_ptr<Dataset> ds = Manifest(file_path, "train", nullptr, map, true); | |||
| std::shared_ptr<Dataset> ds = Manifest(file_path, "train", RandomSampler(), map, true); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| @@ -204,3 +204,12 @@ TEST_F(MindDataTestPipeline, TestManifestError) { | |||
| std::shared_ptr<Dataset> ds1 = Manifest(file_path, "invalid_usage"); | |||
| EXPECT_EQ(ds1, nullptr); | |||
| } | |||
| // Verifies that Manifest() returns nullptr when it is handed a null sampler. | |||
| TEST_F(MindDataTestPipeline, TestManifestWithNullSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestWithNullSampler."; | |||
| // Locate the manifest test file | |||
| const std::string manifest_path = datasets_root_path_ + "/testManifestData/cpp.json"; | |||
| // Request the "train" usage with a null sampler | |||
| const std::shared_ptr<Dataset> dataset = Manifest(manifest_path, "train", nullptr); | |||
| // Creation is expected to be rejected because the sampler is nullptr | |||
| EXPECT_EQ(dataset, nullptr); | |||
| } | |||
| @@ -311,6 +311,34 @@ TEST_F(MindDataTestPipeline, TestProjectMap) { | |||
| iter->Stop(); | |||
| } | |||
| // Verifies that Map() returns nullptr when its input, output or project column list contains a duplicate. | |||
| TEST_F(MindDataTestPipeline, TestMapDuplicateColumn) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMapDuplicateColumn."; | |||
| // Build a valid ImageFolder source dataset to apply Map on | |||
| const std::string image_dir = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> source = ImageFolder(image_dir, true, RandomSampler(false, 10)); | |||
| EXPECT_NE(source, nullptr); | |||
| // Build the tensor operation that every Map call below applies | |||
| std::shared_ptr<TensorOperation> flip_op = vision::RandomVerticalFlip(0.5); | |||
| EXPECT_NE(flip_op, nullptr); | |||
| // Case 1: the input column list names "image" twice | |||
| auto dup_input = source->Map({flip_op}, {"image", "image"}, {}, {}); | |||
| EXPECT_EQ(dup_input, nullptr); | |||
| // Case 2: the output column list names "label" twice | |||
| auto dup_output = source->Map({flip_op}, {}, {"label", "label"}, {}); | |||
| EXPECT_EQ(dup_output, nullptr); | |||
| // Case 3: the project column list names "image" twice | |||
| auto dup_project = source->Map({flip_op}, {}, {}, {"image", "image"}); | |||
| EXPECT_EQ(dup_project, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline.TestProjectMapAutoInjection"; | |||
| @@ -362,8 +390,8 @@ TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) { | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestRenameFail) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail."; | |||
| TEST_F(MindDataTestPipeline, TestRenameFail1) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail1."; | |||
| // We expect this test to fail because input and output in Rename are not the same size | |||
| // Create an ImageFolder Dataset | |||
| @@ -381,6 +409,38 @@ TEST_F(MindDataTestPipeline, TestRenameFail) { | |||
| EXPECT_EQ(ds, nullptr); | |||
| } | |||
| // Verifies that Rename() returns nullptr when one of the supplied column names is an empty string. | |||
| TEST_F(MindDataTestPipeline, TestRenameFail2) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail2."; | |||
| // Build a valid ImageFolder source dataset to apply Rename on | |||
| const std::string image_dir = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> source = ImageFolder(image_dir, true, RandomSampler(false, 10)); | |||
| EXPECT_NE(source, nullptr); | |||
| // The second output column name is empty, so the Rename is expected to be rejected | |||
| std::shared_ptr<Dataset> renamed = source->Rename({"image", "label"}, {"col2", ""}); | |||
| EXPECT_EQ(renamed, nullptr); | |||
| } | |||
| // Verifies that Rename() returns nullptr when either column list contains a duplicate name. | |||
| TEST_F(MindDataTestPipeline, TestRenameFail3) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail3."; | |||
| // Build a valid ImageFolder source dataset to apply Rename on | |||
| const std::string image_dir = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> source = ImageFolder(image_dir, true, RandomSampler(false, 10)); | |||
| EXPECT_NE(source, nullptr); | |||
| // Case 1: the input column list names "image" twice | |||
| auto dup_input = source->Rename({"image", "image"}, {"col1", "col2"}); | |||
| EXPECT_EQ(dup_input, nullptr); | |||
| // Case 2: the output column list names "col1" twice | |||
| auto dup_output = source->Rename({"image", "label"}, {"col1", "col1"}); | |||
| EXPECT_EQ(dup_output, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestRenameSuccess) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameSuccess."; | |||
| @@ -688,9 +748,15 @@ TEST_F(MindDataTestPipeline, TestTakeDatasetError1) { | |||
| // Create a Take operation on ds with invalid count input | |||
| int32_t count = -5; | |||
| ds = ds->Take(count); | |||
| auto ds1 = ds->Take(count); | |||
| // Expect nullptr for invalid input take_count | |||
| EXPECT_EQ(ds, nullptr); | |||
| EXPECT_EQ(ds1, nullptr); | |||
| // Create a Take operation on ds with invalid count input | |||
| count = 0; | |||
| auto ds2 = ds->Take(count); | |||
| // Expect nullptr for invalid input take_count | |||
| EXPECT_EQ(ds2, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestTakeDatasetNormal) { | |||
| @@ -265,4 +265,28 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic4) { | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| GlobalContext::config_manager()->set_seed(curr_seed); | |||
| } | |||
| } | |||
| // Verifies that RandomData() returns nullptr when it is handed a null sampler. | |||
| TEST_F(MindDataTestPipeline, TestRandomDatasetWithNullSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetWithNullSampler."; | |||
| // Describe two uint8 columns, "image" and "label", in a schema | |||
| std::shared_ptr<SchemaObj> column_schema = Schema(); | |||
| column_schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2}); | |||
| column_schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1}); | |||
| // Ask for 50 rows with an empty column list and an explicitly null sampler | |||
| const std::shared_ptr<Dataset> dataset = RandomData(50, column_schema, {}, nullptr); | |||
| // Creation is expected to be rejected because the sampler is nullptr | |||
| EXPECT_EQ(dataset, nullptr); | |||
| } | |||
| // Verifies that RandomData() returns nullptr when the requested column list contains a duplicate. | |||
| TEST_F(MindDataTestPipeline, TestRandomDatasetDuplicateColumnName) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetDuplicateColumnName."; | |||
| // Describe two uint8 columns, "image" and "label", in a schema | |||
| std::shared_ptr<SchemaObj> column_schema = Schema(); | |||
| column_schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2}); | |||
| column_schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1}); | |||
| // Ask for 50 rows while naming the "image" column twice | |||
| const std::shared_ptr<Dataset> dataset = RandomData(50, column_schema, {"image", "image"}); | |||
| // Creation is expected to be rejected because of the duplicate column names | |||
| EXPECT_EQ(dataset, nullptr); | |||
| } | |||
| @@ -194,3 +194,13 @@ TEST_F(MindDataTestPipeline, TestVOCSegmentationError1) { | |||
| // Expect nullptr for segmentation task with class_index | |||
| EXPECT_EQ(ds, nullptr); | |||
| } | |||
| // Verifies that VOC() returns nullptr when it is handed a null sampler. | |||
| TEST_F(MindDataTestPipeline, TestVOCWithNullSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCWithNullSampler."; | |||
| // Locate the VOC test fixture directory | |||
| const std::string voc_dir = datasets_root_path_ + "/testVOC2012_2"; | |||
| // Request the "Segmentation" task on the "train" split with a null sampler | |||
| const std::shared_ptr<Dataset> dataset = VOC(voc_dir, "Segmentation", "train", {}, false, nullptr); | |||
| // Creation is expected to be rejected because the sampler is nullptr | |||
| EXPECT_EQ(dataset, nullptr); | |||
| } | |||
| @@ -118,24 +118,44 @@ TEST_F(MindDataTestPipeline, TestCelebAException) { | |||
| EXPECT_EQ(ds1, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestImageFolderFail1) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFail1."; | |||
| TEST_F(MindDataTestPipeline, TestCelebADatasetWithNullSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCelebADataset."; | |||
| // Create an ImageFolder Dataset | |||
| std::shared_ptr<Dataset> ds = ImageFolder("", true, nullptr); | |||
| // Create a CelebA Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testCelebAData/"; | |||
| std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", nullptr, false, {}); | |||
| // Expect failure: sampler can not be nullptr | |||
| EXPECT_EQ(ds, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestMnistFail1) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFail1."; | |||
| // Verifies that Mnist() returns nullptr when the dataset directory is an empty string. | |||
| TEST_F(MindDataTestPipeline, TestMnistFailWithWrongDatasetDir) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithWrongDatasetDir."; | |||
| // Pass an empty dataset directory together with a RandomSampler(false, 10) | |||
| const std::shared_ptr<Dataset> dataset = Mnist("", RandomSampler(false, 10)); | |||
| // Creation is expected to be rejected | |||
| EXPECT_EQ(dataset, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestImageFolderFail2) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFail2."; | |||
| // Verifies that Mnist() returns nullptr when it is handed a null sampler. | |||
| TEST_F(MindDataTestPipeline, TestMnistFailWithNullSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithNullSampler."; | |||
| // Point at the Mnist test fixture directory and pass no sampler | |||
| const std::string mnist_dir = datasets_root_path_ + "/testMnistData/"; | |||
| const std::shared_ptr<Dataset> dataset = Mnist(mnist_dir, nullptr); | |||
| // Creation is expected to be rejected because the sampler is nullptr | |||
| EXPECT_EQ(dataset, nullptr); | |||
| } | |||
| // Verifies that ImageFolder() returns nullptr when the dataset directory is an empty string. | |||
| TEST_F(MindDataTestPipeline, TestImageFolderWithWrongDatasetDir) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderWithWrongDatasetDir."; | |||
| // Create an ImageFolder Dataset with an empty dataset directory. | |||
| // A valid sampler is passed on purpose: a null sampler is itself expected to be rejected, | |||
| // which would let this test pass without the directory ever being checked. | |||
| std::shared_ptr<Dataset> ds = ImageFolder("", true, RandomSampler()); | |||
| // Expect failure: the dataset directory is the only invalid argument | |||
| EXPECT_EQ(ds, nullptr); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestImageFolderFailWithWrongExtension) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFailWithWrongExtension."; | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| @@ -150,8 +170,29 @@ TEST_F(MindDataTestPipeline, TestImageFolderFail2) { | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| // Expect no data: can not find files with specified extension | |||
| EXPECT_EQ(row.size(), 0); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| // Verifies that ImageFolder() returns nullptr when it is handed a null sampler. | |||
| TEST_F(MindDataTestPipeline, TestImageFolderFailWithNullSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFailWithNullSampler."; | |||
| // Point at the ImageFolder test fixture directory and pass no sampler | |||
| const std::string image_dir = datasets_root_path_ + "/testPK/data/"; | |||
| const std::shared_ptr<Dataset> dataset = ImageFolder(image_dir, true, nullptr); | |||
| // Creation is expected to be rejected because the sampler is nullptr | |||
| EXPECT_EQ(dataset, nullptr); | |||
| } | |||
| // Verifies that ImageFolder() returns nullptr when the sampler it receives was built from invalid arguments. | |||
| TEST_F(MindDataTestPipeline, TestImageFolderFailWithWrongSampler) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderFailWithWrongSampler."; | |||
| // Create an ImageFolder Dataset. | |||
| // Use the same ImageFolder fixture directory as the other ImageFolder tests (not the Cifar100 one), | |||
| // so that the sampler is the only invalid argument. | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, SequentialSampler(-2, 5)); | |||
| // Expect failure: sampler is not constructed correctly | |||
| EXPECT_EQ(ds, nullptr); | |||
| } | |||