Merge pull request !22310 from 杨旭华/SemeionDatasettags/v1.6.0
| @@ -117,6 +117,7 @@ | |||
| #include "minddata/dataset/engine/ir/datasetops/source/qmnist_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/random_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/sbu_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/semeion_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/sogou_news_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/speech_commands_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/stl10_node.h" | |||
| @@ -1527,6 +1528,27 @@ QMnistDataset::QMnistDataset(const std::vector<char> &dataset_dir, const std::ve | |||
| ir_node_ = std::static_pointer_cast<DatasetNode>(ds); | |||
| } | |||
| SemeionDataset::SemeionDataset(const std::vector<char> &dataset_dir, const std::shared_ptr<Sampler> &sampler, | |||
| const std::shared_ptr<DatasetCache> &cache) { | |||
| auto sampler_obj = sampler ? sampler->Parse() : nullptr; | |||
| auto ds = std::make_shared<SemeionNode>(CharToString(dataset_dir), sampler_obj, cache); | |||
| ir_node_ = std::static_pointer_cast<DatasetNode>(ds); | |||
| } | |||
| SemeionDataset::SemeionDataset(const std::vector<char> &dataset_dir, const Sampler *sampler, | |||
| const std::shared_ptr<DatasetCache> &cache) { | |||
| auto sampler_obj = sampler ? sampler->Parse() : nullptr; | |||
| auto ds = std::make_shared<SemeionNode>(CharToString(dataset_dir), sampler_obj, cache); | |||
| ir_node_ = std::static_pointer_cast<DatasetNode>(ds); | |||
| } | |||
| SemeionDataset::SemeionDataset(const std::vector<char> &dataset_dir, const std::reference_wrapper<Sampler> &sampler, | |||
| const std::shared_ptr<DatasetCache> &cache) { | |||
| auto sampler_obj = sampler.get().Parse(); | |||
| auto ds = std::make_shared<SemeionNode>(CharToString(dataset_dir), sampler_obj, cache); | |||
| ir_node_ = std::static_pointer_cast<DatasetNode>(ds); | |||
| } | |||
| TedliumDataset::TedliumDataset(const std::vector<char> &dataset_dir, const std::vector<char> &release, | |||
| const std::vector<char> &usage, const std::vector<char> &extensions, | |||
| const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache) { | |||
| @@ -49,6 +49,7 @@ | |||
| #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/penn_treebank_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/random_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/semeion_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/speech_commands_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/stl10_node.h" | |||
| #include "minddata/dataset/engine/ir/datasetops/source/tedlium_node.h" | |||
| @@ -474,6 +475,16 @@ PYBIND_REGISTER(SBUNode, 2, ([](const py::module *m) { | |||
| })); | |||
| })); | |||
PYBIND_REGISTER(SemeionNode, 2, ([](const py::module *m) {
                  // Expose SemeionNode to Python. The Python-side sampler handle is converted
                  // to its C++ SamplerObj form, and parameters are validated eagerly at
                  // construction time so errors surface in the constructor call.
                  (void)py::class_<SemeionNode, DatasetNode, std::shared_ptr<SemeionNode>>(*m, "SemeionNode",
                                                                                          "to create a SemeionNode")
                    .def(py::init([](std::string dataset_dir, py::handle sampler) {
                      auto semeion = std::make_shared<SemeionNode>(dataset_dir, toSamplerObj(sampler), nullptr);
                      THROW_IF_ERROR(semeion->ValidateParams());
                      return semeion;
                    }));
                }));
| PYBIND_REGISTER(SogouNewsNode, 2, ([](const py::module *m) { | |||
| (void)py::class_<SogouNewsNode, DatasetNode, std::shared_ptr<SogouNewsNode>>( | |||
| *m, "SogouNewsNode", "to create a SogouNewsNode") | |||
| @@ -33,6 +33,7 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||
| qmnist_op.cc | |||
| random_data_op.cc | |||
| sbu_op.cc | |||
| semeion_op.cc | |||
| sogou_news_op.cc | |||
| speech_commands_op.cc | |||
| stl10_op.cc | |||
| @@ -0,0 +1,178 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#include "minddata/dataset/engine/datasetops/source/semeion_op.h"

#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iomanip>

#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/execution_tree.h"
#include "utils/file_utils.h"
| namespace mindspore { | |||
| namespace dataset { | |||
| constexpr uint32_t kSemeionImageSize = 256; | |||
| constexpr uint32_t kSemeionLabelSize = 10; | |||
// Constructor of SemeionOp: forwards the worker/queue configuration to MappableLeafOp and
// takes ownership of the schema and sampler. Rows are read lazily by PrepareData().
SemeionOp::SemeionOp(const std::string &dataset_dir, int32_t num_parallel_workers,
                     std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, int32_t queue_size)
    : MappableLeafOp(num_parallel_workers, queue_size, std::move(sampler)),
      dataset_dir_(dataset_dir),
      data_schema_(std::move(data_schema)),
      semeionline_rows_({}) {}
| void SemeionOp::Print(std::ostream &out, bool show_all) const { | |||
| if (!show_all) { | |||
| // Call the super class for displaying any common 1-liner info. | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal 1-liner info for this op. | |||
| out << "\n"; | |||
| } else { | |||
| // Call the super class for displaying any common detailed info. | |||
| ParallelOp::Print(out, show_all); | |||
| // Then show any custom derived-internal stuff. | |||
| out << "\nNumber of rows: " << num_rows_ << "\nSemeionOp directory: " << dataset_dir_; | |||
| } | |||
| } | |||
| Status SemeionOp::PrepareData() { | |||
| auto real_path = FileUtils::GetRealPath(dataset_dir_.data()); | |||
| if (!real_path.has_value()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file path, Semeion Dataset folder: " + dataset_dir_ + " does not exist."); | |||
| } | |||
| Path data_folder(real_path.value()); | |||
| Path file_path = data_folder / "semeion.data"; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), | |||
| "Invalid file, failed to find semeion file: " + data_folder.ToString()); | |||
| MS_LOG(INFO) << "Semeion file found: " << file_path << "."; | |||
| std::ifstream handle(file_path.ToString()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(handle.is_open(), "Invalid file, failed to open file: " + file_path.ToString()); | |||
| std::string line; | |||
| while (getline(handle, line)) { | |||
| semeionline_rows_.push_back(line); | |||
| } | |||
| handle.close(); | |||
| num_rows_ = semeionline_rows_.size(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, | |||
| "Invalid data, SemeionDataset API can't read the data file (interface mismatch or no " | |||
| "data found). Check file path: " + | |||
| dataset_dir_); | |||
| return Status::OK(); | |||
| } | |||
| Status SemeionOp::TransRowIdResult(row_id_type index, std::shared_ptr<Tensor> *img_tensor, | |||
| std::shared_ptr<Tensor> *label_tensor) { | |||
| RETURN_UNEXPECTED_IF_NULL(img_tensor); | |||
| RETURN_UNEXPECTED_IF_NULL(label_tensor); | |||
| std::vector<uint8_t> img; | |||
| uint32_t label; | |||
| std::string line = semeionline_rows_[index]; | |||
| uint32_t i = 0; | |||
| while (i < kSemeionImageSize) { | |||
| auto pos = line.find(" "); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos != std::string::npos, "Invalid data, file content does not match SemeionDataset."); | |||
| std::string s = line.substr(0, pos); | |||
| uint8_t value_img; | |||
| try { | |||
| value_img = std::stoi(s); | |||
| } catch (std::exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, image data in file should be in type of uint8, but got: " + s + "."); | |||
| } | |||
| img.push_back(value_img); | |||
| line.erase(0, pos + 1); // to dedele space | |||
| ++i; | |||
| } | |||
| i = 0; | |||
| while (i < kSemeionLabelSize) { | |||
| auto pos = line.find(" "); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pos != std::string::npos, "Invalid data, file content does not match SemeionDataset."); | |||
| std::string s = line.substr(0, pos); | |||
| line.erase(0, pos + 1); | |||
| uint8_t value_label; | |||
| try { | |||
| value_label = std::stoi(s); | |||
| } catch (std::exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, label data in file should be in type of uint8, but got: " + s + "."); | |||
| } | |||
| if (value_label != 0) { | |||
| label = i; | |||
| break; | |||
| } | |||
| ++i; | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar(label, label_tensor)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(img, img_tensor)); | |||
| RETURN_IF_NOT_OK((*img_tensor)->Reshape(TensorShape{16, 16})); | |||
| return Status::OK(); | |||
| } | |||
| Status SemeionOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| RETURN_UNEXPECTED_IF_NULL(trow); | |||
| Path dir_path(dataset_dir_); | |||
| std::shared_ptr<Tensor> img_tensor, label_tensor; | |||
| RETURN_IF_NOT_OK(TransRowIdResult(row_id, &img_tensor, &label_tensor)); | |||
| (*trow) = TensorRow(row_id, {img_tensor, label_tensor}); | |||
| trow->setPath({dir_path.ToString(), dir_path.ToString()}); | |||
| return Status::OK(); | |||
| } | |||
| Status SemeionOp::CountTotalRows(const std::string &dataset_dir, int64_t *count) { | |||
| RETURN_UNEXPECTED_IF_NULL(count); | |||
| *count = 0; | |||
| const int64_t num_samples = 0; | |||
| const int64_t start_index = 0; | |||
| auto new_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); | |||
| // build a new unique schema object | |||
| auto new_schema = std::make_unique<DataSchema>(); | |||
| RETURN_IF_NOT_OK(new_schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||
| TensorShape label_scalar = TensorShape::CreateScalar(); | |||
| RETURN_IF_NOT_OK(new_schema->AddColumn( | |||
| ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &label_scalar))); | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| int32_t num_workers = cfg->num_parallel_workers(); | |||
| int32_t op_connect_size = cfg->op_connector_size(); | |||
| auto op = std::make_shared<SemeionOp>(dataset_dir, num_workers, std::move(new_schema), std::move(new_sampler), | |||
| op_connect_size); | |||
| RETURN_IF_NOT_OK(op->PrepareData()); | |||
| *count = static_cast<int64_t>(op->semeionline_rows_.size()); | |||
| return Status::OK(); | |||
| } | |||
| Status SemeionOp::ComputeColMap() { | |||
| // set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (uint32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,92 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SEMEION_OP_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SEMEION_OP_H_ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "minddata/dataset/core/tensor.h" | |||
| #include "minddata/dataset/engine/datasetops/parallel_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" | |||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | |||
| #include "minddata/dataset/engine/ir/cache/dataset_cache.h" | |||
| #include "minddata/dataset/util/path.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
/// \brief Mappable leaf op that reads the Semeion dataset from a single "semeion.data"
///   text file, producing an "image" (uint8) column and a scalar "label" (uint32) column.
class SemeionOp : public MappableLeafOp {
 public:
  /// \brief Constructor.
  /// \param[in] dataset_dir Directory of semeion dataset.
  /// \param[in] num_parallel_workers Num of workers in parallel.
  /// \param[in] data_schema Schema of dataset.
  /// \param[in] sampler Sampler tells SemeionOp what to read.
  /// \param[in] queue_size Connector queue size.
  SemeionOp(const std::string &dataset_dir, int32_t num_parallel_workers, std::unique_ptr<DataSchema> data_schema,
            std::shared_ptr<SamplerRT> sampler, int32_t queue_size);

  /// \brief Destructor.
  ~SemeionOp() = default;

  /// \brief A print method typically used for debugging.
  /// \param[in] out Out stream.
  /// \param[in] show_all Whether to show all information.
  void Print(std::ostream &out, bool show_all) const override;

  /// \brief Op name getter.
  /// \return Name of the current Op.
  std::string Name() const override { return "SemeionOp"; }

  /// \brief Count total rows.
  /// \param[in] dataset_dir File path.
  /// \param[out] count Get total row.
  /// \return Status The status code returned.
  static Status CountTotalRows(const std::string &dataset_dir, int64_t *count);

  /// \brief Function to count the number of samples in the SemeionOp.
  /// \return Status The status code returned.
  Status PrepareData() override;

 private:
  /// \brief Load a tensor row according to a pair.
  /// \param[in] index Index need to load.
  /// \param[out] trow Image & label read into this tensor row.
  /// \return Status The status code returned.
  Status LoadTensorRow(row_id_type index, TensorRow *trow) override;

  /// \brief Get the img and label according the row_id.
  /// \param[in] index Index of row need to load.
  /// \param[out] img_tensor The image data.
  /// \param[out] label_tensor The label data.
  /// \return Status The status code returned.
  Status TransRowIdResult(row_id_type index, std::shared_ptr<Tensor> *img_tensor,
                          std::shared_ptr<Tensor> *label_tensor);

  /// \brief Private function for computing the assignment of the column name map.
  /// \return Status The status code returned.
  Status ComputeColMap() override;

  const std::string dataset_dir_;              // root directory expected to contain "semeion.data"
  std::unique_ptr<DataSchema> data_schema_;    // output column definitions ("image", "label")
  std::vector<std::string> semeionline_rows_;  // raw text lines, one per sample, filled by PrepareData()
};
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_SEMEION_OP_H_ | |||
| @@ -108,6 +108,7 @@ constexpr char kPlaces365Node[] = "Places365Dataset"; | |||
| constexpr char kQMnistNode[] = "QMnistDataset"; | |||
| constexpr char kRandomNode[] = "RandomDataset"; | |||
| constexpr char kSBUNode[] = "SBUDataset"; | |||
| constexpr char kSemeionNode[] = "SemeionDataset"; | |||
| constexpr char kSogouNewsNode[] = "SogouNewsDataset"; | |||
| constexpr char kSpeechCommandsNode[] = "SpeechCommandsDataset"; | |||
| constexpr char kSTL10Node[] = "STL10Dataset"; | |||
| @@ -34,6 +34,7 @@ set(DATASET_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES | |||
| qmnist_node.cc | |||
| random_node.cc | |||
| sbu_node.cc | |||
| semeion_node.cc | |||
| sogou_news_node.cc | |||
| speech_commands_node.cc | |||
| stl10_node.cc | |||
| @@ -0,0 +1,111 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/engine/ir/datasetops/source/semeion_node.h" | |||
| #include <fstream> | |||
| #include <utility> | |||
| #include "minddata/dataset/engine/datasetops/source/semeion_op.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
// Constructor for SemeionNode: stores the dataset directory and sampler; the cache is
// handed to the MappableSourceNode base.
SemeionNode::SemeionNode(const std::string &dataset_dir, const std::shared_ptr<SamplerObj> &sampler,
                         const std::shared_ptr<DatasetCache> &cache)
    : MappableSourceNode(std::move(cache)), dataset_dir_(dataset_dir), sampler_(sampler) {}
| std::shared_ptr<DatasetNode> SemeionNode::Copy() { | |||
| std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy(); | |||
| auto node = std::make_shared<SemeionNode>(dataset_dir_, sampler, cache_); | |||
| return node; | |||
| } | |||
| void SemeionNode::Print(std::ostream &out) const { | |||
| out << (Name() + "(cache: " + ((cache_ != nullptr) ? "true" : "false") + ")"); | |||
| } | |||
| Status SemeionNode::ValidateParams() { | |||
| RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); | |||
| RETURN_IF_NOT_OK(ValidateDatasetDirParam("SemeionNode", dataset_dir_)); | |||
| RETURN_IF_NOT_OK(ValidateDatasetSampler("SemeionNode", sampler_)); | |||
| return Status::OK(); | |||
| } | |||
| Status SemeionNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { | |||
| auto schema = std::make_unique<DataSchema>(); | |||
| RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||
| TensorShape label_scalar = TensorShape::CreateScalar(); | |||
| RETURN_IF_NOT_OK( | |||
| schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &label_scalar))); | |||
| // Argument that is not exposed to user in the API. | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| auto semeion_op = std::make_shared<SemeionOp>(dataset_dir_, num_workers_, std::move(schema), std::move(sampler_rt), | |||
| connector_que_size_); | |||
| semeion_op->SetTotalRepeats(GetTotalRepeats()); | |||
| semeion_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch()); | |||
| node_ops->push_back(semeion_op); | |||
| return Status::OK(); | |||
| } | |||
// Get the shard id of node; delegated to the sampler (sampler_ is guaranteed non-null
// after ValidateParams()).
Status SemeionNode::GetShardId(int32_t *shard_id) {
  *shard_id = sampler_->ShardId();
  return Status::OK();
}
| Status SemeionNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate, | |||
| int64_t *dataset_size) { | |||
| if (dataset_size_ > 0) { | |||
| *dataset_size = dataset_size_; | |||
| return Status::OK(); | |||
| } | |||
| int64_t sample_size = -1; | |||
| int64_t num_rows = 0; | |||
| RETURN_IF_NOT_OK(SemeionOp::CountTotalRows(dataset_dir_, &num_rows)); | |||
| // give sampler the total number of files and check if num_samples is smaller. | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); | |||
| sample_size = sampler_rt->CalculateNumSamples(num_rows); | |||
| if (sample_size == -1) { | |||
| RETURN_IF_NOT_OK(size_getter->DryRun(shared_from_this(), &sample_size)); | |||
| } | |||
| *dataset_size = sample_size; | |||
| // We cache dataset size so as to not duplicated run. | |||
| dataset_size_ = *dataset_size; | |||
| return Status::OK(); | |||
| } | |||
| Status SemeionNode::to_json(nlohmann::json *out_json) { | |||
| nlohmann::json args, sampler_args; | |||
| RETURN_IF_NOT_OK(sampler_->to_json(&sampler_args)); | |||
| args["sampler"] = sampler_args; | |||
| args["num_parallel_workers"] = num_workers_; | |||
| args["dataset_dir"] = dataset_dir_; | |||
| if (cache_ != nullptr) { | |||
| nlohmann::json cache_args; | |||
| RETURN_IF_NOT_OK(cache_->to_json(&cache_args)); | |||
| args["cache"] = cache_args; | |||
| } | |||
| *out_json = args; | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,94 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_SEMEION_NODE_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_SEMEION_NODE_H_ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "minddata/dataset/engine/ir/datasetops/dataset_node.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
/// \class SemeionNode
/// \brief IR node for the Semeion dataset; Build() creates the runtime SemeionOp.
class SemeionNode : public MappableSourceNode {
 public:
  /// \brief Constructor.
  SemeionNode(const std::string &dataset_dir, const std::shared_ptr<SamplerObj> &sampler,
              const std::shared_ptr<DatasetCache> &cache);

  /// \brief Destructor.
  ~SemeionNode() = default;

  /// \brief Node name getter.
  /// \return Name of the current node.
  std::string Name() const override { return kSemeionNode; }

  /// \brief Print the description.
  /// \param[in] out The output stream to write output to.
  void Print(std::ostream &out) const override;

  /// \brief Copy the node to a new object.
  /// \return A shared pointer to the new copy.
  std::shared_ptr<DatasetNode> Copy() override;

  /// \brief a base class override function to create the required runtime dataset op objects for this class.
  /// \param[out] node_ops A vector containing shared pointer to the Dataset Ops that this object will create.
  /// \return Status Status::OK() if build successfully.
  Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override;

  /// \brief Parameters validation.
  /// \return Status Status::OK() if all the parameters are valid.
  Status ValidateParams() override;

  /// \brief Get the shard id of node.
  /// \param[out] shard_id Shard id.
  /// \return Status Status::OK() if get shard id successfully.
  Status GetShardId(int32_t *shard_id) override;

  /// \brief Base-class override for GetDatasetSize.
  /// \param[in] size_getter Shared pointer to DatasetSizeGetter.
  /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
  ///     dataset size at the expense of accuracy.
  /// \param[out] dataset_size The size of the dataset.
  /// \return Status of the function.
  Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
                        int64_t *dataset_size) override;

  /// \brief Get the arguments of node.
  /// \param[out] out_json JSON string of all attributes.
  /// \return Status of the function.
  Status to_json(nlohmann::json *out_json) override;

  /// \brief Sampler getter.
  /// \return SamplerObj of the current node.
  std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }

  /// \brief Sampler setter.
  /// \param[in] sampler Sampler object used to choose samples from the dataset.
  void SetSampler(std::shared_ptr<SamplerObj> sampler) override { sampler_ = sampler; }

  /// \brief DatasetDir getter.
  /// \return DatasetDir of the current node.
  const std::string &DatasetDir() const { return dataset_dir_; }

 private:
  std::string dataset_dir_;            // root directory expected to contain the semeion data file
  std::shared_ptr<SamplerObj> sampler_;  // IR sampler; built into a SamplerRT in Build()
};  // class SemeionNode
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_SEMEION_NODE_H_ | |||
| @@ -3916,6 +3916,85 @@ inline std::shared_ptr<SBUDataset> MS_API SBU(const std::string &dataset_dir, bo | |||
| return std::make_shared<SBUDataset>(StringToChar(dataset_dir), decode, sampler, cache); | |||
| } | |||
| /// \class SemeionDataset | |||
| /// \brief A source dataset for reading and parsing Semeion dataset. | |||
| class MS_API SemeionDataset : public Dataset { | |||
| public: | |||
| /// \brief Constructor of SemeionDataset. | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset. | |||
| /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. | |||
| /// \param[in] cache Tensor cache to use. | |||
| SemeionDataset(const std::vector<char> &dataset_dir, const ::std::shared_ptr<Sampler> &sampler, | |||
| const std::shared_ptr<DatasetCache> &cache); | |||
| /// \brief Constructor of SemeionDataset. | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset. | |||
| /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. | |||
| /// \param[in] cache Tensor cache to use. | |||
| SemeionDataset(const std::vector<char> &dataset_dir, const Sampler *sampler, | |||
| const std::shared_ptr<DatasetCache> &cache); | |||
| /// \brief Constructor of SemeionDataset. | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset. | |||
| /// \param[in] sampler Sampler object used to choose samples from the dataset. | |||
| /// \param[in] cache Tensor cache to use. | |||
| SemeionDataset(const std::vector<char> &dataset_dir, const ::std::reference_wrapper<Sampler> &samlper, | |||
| const std::shared_ptr<DatasetCache> &cache); | |||
| /// \brief Destructor of SemeionDataset. | |||
| ~SemeionDataset() = default; | |||
| }; | |||
/// \brief Function to create a Semeion Dataset.
/// \note The generated dataset has two columns ["image", "label"].
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
///     given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use (default=nullptr, which means no cache is used).
/// \return Shared pointer to the SemeionDataset.
/// \par Example
/// \code
///      /* Define dataset path and MindData object */
///      std::string folder_path = "/path/to/semeion_dataset_directory";
///      std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<SequentialSampler>(0, 6));
///
///      /* Create iterator to read dataset */
///      std::shared_ptr<Iterator> iter = ds->CreateIterator();
///      std::unordered_map<std::string, mindspore::MSTensor> row;
///      iter->GetNextRow(&row);
///
///      /* Note: In Semeion dataset, each dictionary has keys "image" and "label" */
///      auto image = row["image"];
/// \endcode
inline std::shared_ptr<SemeionDataset> MS_API
Semeion(const std::string &dataset_dir, const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
        const std::shared_ptr<DatasetCache> &cache = nullptr) {
  return std::make_shared<SemeionDataset>(StringToChar(dataset_dir), sampler, cache);
}
| /// \brief Function to create a Semeion Dataset | |||
| /// \note The generated dataset has two columns ["image", "label"]. | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset. | |||
| /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. | |||
| /// \param[in] cache Tensor cache to use (default=nullptr, which means no cache is used). | |||
| /// \return Shared pointer to the SemeionDataset. | |||
| inline std::shared_ptr<SemeionDataset> MS_API Semeion(const std::string &dataset_dir, | |||
| const std::reference_wrapper<Sampler> sampler, | |||
| const std::shared_ptr<DatasetCache> &cache = nullptr) { | |||
| return std::make_shared<SemeionDataset>(StringToChar(dataset_dir), sampler, cache); | |||
| } | |||
| /// \brief Function to create a Semeion Dataset. | |||
| /// \note The generated dataset has two columns ["image", "label"]. | |||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset. | |||
| /// \param[in] sampler Sampler object used to choose samples from the dataset. | |||
| /// \param[in] cache Tensor cache to use (default=nullptr, which means no cache is used). | |||
| /// \return Shared pointer to the SemeionDataset. | |||
| inline std::shared_ptr<SemeionDataset> MS_API Semeion(const std::string &dataset_dir, Sampler *sampler, | |||
| const std::shared_ptr<DatasetCache> &cache = nullptr) { | |||
| return std::make_shared<SemeionDataset>(StringToChar(dataset_dir), sampler, cache); | |||
| } | |||
| /// \class SogouNewsDataset | |||
| /// \brief A source dataset for reading and parsing Sogou News dataset. | |||
| class MS_API SogouNewsDataset : public Dataset { | |||
| @@ -56,6 +56,7 @@ class MS_API Sampler : std::enable_shared_from_this<Sampler> { | |||
| friend class QMnistDataset; | |||
| friend class RandomDataDataset; | |||
| friend class SBUDataset; | |||
| friend class SemeionDataset; | |||
| friend class SpeechCommandsDataset; | |||
| friend class STL10Dataset; | |||
| friend class TedliumDataset; | |||
| @@ -75,7 +75,7 @@ from .validators import check_batch, check_shuffle, check_map, check_filter, che | |||
| check_yes_no_dataset, check_speech_commands_dataset, check_tedlium_dataset, check_svhn_dataset, \ | |||
| check_stl10_dataset, check_yelp_review_dataset, check_penn_treebank_dataset, check_iwslt2016_dataset, \ | |||
| check_iwslt2017_dataset, check_sogou_news_dataset, check_yahoo_answers_dataset, check_udpos_dataset,\ | |||
| check_conll2000_dataset, check_amazon_review_dataset | |||
| check_conll2000_dataset, check_amazon_review_dataset, check_semeion_dataset | |||
| from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \ | |||
| get_prefetch_size | |||
| from ..core.datatypes import mstype_to_detype, mstypelist_to_detypelist | |||
| @@ -9430,6 +9430,126 @@ class YesNoDataset(MappableDataset): | |||
| return cde.YesNoNode(self.dataset_dir, self.sampler) | |||
class SemeionDataset(MappableDataset):
    """
    A source dataset for reading and parsing Semeion dataset.

    The generated dataset has two columns :py:obj:`[image, label]`.
    The tensor of column :py:obj:`image` is of the uint8 type.
    The tensor of column :py:obj:`label` is a scalar of the uint32 type.

    Args:
        dataset_dir (str): Path to the root directory that contains the dataset.
        num_samples (int, optional): The number of samples to be included in the dataset
            (default=None, will read all images).
        num_parallel_workers (int, optional): Number of workers to read the data
            (default=None, number set in the config).
        shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
            order behavior shown in the table).
        sampler (Sampler, optional): Object used to choose samples from the
            dataset (default=None, expected order behavior shown in the table).
        num_shards (int, optional): Number of shards that the dataset will be divided
            into (default=None). When this argument is specified, `num_samples` reflects
            the maximum sample number of per shard.
        shard_id (int, optional): The shard ID within num_shards (default=None). This
            argument can only be specified when num_shards is also specified.
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing
            (default=None, which means no cache is used).

    Raises:
        RuntimeError: If num_parallel_workers exceeds the max thread numbers.
        RuntimeError: If sampler and shuffle are specified at the same time.
        RuntimeError: If sampler and sharding are specified at the same time.
        RuntimeError: If num_shards is specified but shard_id is None.
        RuntimeError: If shard_id is specified but num_shards is None.
        ValueError: If shard_id is invalid (< 0 or >= num_shards).

    Note:
        - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
          The table below shows what input arguments are allowed and their expected behavior.

    .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
       :widths: 25 25 50
       :header-rows: 1

       * - Parameter `sampler`
         - Parameter `shuffle`
         - Expected Order Behavior
       * - None
         - None
         - random order
       * - None
         - True
         - random order
       * - None
         - False
         - sequential order
       * - Sampler object
         - None
         - order defined by sampler
       * - Sampler object
         - True
         - not allowed
       * - Sampler object
         - False
         - not allowed

    Examples:
        >>> semeion_dataset_dir = "/path/to/semeion_dataset_directory"
        >>>
        >>> # 1) Get all samples from SEMEION dataset in sequence
        >>> dataset = ds.SemeionDataset(dataset_dir=semeion_dataset_dir, shuffle=False)
        >>>
        >>> # 2) Randomly select 10 samples from SEMEION dataset
        >>> dataset = ds.SemeionDataset(dataset_dir=semeion_dataset_dir, num_samples=10, shuffle=True)
        >>>
        >>> # 3) Get samples from SEMEION dataset for shard 0 in a 2-way distributed training
        >>> dataset = ds.SemeionDataset(dataset_dir=semeion_dataset_dir, num_shards=2, shard_id=0)
        >>>
        >>> # In SEMEION dataset, each dictionary has keys: image, label.

    About SEMEION dataset:

    The dataset was created by Tactile Srl, Brescia, Italy (http://www.tattile.it) and donated in 1994
    to Semeion Research Center of Sciences of Communication, Rome, Italy (http://www.semeion.it),
    for machine learning research.

    This dataset consists of 1593 records (rows) and 256 attributes (columns). Each record represents
    a handwritten digit, originally scanned with a resolution of 256 grey scale. Each pixel of the each
    original scanned image was first stretched, and after scaled between 0 and 1
    (setting to 0 every pixel whose value was under the value 127 of the grey scale (127 included)
    and setting to 1 each pixel whose original value in the grey scale was over 127). Finally, each binary image
    was scaled again into a 16x16 square box (the final 256 binary attributes).

    .. code-block::

        .
        └── semeion_dataset_dir
            └──semeion.data
            └──semeion.names

    Citation:

    .. code-block::

        @article{
        title={The Theory of Independent Judges, in Substance Use & Misuse 33(2)1998, pp 439-461},
        author={M Buscema, MetaNet},
        }
    """

    @check_semeion_dataset
    def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None,
                 sampler=None, num_shards=None, shard_id=None, cache=None):
        # Sampler/shuffle/shard reconciliation is handled entirely by MappableDataset;
        # this subclass only needs to remember which directory the IR node reads from.
        super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
                         shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
        self.dataset_dir = dataset_dir

    def parse(self, children=None):
        # Build the C++ IR leaf node for this dataset (children is unused for leaf nodes).
        return cde.SemeionNode(self.dataset_dir, self.sampler)
| class TedliumDataset(MappableDataset): | |||
| """ | |||
| A source dataset for reading and parsing Tedlium dataset. | |||
| @@ -2329,3 +2329,29 @@ def check_amazon_review_dataset(method): | |||
| return method(self, *args, **kwargs) | |||
| return new_method | |||
def check_semeion_dataset(method):
    """Wrapper method to check the parameters of SemeionDataset."""

    @wraps(method)
    def new_method(self, *args, **kwargs):
        # Fold positional and keyword arguments into a single parameter dict.
        _, param_dict = parse_user_args(method, *args, **kwargs)

        # dataset_dir must refer to an existing, accessible directory.
        check_dir(param_dict.get('dataset_dir'))

        # Optional numeric and boolean options must be well-typed when present.
        validate_dataset_param_value(['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id'],
                                     param_dict, int)
        validate_dataset_param_value(['shuffle'], param_dict, bool)

        # sampler/shuffle and sharding options are mutually constrained.
        check_sampler_shuffle_shard_options(param_dict)

        check_cache_option(param_dict.get('cache'))

        return method(self, *args, **kwargs)

    return new_method
| @@ -44,6 +44,7 @@ SET(DE_UT_SRCS | |||
| c_api_dataset_randomdata_test.cc | |||
| c_api_dataset_save.cc | |||
| c_api_dataset_sbu_test.cc | |||
| c_api_dataset_semeion_test.cc | |||
| c_api_dataset_sogou_news_test.cc | |||
| c_api_dataset_speech_commands_test.cc | |||
| c_api_dataset_stl10_test.cc | |||
| @@ -0,0 +1,211 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "common/common.h" | |||
| #include "minddata/dataset/include/dataset/datasets.h" | |||
| using namespace mindspore::dataset; | |||
| using mindspore::dataset::DataType; | |||
| using mindspore::dataset::Tensor; | |||
| using mindspore::dataset::TensorShape; | |||
// Gtest fixture shared by all Semeion pipeline tests; setup/teardown behaviour
// (e.g. datasets_root_path_) comes from the UT::DatasetOpTesting base class.
class MindDataTestPipeline : public UT::DatasetOpTesting {
 protected:
};
/// Feature: SemeionDataset.
/// Description: read samples from all files using RandomSampler(false, 5).
/// Expectation: exactly 5 samples are produced.
TEST_F(MindDataTestPipeline, TestSemeionDataset) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionDataset.";

  // Create a Semeion Dataset.
  std::string folder_path = datasets_root_path_ + "/testSemeionData";
  std::shared_ptr<Dataset> ds = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  EXPECT_NE(row.find("image"), row.end());
  EXPECT_NE(row.find("label"), row.end());

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  // RandomSampler(false, 5) draws exactly 5 samples without replacement.
  EXPECT_EQ(i, 5);

  // Manually terminate the pipeline.
  iter->Stop();
}
| /// Feature: SemeionDataset. | |||
| /// Description: read some samples with pipeline from all files. | |||
| /// Expectation: 10 samples. | |||
| TEST_F(MindDataTestPipeline, TestSemeionDatasetWithPipeline) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionDatasetWithPipeline."; | |||
| // Create two Semeion Dataset. | |||
| std::string folder_path = datasets_root_path_ + "/testSemeionData"; | |||
| std::shared_ptr<Dataset> ds1 = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr); | |||
| std::shared_ptr<Dataset> ds2 = Semeion(folder_path, std::make_shared<RandomSampler>(false, 5), nullptr); | |||
| EXPECT_NE(ds1, nullptr); | |||
| EXPECT_NE(ds2, nullptr); | |||
| // Create two Repeat operation on ds. | |||
| int32_t repeat_num = 1; | |||
| ds1 = ds1->Repeat(repeat_num); | |||
| EXPECT_NE(ds1, nullptr); | |||
| repeat_num = 1; | |||
| ds2 = ds2->Repeat(repeat_num); | |||
| EXPECT_NE(ds2, nullptr); | |||
| // Create two Project operation on ds. | |||
| std::vector<std::string> column_project = {"image", "label"}; | |||
| ds1 = ds1->Project(column_project); | |||
| EXPECT_NE(ds1, nullptr); | |||
| ds2 = ds2->Project(column_project); | |||
| EXPECT_NE(ds2, nullptr); | |||
| // Create a Concat operation on the ds. | |||
| ds1 = ds1->Concat({ds2}); | |||
| EXPECT_NE(ds1, nullptr); | |||
| // Create an iterator over the result of the above dataset. | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds1->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // Iterate the dataset and get each row. | |||
| std::unordered_map<std::string, mindspore::MSTensor> row; | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| EXPECT_NE(row.find("image"), row.end()); | |||
| EXPECT_NE(row.find("label"), row.end()); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor shape: " << image.Shape(); | |||
| ASSERT_OK(iter->GetNextRow(&row)); | |||
| } | |||
| EXPECT_EQ(i, 10); | |||
| // Manually terminate the pipeline. | |||
| iter->Stop(); | |||
| } | |||
| /// Feature: SemeionDataset. | |||
| /// Description: read number of all samples from all files according to different versions. | |||
| /// Expectation: 10. | |||
| TEST_F(MindDataTestPipeline, TestSemeionGetDatasetSize) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionGetDatasetSize."; | |||
| // Create a Semeion Dataset. | |||
| std::string folder_path = datasets_root_path_ + "/testSemeionData"; | |||
| std::shared_ptr<Dataset> ds = Semeion(folder_path); | |||
| EXPECT_NE(ds, nullptr); | |||
| EXPECT_EQ(ds->GetDatasetSize(), 10); | |||
| } | |||
/// Feature: Data Property Testing.
/// Description: Includes tests for shape, type, size.
/// Expectation: correct shape, type, size.
TEST_F(MindDataTestPipeline, TestSemeionGetters) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionGetters.";

  // Create a Semeion Dataset.
  std::string folder_path = datasets_root_path_ + "/testSemeionData";
  std::shared_ptr<Dataset> ds = Semeion(folder_path);
  EXPECT_NE(ds, nullptr);
  EXPECT_EQ(ds->GetDatasetSize(), 10);

  std::vector<DataType> types = ToDETypes(ds->GetOutputTypes());
  std::vector<TensorShape> shapes = ToTensorShapeVec(ds->GetOutputShapes());
  std::vector<std::string> column_names = {"image", "label"};
  int64_t num_classes = ds->GetNumClasses();
  // Expected layout: image is a 16x16 uint8 tensor, label a uint32 scalar.
  EXPECT_EQ(types.size(), 2);
  EXPECT_EQ(types[0].ToString(), "uint8");
  EXPECT_EQ(types[1].ToString(), "uint32");
  EXPECT_EQ(shapes.size(), 2);
  EXPECT_EQ(shapes[0].ToString(), "<16,16>");
  EXPECT_EQ(shapes[1].ToString(), "<>");
  // Semeion exposes no class metadata, so GetNumClasses() reports -1.
  EXPECT_EQ(num_classes, -1);
  EXPECT_EQ(ds->GetBatchSize(), 1);
  EXPECT_EQ(ds->GetRepeatCount(), 1);

  // NOTE(review): the getters are re-queried below in varying order — presumably to
  // confirm results stay consistent across repeated calls; TODO confirm intent.
  EXPECT_EQ(ds->GetDatasetSize(), 10);
  EXPECT_EQ(ToDETypes(ds->GetOutputTypes()), types);
  EXPECT_EQ(ToTensorShapeVec(ds->GetOutputShapes()), shapes);
  EXPECT_EQ(ds->GetNumClasses(), -1);

  EXPECT_EQ(ds->GetColumnNames(), column_names);
  EXPECT_EQ(ds->GetDatasetSize(), 10);
  EXPECT_EQ(ToDETypes(ds->GetOutputTypes()), types);
  EXPECT_EQ(ToTensorShapeVec(ds->GetOutputShapes()), shapes);
  EXPECT_EQ(ds->GetBatchSize(), 1);
  EXPECT_EQ(ds->GetRepeatCount(), 1);
  EXPECT_EQ(ds->GetNumClasses(), -1);
  EXPECT_EQ(ds->GetDatasetSize(), 10);
}
| /// Feature: SemeionDataset. | |||
| /// Description: test with invalid path. | |||
| /// Expectation: unable to read in data. | |||
| TEST_F(MindDataTestPipeline, TestSemeionDatasetFail) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionDatasetFail."; | |||
| // Create a Semeion Dataset. | |||
| std::string folder_path = datasets_root_path_ + "/testSemeionData"; | |||
| std::shared_ptr<Dataset> ds = Semeion("", std::make_shared<RandomSampler>(false, 4)); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| // Expect failure: invalid Semeion input. | |||
| EXPECT_EQ(iter, nullptr); | |||
| } | |||
| /// Feature: SemeionDataset. | |||
| /// Description: test with null sampler. | |||
| /// Expectation: unable to read in data. | |||
| TEST_F(MindDataTestPipeline, TestSemeionDatasetWithNullSamplerFail) { | |||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSemeionDatasetWithNullSamplerFail."; | |||
| // Create a Semeion Dataset. | |||
| std::string folder_path = datasets_root_path_ + "/testSemeionData"; | |||
| std::shared_ptr<Dataset> ds = Semeion(folder_path, nullptr); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| // Expect failure: invalid Semeion input, sampler cannot be nullptr. | |||
| EXPECT_EQ(iter, nullptr); | |||
| } | |||
| @@ -0,0 +1,10 @@ | |||
| 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1 0 0 0 0 0 0 0 0 0 | |||
| 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1 0 0 0 0 0 0 0 0 0 | |||
| 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1 0 0 0 0 0 0 0 0 0 | |||
| 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1 0 0 0 0 0 0 0 0 0 | |||
| 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1 0 0 0 0 0 0 0 0 0 | |||
| 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1 0 0 0 0 0 0 0 0 0 | |||
| 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1 0 0 0 0 0 0 0 0 0 | |||
| 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1 0 0 0 0 0 0 0 0 0 | |||
| 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1 0 0 0 0 0 0 0 0 0 | |||
| 1.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 0.0000 0.0000 0.0000 0 0 1 0 0 0 0 0 0 0 | |||
| @@ -0,0 +1,247 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================== | |||
| import os | |||
| import matplotlib.pyplot as plt | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.dataset as ds | |||
| import mindspore.dataset.vision.c_transforms as c_vision | |||
| DATA_DIR_SEMEION = "../data/dataset/testSemeionData" | |||
| def load_semeion(path): | |||
| """ | |||
| load Semeion data | |||
| """ | |||
| fp = os.path.realpath(os.path.join(path, "semeion.data")) | |||
| data = np.loadtxt(fp) | |||
| images = (data[:, :256]).astype('uint8') | |||
| images = images.reshape(-1, 16, 16) | |||
| labels = np.nonzero(data[:, 256:])[1] | |||
| return images, labels | |||
def visualize_dataset(images, labels):
    """
    Helper that plots every sample of the dataset in a single figure row.
    """
    total = len(images)
    for idx, (img, caption) in enumerate(zip(images, labels)):
        plt.subplot(1, total, idx + 1)
        plt.imshow(img)
        plt.title(caption)
    plt.show()
def test_semeion_content_check():
    """
    Feature: SemeionDataset
    Description: Check content of each sample
    Expectation: correct content
    """
    dataset = ds.SemeionDataset(DATA_DIR_SEMEION, num_samples=10, shuffle=False)
    ref_images, ref_labels = load_semeion(DATA_DIR_SEMEION)
    # each dictionary produced by the iterator has keys "image" and "label"
    row_count = 0
    for index, row in enumerate(dataset.create_dict_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_array_equal(row["image"], ref_images[index])
        np.testing.assert_array_equal(row["label"], ref_labels[index])
        row_count += 1
    assert row_count == 10
def test_semeion_basic():
    """
    Feature: SemeionDataset
    Description: use different data to test the functions of different versions
    Expectation: all samples(10)
        num_samples
            set 5
            get 5
        num_parallel_workers
            set 1(num_samples=6)
            get 6
        num repeat
            set 3(num_samples=3)
            get 9
    """
    # case 0: reading everything yields all 10 samples
    full_set = ds.SemeionDataset(DATA_DIR_SEMEION)
    assert sum(1 for _ in full_set.create_dict_iterator(num_epochs=1)) == 10

    # case 1: num_samples caps the number of rows produced
    capped = ds.SemeionDataset(DATA_DIR_SEMEION, num_samples=5)
    assert sum(1 for _ in capped.create_dict_iterator(num_epochs=1)) == 5

    # case 2: num_parallel_workers must not change the row count
    single_worker = ds.SemeionDataset(DATA_DIR_SEMEION, num_samples=6, num_parallel_workers=1)
    assert sum(1 for _ in single_worker.create_dict_iterator(num_epochs=1)) == 6

    # case 3: repeat multiplies the row count
    repeated = ds.SemeionDataset(DATA_DIR_SEMEION, num_samples=3).repeat(3)
    assert sum(1 for _ in repeated.create_dict_iterator(num_epochs=1)) == 9
def test_semeion_sequential_sampler():
    """
    Feature: SemeionDataset
    Description: test semeion sequential sampler
    Expectation: correct data
    """
    sample_count = 4
    # an explicit SequentialSampler must match shuffle=False + num_samples
    sampled = ds.SemeionDataset(DATA_DIR_SEMEION,
                                sampler=ds.SequentialSampler(num_samples=sample_count))
    plain = ds.SemeionDataset(DATA_DIR_SEMEION, shuffle=False, num_samples=sample_count)
    seen = 0
    for row_a, row_b in zip(sampled.create_dict_iterator(num_epochs=1, output_numpy=True),
                            plain.create_dict_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_equal(row_a["label"], row_b["label"])
        np.testing.assert_equal(row_a["image"], row_b["image"])
        seen += 1
    assert seen == sample_count
def test_semeion_exceptions():
    """
    Feature: SemeionDataset
    Description: pass invalid argument combinations
    Expectation: each construction raises the expected exception
    """
    msg_sampler_shuffle = "sampler and shuffle cannot be specified at the same time"
    with pytest.raises(RuntimeError, match=msg_sampler_shuffle):
        ds.SemeionDataset(DATA_DIR_SEMEION, shuffle=False, sampler=ds.PKSampler(3))

    msg_sampler_shard = "sampler and sharding cannot be specified at the same time"
    with pytest.raises(RuntimeError, match=msg_sampler_shard):
        ds.SemeionDataset(DATA_DIR_SEMEION, sampler=ds.PKSampler(3), num_shards=2, shard_id=0)

    msg_missing_shard_id = "num_shards is specified and currently requires shard_id as well"
    with pytest.raises(RuntimeError, match=msg_missing_shard_id):
        ds.SemeionDataset(DATA_DIR_SEMEION, num_shards=10)

    msg_missing_num_shards = "shard_id is specified but num_shards is not"
    with pytest.raises(RuntimeError, match=msg_missing_num_shards):
        ds.SemeionDataset(DATA_DIR_SEMEION, shard_id=0)

    # shard_id must lie inside [0, num_shards)
    msg_bad_shard_id = "Input shard_id is not within the required interval"
    for bad_id in (-1, 5):
        with pytest.raises(ValueError, match=msg_bad_shard_id):
            ds.SemeionDataset(DATA_DIR_SEMEION, num_shards=2, shard_id=bad_id)

    # worker count must stay within the supported range
    msg_bad_workers = "num_parallel_workers exceeds"
    for workers in (0, 256):
        with pytest.raises(ValueError, match=msg_bad_workers):
            ds.SemeionDataset(DATA_DIR_SEMEION, shuffle=False, num_parallel_workers=workers)
def test_semeion_visualize(plot=False):
    """
    Feature: SemeionDataset
    Description: visualize SemeionDataset results
    Expectation: visualization
    """
    dataset = ds.SemeionDataset(DATA_DIR_SEMEION, num_samples=10, shuffle=False)
    image_list, label_list = [], []
    row_count = 0
    for sample in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        img = sample["image"]
        lab = sample["label"]
        # every image is a 16x16 uint8 array; every label is uint32
        assert isinstance(img, np.ndarray)
        assert img.shape == (16, 16)
        assert img.dtype == np.uint8
        assert lab.dtype == np.uint32
        image_list.append(img)
        label_list.append("label {}".format(lab))
        row_count += 1
    assert row_count == 10
    if plot:
        visualize_dataset(image_list, label_list)
def test_semeion_exception_file_path():
    """
    Feature: SemeionDataset
    Description: error test
    Expectation: throw error

    A map() user function that raises must surface as a RuntimeError from
    the iterator, for both the "image" and the "label" column.
    """
    def exception_func(item):
        raise Exception("Error occur!")

    # brackets and dot are escaped: pytest.raises(match=...) is a regex search
    error_msg = r"map operation: \[PyFunc\] failed\. The corresponding data files"
    for column in ["image", "label"]:
        data = ds.SemeionDataset(DATA_DIR_SEMEION)
        data = data.map(operations=exception_func, input_columns=[column],
                        num_parallel_workers=1)
        # pytest.raises replaces the try/except/assert-False pattern and
        # fails the test automatically if nothing is raised
        with pytest.raises(RuntimeError, match=error_msg):
            for _ in data.create_dict_iterator():
                pass
def test_semeion_pipeline():
    """
    Feature: SemeionDataset
    Description: Read a sample
    Expectation: The amount of each function are equal
    """
    # load one original image, then run it through a Resize op
    dataset = ds.SemeionDataset(DATA_DIR_SEMEION, num_samples=1)
    dataset = dataset.map(operations=c_vision.Resize((100, 100)),
                          input_columns=["image"], num_parallel_workers=1)
    rows = sum(1 for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True))
    assert rows == 1
if __name__ == '__main__':
    # run every test case in order when executed as a script
    for case in (test_semeion_content_check,
                 test_semeion_basic,
                 test_semeion_sequential_sampler,
                 test_semeion_exceptions,
                 test_semeion_visualize,
                 test_semeion_exception_file_path,
                 test_semeion_pipeline):
        case()