Merge pull request !7540 from ZiruiWu/refactor_ir_demotags/v1.1.0
| @@ -78,6 +78,8 @@ add_dependencies(callback core) | |||||
| add_dependencies(text core) | add_dependencies(text core) | ||||
| add_dependencies(text-kernels core) | add_dependencies(text-kernels core) | ||||
| add_dependencies(cpp-API core) | add_dependencies(cpp-API core) | ||||
| add_dependencies(engine-ir-datasetops core) | |||||
| add_dependencies(engine-ir-datasetops-source core) | |||||
| if (ENABLE_PYTHON) | if (ENABLE_PYTHON) | ||||
| add_dependencies(APItoPython core) | add_dependencies(APItoPython core) | ||||
| endif() | endif() | ||||
| @@ -99,6 +101,8 @@ set(submodules | |||||
| $<TARGET_OBJECTS:lite-cv> | $<TARGET_OBJECTS:lite-cv> | ||||
| $<TARGET_OBJECTS:kernels-data> | $<TARGET_OBJECTS:kernels-data> | ||||
| $<TARGET_OBJECTS:cpp-API> | $<TARGET_OBJECTS:cpp-API> | ||||
| $<TARGET_OBJECTS:engine-ir-datasetops> | |||||
| $<TARGET_OBJECTS:engine-ir-datasetops-source> | |||||
| $<TARGET_OBJECTS:kernels-soft-dvpp-image> | $<TARGET_OBJECTS:kernels-soft-dvpp-image> | ||||
| $<TARGET_OBJECTS:soft-dvpp-utils> | $<TARGET_OBJECTS:soft-dvpp-utils> | ||||
| $<TARGET_OBJECTS:engine-datasetops-source> | $<TARGET_OBJECTS:engine-datasetops-source> | ||||
| @@ -61,6 +61,10 @@ | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" | ||||
| // IR nodes | |||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" | |||||
| #include "minddata/dataset/core/config_manager.h" | #include "minddata/dataset/core/config_manager.h" | ||||
| #include "minddata/dataset/util/path.h" | #include "minddata/dataset/util/path.h" | ||||
| #include "minddata/dataset/util/random.h" | #include "minddata/dataset/util/random.h" | ||||
| @@ -69,15 +73,6 @@ namespace mindspore { | |||||
| namespace dataset { | namespace dataset { | ||||
| namespace api { | namespace api { | ||||
| #define RETURN_EMPTY_IF_ERROR(_s) \ | |||||
| do { \ | |||||
| Status __rc = (_s); \ | |||||
| if (__rc.IsError()) { \ | |||||
| MS_LOG(ERROR) << __rc; \ | |||||
| return {}; \ | |||||
| } \ | |||||
| } while (false) | |||||
| // Function to create the iterator, which will build and launch the execution tree. | // Function to create the iterator, which will build and launch the execution tree. | ||||
| std::shared_ptr<Iterator> Dataset::CreateIterator(std::vector<std::string> columns) { | std::shared_ptr<Iterator> Dataset::CreateIterator(std::vector<std::string> columns) { | ||||
| std::shared_ptr<Iterator> iter; | std::shared_ptr<Iterator> iter; | ||||
| @@ -1283,43 +1278,6 @@ std::vector<std::shared_ptr<DatasetOp>> CSVNode::Build() { | |||||
| node_ops.push_back(csv_op); | node_ops.push_back(csv_op); | ||||
| return node_ops; | return node_ops; | ||||
| } | } | ||||
| ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, | |||||
| bool recursive, std::set<std::string> extensions, | |||||
| std::map<std::string, int32_t> class_indexing) | |||||
| : dataset_dir_(dataset_dir), | |||||
| decode_(decode), | |||||
| sampler_(sampler), | |||||
| recursive_(recursive), | |||||
| class_indexing_(class_indexing), | |||||
| exts_(extensions) {} | |||||
| Status ImageFolderNode::ValidateParams() { | |||||
| RETURN_IF_NOT_OK(ValidateDatasetDirParam("ImageFolderNode", dataset_dir_)); | |||||
| RETURN_IF_NOT_OK(ValidateDatasetSampler("ImageFolderNode", sampler_)); | |||||
| return Status::OK(); | |||||
| } | |||||
| std::vector<std::shared_ptr<DatasetOp>> ImageFolderNode::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| // Do internal Schema generation. | |||||
| // This arg is exist in ImageFolderOp, but not externalized (in Python API). | |||||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||||
| TensorShape scalar = TensorShape::CreateScalar(); | |||||
| RETURN_EMPTY_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||||
| RETURN_EMPTY_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar))); | |||||
| node_ops.push_back(std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, | |||||
| recursive_, decode_, exts_, class_indexing_, std::move(schema), | |||||
| std::move(sampler_->Build()))); | |||||
| return node_ops; | |||||
| } | |||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| ManifestNode::ManifestNode(const std::string &dataset_file, const std::string &usage, | ManifestNode::ManifestNode(const std::string &dataset_file, const std::string &usage, | ||||
| const std::shared_ptr<SamplerObj> &sampler, | const std::shared_ptr<SamplerObj> &sampler, | ||||
| @@ -1800,54 +1758,6 @@ std::vector<std::shared_ptr<DatasetOp>> VOCNode::Build() { | |||||
| } | } | ||||
| #endif | #endif | ||||
| // DERIVED DATASET CLASSES LEAF-NODE DATASETS | |||||
| // (In alphabetical order) | |||||
| BatchNode::BatchNode(std::shared_ptr<Dataset> child, int32_t batch_size, bool drop_remainder, bool pad, | |||||
| std::vector<std::string> cols_to_map, | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map) | |||||
| : batch_size_(batch_size), | |||||
| drop_remainder_(drop_remainder), | |||||
| pad_(pad), | |||||
| cols_to_map_(cols_to_map), | |||||
| pad_map_(pad_map) { | |||||
| this->children.push_back(child); | |||||
| } | |||||
| std::vector<std::shared_ptr<DatasetOp>> BatchNode::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| #ifdef ENABLE_PYTHON | |||||
| py::function noop; | |||||
| node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, | |||||
| cols_to_map_, cols_to_map_, noop, noop, pad_map_)); | |||||
| #else | |||||
| node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, | |||||
| cols_to_map_, pad_map_)); | |||||
| #endif | |||||
| // Until py::function is implemented for C++ API, there is no need for a project op to be inserted after batch | |||||
| // because project is only needed when batch op performs per_batch_map. This per_batch_map is a pyfunc | |||||
| return node_ops; | |||||
| } | |||||
| Status BatchNode::ValidateParams() { | |||||
| if (batch_size_ <= 0) { | |||||
| std::string err_msg = "BatchNode: batch_size should be positive integer, but got: " + std::to_string(batch_size_); | |||||
| MS_LOG(ERROR) << err_msg; | |||||
| RETURN_STATUS_SYNTAX_ERROR(err_msg); | |||||
| } | |||||
| if (!cols_to_map_.empty()) { | |||||
| std::string err_msg = "BatchNode: cols_to_map functionality is not implemented in C++; this should be left empty."; | |||||
| MS_LOG(ERROR) << err_msg; | |||||
| RETURN_STATUS_SYNTAX_ERROR(err_msg); | |||||
| } | |||||
| return Status::OK(); | |||||
| } | |||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| BucketBatchByLengthNode::BucketBatchByLengthNode( | BucketBatchByLengthNode::BucketBatchByLengthNode( | ||||
| std::shared_ptr<Dataset> child, const std::vector<std::string> &column_names, | std::shared_ptr<Dataset> child, const std::vector<std::string> &column_names, | ||||
| @@ -1884,7 +1794,7 @@ std::vector<std::shared_ptr<DatasetOp>> BucketBatchByLengthNode::Build() { | |||||
| Status BucketBatchByLengthNode::ValidateParams() { | Status BucketBatchByLengthNode::ValidateParams() { | ||||
| if (element_length_function_ == nullptr && column_names_.size() != 1) { | if (element_length_function_ == nullptr && column_names_.size() != 1) { | ||||
| std::string err_msg = "BucketBatchByLengthNode: element_length_function not specified, but not one column name: " + | std::string err_msg = "BucketBatchByLengthNode: element_length_function not specified, but not one column name: " + | ||||
| column_names_.size(); | |||||
| std::to_string(column_names_.size()); | |||||
| MS_LOG(ERROR) << err_msg; | MS_LOG(ERROR) << err_msg; | ||||
| RETURN_STATUS_SYNTAX_ERROR(err_msg); | RETURN_STATUS_SYNTAX_ERROR(err_msg); | ||||
| } | } | ||||
| @@ -1,8 +1,10 @@ | |||||
| add_subdirectory(datasetops) | add_subdirectory(datasetops) | ||||
| add_subdirectory(opt) | add_subdirectory(opt) | ||||
| add_subdirectory(gnn) | add_subdirectory(gnn) | ||||
| add_subdirectory(ir) | |||||
| add_subdirectory(perf) | add_subdirectory(perf) | ||||
| add_subdirectory(cache) | add_subdirectory(cache) | ||||
| if (ENABLE_TDTQUE) | if (ENABLE_TDTQUE) | ||||
| add_subdirectory(tdt) | add_subdirectory(tdt) | ||||
| endif () | endif () | ||||
| @@ -0,0 +1,3 @@ | |||||
# Collect every .cc file below this directory (paths relative to it) and attach
# the MindData submodule id to each of them as a compile definition.
file(GLOB_RECURSE _CURRENT_SRC_FILES
     RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
     "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES}
             PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)

# The IR node implementations live one level down.
add_subdirectory(datasetops)
| @@ -0,0 +1,5 @@ | |||||
# Attach the MindData submodule id, as a compile definition, to every .cc file
# found below this directory (paths relative to it).
file(GLOB_RECURSE _CURRENT_SRC_FILES
     RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
     "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES}
             PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)

# IR nodes for source (leaf) datasets are built in their own subdirectory.
add_subdirectory(source)

# Object library holding the IR nodes for non-source dataset ops.
add_library(engine-ir-datasetops OBJECT
            batch_node.cc)
| @@ -0,0 +1,76 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "minddata/dataset/engine/datasetops/batch_op.h" | |||||
| #include "minddata/dataset/util/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| namespace api { | |||||
| BatchNode::BatchNode(std::shared_ptr<Dataset> child, int32_t batch_size, bool drop_remainder, bool pad, | |||||
| std::vector<std::string> cols_to_map, | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map) | |||||
| : batch_size_(batch_size), | |||||
| drop_remainder_(drop_remainder), | |||||
| pad_(pad), | |||||
| cols_to_map_(cols_to_map), | |||||
| pad_map_(pad_map) { | |||||
| this->children.push_back(child); | |||||
| } | |||||
| Status BatchNode::ValidateParams() { | |||||
| if (batch_size_ <= 0) { | |||||
| std::string err_msg = "Batch: batch_size should be positive integer, but got: " + std::to_string(batch_size_); | |||||
| MS_LOG(ERROR) << err_msg; | |||||
| RETURN_STATUS_SYNTAX_ERROR(err_msg); | |||||
| } | |||||
| if (!cols_to_map_.empty()) { | |||||
| std::string err_msg = "cols_to_map functionality is not implemented in C++; this should be left empty."; | |||||
| MS_LOG(ERROR) << err_msg; | |||||
| RETURN_STATUS_SYNTAX_ERROR(err_msg); | |||||
| } | |||||
| return Status::OK(); | |||||
| } | |||||
| std::vector<std::shared_ptr<DatasetOp>> BatchNode::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| #ifdef ENABLE_PYTHON | |||||
| py::function noop; | |||||
| node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, | |||||
| cols_to_map_, cols_to_map_, noop, noop, pad_map_)); | |||||
| #else | |||||
| node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, | |||||
| cols_to_map_, pad_map_)); | |||||
| #endif | |||||
| // Until py::function is implemented for C++ API, there is no need for a project op to be inserted after batch | |||||
| // because project is only needed when batch op performs per_batch_map. This per_batch_map is a pyfunc | |||||
| return node_ops; | |||||
| } | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,61 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_BATCH_NODE_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_BATCH_NODE_H_ | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "minddata/dataset/include/datasets.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| namespace api { | |||||
| class BatchNode : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| BatchNode(std::shared_ptr<Dataset> child, int32_t batch_size, bool drop_remainder, bool pad, | |||||
| std::vector<std::string> cols_to_map, | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map); | |||||
| /// \brief Destructor | |||||
| ~BatchNode() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return The list of shared pointers to the newly created DatasetOps | |||||
| std::vector<std::shared_ptr<DatasetOp>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return Status Status::OK() if all the parameters are valid | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| int32_t batch_size_; | |||||
| bool drop_remainder_; | |||||
| bool pad_; | |||||
| std::vector<std::string> cols_to_map_; | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_; | |||||
| }; | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_BATCH_NODE_H_ | |||||
| @@ -0,0 +1,4 @@ | |||||
# Attach the MindData submodule id, as a compile definition, to every .cc file
# found below this directory (paths relative to it).
file(GLOB_RECURSE _CURRENT_SRC_FILES
     RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
     "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES}
             PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)

# Object library holding the IR nodes for source (leaf) datasets.
add_library(engine-ir-datasetops-source OBJECT
            image_folder_node.cc)
| @@ -0,0 +1,70 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <set> | |||||
| #include <string> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "minddata/dataset/engine/datasetops/source/image_folder_op.h" | |||||
| #include "minddata/dataset/util/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| namespace api { | |||||
| ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, | |||||
| bool recursive, std::set<std::string> extensions, | |||||
| std::map<std::string, int32_t> class_indexing) | |||||
| : dataset_dir_(dataset_dir), | |||||
| decode_(decode), | |||||
| sampler_(sampler), | |||||
| recursive_(recursive), | |||||
| class_indexing_(class_indexing), | |||||
| exts_(extensions) {} | |||||
| Status ImageFolderNode::ValidateParams() { | |||||
| RETURN_IF_NOT_OK(ValidateDatasetDirParam("ImageFolderNode", dataset_dir_)); | |||||
| RETURN_IF_NOT_OK(ValidateDatasetSampler("ImageFolderNode", sampler_)); | |||||
| return Status::OK(); | |||||
| } | |||||
| std::vector<std::shared_ptr<DatasetOp>> ImageFolderNode::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| // Do internal Schema generation. | |||||
| // This arg is exist in ImageFolderOp, but not externalized (in Python API). | |||||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||||
| TensorShape scalar = TensorShape::CreateScalar(); | |||||
| RETURN_EMPTY_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||||
| RETURN_EMPTY_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar))); | |||||
| node_ops.push_back(std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, | |||||
| recursive_, decode_, exts_, class_indexing_, std::move(schema), | |||||
| std::move(sampler_->Build()))); | |||||
| return node_ops; | |||||
| } | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,63 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_IMAGE_FOLDER_NODE_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_IMAGE_FOLDER_NODE_H_ | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <set> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "minddata/dataset/include/datasets.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| namespace api { | |||||
| /// \class ImageFolderNode | |||||
| /// \brief A Dataset derived class to represent ImageFolder dataset | |||||
| class ImageFolderNode : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive, | |||||
| std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing); | |||||
| /// \brief Destructor | |||||
| ~ImageFolderNode() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return The list of shared pointers to the newly created DatasetOps | |||||
| std::vector<std::shared_ptr<DatasetOp>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return Status Status::OK() if all the parameters are valid | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| std::string dataset_dir_; | |||||
| bool decode_; | |||||
| bool recursive_; | |||||
| std::shared_ptr<SamplerObj> sampler_; | |||||
| std::map<std::string, int32_t> class_indexing_; | |||||
| std::set<std::string> exts_; | |||||
| }; | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_IMAGE_FOLDER_NODE_H_ | |||||
| @@ -22,6 +22,7 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include <set> | #include <set> | ||||
| #include <string> | #include <string> | ||||
| #include <unordered_set> | |||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | #include <vector> | ||||
| #include "minddata/dataset/core/constants.h" | #include "minddata/dataset/core/constants.h" | ||||
| @@ -65,6 +66,7 @@ class CocoNode; | |||||
| class CSVNode; | class CSVNode; | ||||
| class CsvBase; | class CsvBase; | ||||
| class ImageFolderNode; | class ImageFolderNode; | ||||
| class BatchNode; | |||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| class ManifestNode; | class ManifestNode; | ||||
| class MindDataNode; | class MindDataNode; | ||||
| @@ -77,7 +79,6 @@ class TFRecordNode; | |||||
| class VOCNode; | class VOCNode; | ||||
| #endif | #endif | ||||
| // Dataset Op classes (in alphabetical order) | // Dataset Op classes (in alphabetical order) | ||||
| class BatchNode; | |||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| class BucketBatchByLengthNode; | class BucketBatchByLengthNode; | ||||
| class BuildVocabNode; | class BuildVocabNode; | ||||
| @@ -92,6 +93,30 @@ class SkipNode; | |||||
| class TakeNode; | class TakeNode; | ||||
| class ZipNode; | class ZipNode; | ||||
// Evaluates the Status expression `_s` exactly once. If it reports an error, the Status is
// logged at ERROR level and the enclosing function returns a value-initialized result (`{}`),
// so the macro is only usable in functions whose return type can be built from `{}`.
// The temporary avoids a double underscore, since such identifiers are reserved to the
// implementation.
#define RETURN_EMPTY_IF_ERROR(_s)           \
  do {                                      \
    Status return_empty_rc_ = (_s);         \
    if (return_empty_rc_.IsError()) {       \
      MS_LOG(ERROR) << return_empty_rc_;    \
      return {};                            \
    }                                       \
  } while (false)
| // Helper function to validate dataset num_shards and shard_id parameters | |||||
| Status ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_shards, int32_t shard_id); | |||||
| // Helper function to validate dataset sampler parameter | |||||
| Status ValidateDatasetSampler(const std::string &dataset_name, const std::shared_ptr<SamplerObj> &sampler); | |||||
| Status ValidateStringValue(const std::string &str, const std::unordered_set<std::string> &valid_strings); | |||||
| // Helper function to validate dataset input/output column parameter | |||||
| Status ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param, | |||||
| const std::vector<std::string> &columns); | |||||
| // Helper function to validate dataset directory parameter | |||||
| Status ValidateDatasetDirParam(const std::string &dataset_name, std::string dataset_dir); | |||||
| /// \brief Function to create a SchemaObj | /// \brief Function to create a SchemaObj | ||||
| /// \param[in] schema_file Path of schema file | /// \param[in] schema_file Path of schema file | ||||
| /// \return Shared pointer to the current schema | /// \return Shared pointer to the current schema | ||||
| @@ -915,34 +940,6 @@ class CSVNode : public Dataset { | |||||
| int32_t shard_id_; | int32_t shard_id_; | ||||
| }; | }; | ||||
| /// \class ImageFolderNode | |||||
| /// \brief A Dataset derived class to represent ImageFolder dataset | |||||
| class ImageFolderNode : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive, | |||||
| std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing); | |||||
| /// \brief Destructor | |||||
| ~ImageFolderNode() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return The list of shared pointers to the newly created DatasetOps | |||||
| std::vector<std::shared_ptr<DatasetOp>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return Status Status::OK() if all the parameters are valid | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| std::string dataset_dir_; | |||||
| bool decode_; | |||||
| bool recursive_; | |||||
| std::shared_ptr<SamplerObj> sampler_; | |||||
| std::map<std::string, int32_t> class_indexing_; | |||||
| std::set<std::string> exts_; | |||||
| }; | |||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| class ManifestNode : public Dataset { | class ManifestNode : public Dataset { | ||||
| public: | public: | ||||
| @@ -1202,32 +1199,6 @@ class VOCNode : public Dataset { | |||||
| // DERIVED DATASET CLASSES FOR DATASET OPS | // DERIVED DATASET CLASSES FOR DATASET OPS | ||||
| // (In alphabetical order) | // (In alphabetical order) | ||||
| class BatchNode : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| BatchNode(std::shared_ptr<Dataset> child, int32_t batch_size, bool drop_remainder, bool pad, | |||||
| std::vector<std::string> cols_to_map, | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map); | |||||
| /// \brief Destructor | |||||
| ~BatchNode() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return The list of shared pointers to the newly created DatasetOps | |||||
| std::vector<std::shared_ptr<DatasetOp>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return Status Status::OK() if all the parameters are valid | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| int32_t batch_size_; | |||||
| bool drop_remainder_; | |||||
| bool pad_; | |||||
| std::vector<std::string> cols_to_map_; | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_; | |||||
| }; | |||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| class BucketBatchByLengthNode : public Dataset { | class BucketBatchByLengthNode : public Dataset { | ||||
| public: | public: | ||||
| @@ -16,6 +16,8 @@ | |||||
| #include "common/common.h" | #include "common/common.h" | ||||
| #include "minddata/dataset/include/datasets.h" | #include "minddata/dataset/include/datasets.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| using namespace mindspore::dataset::api; | using namespace mindspore::dataset::api; | ||||
| using mindspore::dataset::Tensor; | using mindspore::dataset::Tensor; | ||||
| using mindspore::dataset::TensorShape; | using mindspore::dataset::TensorShape; | ||||
| @@ -18,6 +18,8 @@ | |||||
| #include "minddata/dataset/include/datasets.h" | #include "minddata/dataset/include/datasets.h" | ||||
| #include "minddata/dataset/include/vision.h" | #include "minddata/dataset/include/vision.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" | |||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| using namespace mindspore::dataset::api; | using namespace mindspore::dataset::api; | ||||
| using mindspore::dataset::Tensor; | using mindspore::dataset::Tensor; | ||||
| @@ -19,12 +19,14 @@ | |||||
| #include "minddata/dataset/core/config_manager.h" | #include "minddata/dataset/core/config_manager.h" | ||||
| #include "minddata/dataset/core/global_context.h" | #include "minddata/dataset/core/global_context.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| using namespace mindspore::dataset; | using namespace mindspore::dataset; | ||||
| using namespace mindspore::dataset::api; | using namespace mindspore::dataset::api; | ||||
| using mindspore::dataset::Tensor; | |||||
| using mindspore::dataset::DataType; | |||||
| using mindspore::dataset::ShuffleMode; | using mindspore::dataset::ShuffleMode; | ||||
| using mindspore::dataset::Tensor; | |||||
| using mindspore::dataset::TensorShape; | using mindspore::dataset::TensorShape; | ||||
| using mindspore::dataset::DataType; | |||||
| class MindDataTestPipeline : public UT::DatasetOpTesting { | class MindDataTestPipeline : public UT::DatasetOpTesting { | ||||
| protected: | protected: | ||||
| @@ -355,11 +357,9 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetShard) { | |||||
| // Create a TFRecord Dataset | // Create a TFRecord Dataset | ||||
| // Each file has two columns("image", "label") and 3 rows | // Each file has two columns("image", "label") and 3 rows | ||||
| std::vector<std::string> files = { | |||||
| datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data", | |||||
| datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0002.data", | |||||
| datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0003.data" | |||||
| }; | |||||
| std::vector<std::string> files = {datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data", | |||||
| datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0002.data", | |||||
| datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0003.data"}; | |||||
| std::shared_ptr<Dataset> ds1 = TFRecord({files}, "", {}, 0, ShuffleMode::kFalse, 2, 1, true); | std::shared_ptr<Dataset> ds1 = TFRecord({files}, "", {}, 0, ShuffleMode::kFalse, 2, 1, true); | ||||
| EXPECT_NE(ds1, nullptr); | EXPECT_NE(ds1, nullptr); | ||||
| std::shared_ptr<Dataset> ds2 = TFRecord({files}, "", {}, 0, ShuffleMode::kFalse, 2, 1, false); | std::shared_ptr<Dataset> ds2 = TFRecord({files}, "", {}, 0, ShuffleMode::kFalse, 2, 1, false); | ||||
| @@ -16,6 +16,8 @@ | |||||
| #include "common/common.h" | #include "common/common.h" | ||||
| #include "minddata/dataset/include/datasets.h" | #include "minddata/dataset/include/datasets.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" | |||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| using namespace mindspore::dataset::api; | using namespace mindspore::dataset::api; | ||||
| using mindspore::dataset::Tensor; | using mindspore::dataset::Tensor; | ||||
| using mindspore::dataset::TensorShape; | using mindspore::dataset::TensorShape; | ||||
| @@ -183,19 +185,19 @@ TEST_F(MindDataTestPipeline, TestImageFolderFailWithWrongSampler) { | |||||
| } | } | ||||
| TEST_F(MindDataTestPipeline, TestMnistFailWithWrongDatasetDir) { | TEST_F(MindDataTestPipeline, TestMnistFailWithWrongDatasetDir) { | ||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithWrongDatasetDir."; | |||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithWrongDatasetDir."; | |||||
| // Create a Mnist Dataset | |||||
| std::shared_ptr<Dataset> ds = Mnist("", "all", RandomSampler(false, 10)); | |||||
| EXPECT_EQ(ds, nullptr); | |||||
| // Create a Mnist Dataset | |||||
| std::shared_ptr<Dataset> ds = Mnist("", "all", RandomSampler(false, 10)); | |||||
| EXPECT_EQ(ds, nullptr); | |||||
| } | } | ||||
| TEST_F(MindDataTestPipeline, TestMnistFailWithNullSampler) { | TEST_F(MindDataTestPipeline, TestMnistFailWithNullSampler) { | ||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithNullSampler."; | |||||
| MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithNullSampler."; | |||||
| // Create a Mnist Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testMnistData/"; | |||||
| std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", nullptr); | |||||
| // Expect failure: sampler can not be nullptr | |||||
| EXPECT_EQ(ds, nullptr); | |||||
| // Create a Mnist Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testMnistData/"; | |||||
| std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", nullptr); | |||||
| // Expect failure: sampler can not be nullptr | |||||
| EXPECT_EQ(ds, nullptr); | |||||
| } | } | ||||
| @@ -16,6 +16,9 @@ | |||||
| #include "common/common.h" | #include "common/common.h" | ||||
| #include "minddata/dataset/include/datasets.h" | #include "minddata/dataset/include/datasets.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" | |||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| using namespace mindspore::dataset::api; | using namespace mindspore::dataset::api; | ||||
| using mindspore::dataset::Tensor; | using mindspore::dataset::Tensor; | ||||
| @@ -18,6 +18,8 @@ | |||||
| #include "minddata/dataset/include/transforms.h" | #include "minddata/dataset/include/transforms.h" | ||||
| #include "minddata/dataset/include/vision.h" | #include "minddata/dataset/include/vision.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| using namespace mindspore::dataset::api; | using namespace mindspore::dataset::api; | ||||
| using mindspore::dataset::BorderType; | using mindspore::dataset::BorderType; | ||||
| using mindspore::dataset::Tensor; | using mindspore::dataset::Tensor; | ||||
| @@ -18,6 +18,8 @@ | |||||
| #include "minddata/dataset/include/transforms.h" | #include "minddata/dataset/include/transforms.h" | ||||
| #include "minddata/dataset/include/vision.h" | #include "minddata/dataset/include/vision.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" | |||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| using namespace mindspore::dataset::api; | using namespace mindspore::dataset::api; | ||||
| using mindspore::dataset::BorderType; | using mindspore::dataset::BorderType; | ||||
| using mindspore::dataset::Tensor; | using mindspore::dataset::Tensor; | ||||
| @@ -20,6 +20,8 @@ | |||||
| #include "minddata/dataset/include/datasets.h" | #include "minddata/dataset/include/datasets.h" | ||||
| #include "minddata/dataset/include/transforms.h" | #include "minddata/dataset/include/transforms.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" | |||||
| #include "minddata/dataset/engine/ir/datasetops/batch_node.h" | |||||
| using namespace mindspore::dataset; | using namespace mindspore::dataset; | ||||
| using mindspore::dataset::Tensor; | using mindspore::dataset::Tensor; | ||||