| @@ -17,6 +17,10 @@ else() | |||
| set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2") | |||
| endif() | |||
| if (ENABLE_PYTHON) | |||
| add_compile_definitions(ENABLE_PYTHON) | |||
| endif() | |||
| set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC") | |||
| @@ -25,7 +25,7 @@ usage() | |||
| echo "Usage:" | |||
| echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" | |||
| echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||
| echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]" | |||
| echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" | |||
| echo "" | |||
| echo "Options:" | |||
| echo " -d Debug mode" | |||
| @@ -56,6 +56,7 @@ usage() | |||
| echo " -s Enable serving module, default off" | |||
| echo " -B Enable debugger, default off" | |||
| echo " -E Enable IBVERBS for parameter server, default off" | |||
| echo " -l Compile with python dependency, default on" | |||
| } | |||
| # check value of input is 'on' or 'off' | |||
| @@ -98,9 +99,10 @@ checkopts() | |||
| ENABLE_SERVING="off" | |||
| ENABLE_DEBUGGER="off" | |||
| ENABLE_IBVERBS="off" | |||
| ENABLE_PYTHON="on" | |||
| # Process the options | |||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt | |||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt | |||
| do | |||
| OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') | |||
| case "${opt}" in | |||
| @@ -151,6 +153,10 @@ checkopts() | |||
| check_on_off $OPTARG p | |||
| ENABLE_PROFILE="$OPTARG" | |||
| ;; | |||
| l) | |||
| check_on_off $OPTARG l | |||
| ENABLE_PYTHON="$OPTARG" | |||
| ;; | |||
| i) | |||
| INC_BUILD="on" | |||
| ;; | |||
| @@ -316,6 +322,7 @@ build_mindspore() | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" | |||
| fi | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" | |||
| if [[ "X$ENABLE_MPI" = "Xon" ]]; then | |||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON" | |||
| fi | |||
| @@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF) | |||
| option(ENABLE_AKG "enable akg" OFF) | |||
| option(ENABLE_DEBUGGER "enable debugger" OFF) | |||
| option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF) | |||
| option(ENABLE_PYTHON "Enable python" ON) | |||
| if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") | |||
| if (WIN32) | |||
| @@ -39,6 +39,7 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/device/ascend/platform) | |||
| include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h | |||
| include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include) | |||
| include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/dataset/include) | |||
| ###################################################################### | |||
| ####################### Flags ######################################## | |||
| @@ -67,7 +68,10 @@ add_dependencies(engine-gnn core) | |||
| add_dependencies(engine core) | |||
| add_dependencies(text core) | |||
| add_dependencies(text-kernels core) | |||
| add_dependencies(APItoPython core) | |||
| add_dependencies(cpp-API core) | |||
| if (ENABLE_PYTHON) | |||
| add_dependencies(APItoPython core) | |||
| endif() | |||
| if (ENABLE_TDTQUE) | |||
| add_dependencies(engine-tdt core) | |||
| endif () | |||
| @@ -78,7 +82,7 @@ set(submodules | |||
| $<TARGET_OBJECTS:kernels> | |||
| $<TARGET_OBJECTS:kernels-image> | |||
| $<TARGET_OBJECTS:kernels-data> | |||
| $<TARGET_OBJECTS:APItoPython> | |||
| $<TARGET_OBJECTS:cpp-API> | |||
| $<TARGET_OBJECTS:engine-datasetops-source> | |||
| $<TARGET_OBJECTS:engine-datasetops-source-sampler> | |||
| $<TARGET_OBJECTS:engine-gnn> | |||
| @@ -90,6 +94,12 @@ set(submodules | |||
| $<TARGET_OBJECTS:text-kernels> | |||
| ) | |||
| if (ENABLE_PYTHON) | |||
| set(submodules | |||
| ${submodules} | |||
| $<TARGET_OBJECTS:APItoPython>) | |||
| endif() | |||
| if (ENABLE_TDTQUE) | |||
| add_library(_c_dataengine SHARED ${submodules} $<TARGET_OBJECTS:engine-tdt>) | |||
| else () | |||
| @@ -1,7 +1,16 @@ | |||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | |||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | |||
| add_library(APItoPython OBJECT | |||
| de_pipeline.cc | |||
| python_bindings.cc | |||
| if (ENABLE_PYTHON) | |||
| add_library(APItoPython OBJECT | |||
| de_pipeline.cc | |||
| python_bindings.cc | |||
| ) | |||
| target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) | |||
| endif() | |||
| add_library(cpp-API OBJECT | |||
| datasets.cc | |||
| iterator.cc | |||
| transforms.cc | |||
| samplers.cc | |||
| ) | |||
| target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) | |||
| @@ -0,0 +1,446 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <fstream> | |||
| #include "dataset/include/datasets.h" | |||
| #include "dataset/include/transforms.h" | |||
| #include "dataset/include/samplers.h" | |||
| #include "dataset/engine/dataset_iterator.h" | |||
| #include "dataset/engine/datasetops/source/image_folder_op.h" | |||
| #include "dataset/engine/datasetops/source/mnist_op.h" | |||
| #include "dataset/engine/datasetops/source/cifar_op.h" | |||
| #include "dataset/engine/datasetops/batch_op.h" | |||
| #include "dataset/engine/datasetops/map_op.h" | |||
| #include "dataset/engine/datasetops/repeat_op.h" | |||
| #include "dataset/engine/datasetops/shuffle_op.h" | |||
| #include "dataset/engine/datasetops/project_op.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/random_sampler.h" | |||
| #include "dataset/core/config_manager.h" | |||
| #include "dataset/util/random.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace api { | |||
// Evaluate a Status-returning expression and bail out of the enclosing
// function with nullptr on error. Intended for the Build()/factory functions
// in this file, all of which report failure via a null shared_ptr.
#define RETURN_NULL_IF_ERROR(_s) \
  do { \
    Status __rc = (_s); \
    if (__rc.IsError()) { \
      return nullptr; \
    } \
  } while (false)
| // Function to create the iterator, which will build and launch the execution tree. | |||
| std::shared_ptr<Iterator> Dataset::CreateIterator() { | |||
| std::shared_ptr<Iterator> iter; | |||
| try { | |||
| iter = std::make_shared<Iterator>(); | |||
| Status rc = iter->BuildAndLaunchTree(shared_from_this()); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "CreateIterator failed."; | |||
| return nullptr; | |||
| } | |||
| return iter; | |||
| } catch (const std::exception &err) { | |||
| MS_LOG(ERROR) << "CreateIterator: Iterator exception caught: " << err.what(); | |||
| return nullptr; | |||
| } | |||
| return iter; | |||
| } | |||
| // Constructor | |||
| Dataset::Dataset() { | |||
| // Fetch some default value from config manager | |||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||
| num_workers_ = cfg->num_parallel_workers(); | |||
| rows_per_buffer_ = cfg->rows_per_buffer(); | |||
| connector_que_size_ = cfg->op_connector_size(); | |||
| } | |||
| // Function to create a ImageFolderDataset. | |||
| std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool decode, | |||
| std::shared_ptr<SamplerObj> sampler, std::set<std::string> extensions, | |||
| std::map<std::string, int32_t> class_indexing) { | |||
| // This arg is exist in ImageFolderOp, but not externalized (in Python API). The default value is false. | |||
| bool recursive = false; | |||
| // Create logical representation of ImageFolderDataset. | |||
| auto ds = std::make_shared<ImageFolderDataset>(dataset_dir, decode, sampler, recursive, extensions, class_indexing); | |||
| // Call derived class validation method. | |||
| return ds->ValidateParams() ? ds : nullptr; | |||
| } | |||
| // Function to create a MnistDataset. | |||
| std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler) { | |||
| auto ds = std::make_shared<MnistDataset>(dataset_dir, sampler); | |||
| // Call derived class validation method. | |||
| return ds->ValidateParams() ? ds : nullptr; | |||
| } | |||
| // Function to create a Cifar10Dataset. | |||
| std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples, | |||
| std::shared_ptr<SamplerObj> sampler) { | |||
| auto ds = std::make_shared<Cifar10Dataset>(dataset_dir, num_samples, sampler); | |||
| // Call derived class validation method. | |||
| return ds->ValidateParams() ? ds : nullptr; | |||
| } | |||
| // Function to create a Batch dataset | |||
| std::shared_ptr<BatchDataset> Dataset::Batch(int32_t batch_size, bool drop_remainder) { | |||
| // Default values | |||
| std::vector<std::string> cols_to_map = {}; | |||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map; | |||
| bool pad = false; | |||
| auto ds = std::make_shared<BatchDataset>(batch_size, drop_remainder, pad, cols_to_map, pad_map); | |||
| if (!ds->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| ds->children.push_back(shared_from_this()); | |||
| return ds; | |||
| } | |||
| // Function to create Repeat dataset. | |||
| std::shared_ptr<Dataset> Dataset::Repeat(int32_t count) { | |||
| // Workaround for repeat == 1, do not inject repeat. | |||
| if (count == 1) { | |||
| return shared_from_this(); | |||
| } | |||
| auto ds = std::make_shared<RepeatDataset>(count); | |||
| if (!ds->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| ds->children.push_back(shared_from_this()); | |||
| return ds; | |||
| } | |||
| // Function to create a Map dataset. | |||
| std::shared_ptr<MapDataset> Dataset::Map(std::vector<std::shared_ptr<TensorOperation>> operations, | |||
| std::vector<std::string> input_columns, | |||
| std::vector<std::string> output_columns, | |||
| const std::vector<std::string> &project_columns) { | |||
| auto ds = std::make_shared<MapDataset>(operations, input_columns, output_columns, project_columns); | |||
| if (!ds->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| ds->children.push_back(shared_from_this()); | |||
| return ds; | |||
| } | |||
| // Function to create a ShuffleOp | |||
| std::shared_ptr<ShuffleDataset> Dataset::Shuffle(int32_t shuffle_size) { | |||
| // Pass in reshuffle_each_epoch with true | |||
| auto ds = std::make_shared<ShuffleDataset>(shuffle_size, true); | |||
| if (!ds->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| ds->children.push_back(shared_from_this()); | |||
| return ds; | |||
| } | |||
| // Function to create a ProjectDataset. | |||
| std::shared_ptr<ProjectDataset> Dataset::Project(const std::vector<std::string> &columns) { | |||
| auto ds = std::make_shared<ProjectDataset>(columns); | |||
| // Call derived class validation method. | |||
| if (!ds->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| ds->children.push_back(shared_from_this()); | |||
| return ds; | |||
| } | |||
| // Helper function to create default RandomSampler. | |||
| std::shared_ptr<SamplerObj> CreateDefaultSampler() { | |||
| int32_t num_samples = 0; // 0 means to sample all ids. | |||
| bool replacement = false; | |||
| return std::make_shared<RandomSamplerObj>(replacement, num_samples); | |||
| } | |||
/* ####################################### Derived Dataset classes ################################# */

// Constructor for ImageFolderDataset.
// NOTE(review): the initializer list writes class_indexing_ before exts_; if the header
// declares exts_ first this triggers -Wreorder under -Wall -Werror — confirm declaration order.
ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
                                       bool recursive, std::set<std::string> extensions,
                                       std::map<std::string, int32_t> class_indexing)
    : dataset_dir_(dataset_dir),
      decode_(decode),
      sampler_(sampler),
      recursive_(recursive),
      class_indexing_(class_indexing),
      exts_(extensions) {}

// A dataset directory is the only mandatory parameter.
bool ImageFolderDataset::ValidateParams() {
  if (dataset_dir_.empty()) {
    MS_LOG(ERROR) << "No dataset path is specified.";
    return false;
  }
  return true;
}

// Convert this logical node into the runtime op(s) that execute it.
// Returns nullptr (via RETURN_NULL_IF_ERROR) if schema construction fails.
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ImageFolderDataset::Build() {
  // A vector containing shared pointer to the Dataset Ops that this object will create
  std::vector<std::shared_ptr<DatasetOp>> node_ops;
  // If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
  if (sampler_ == nullptr) {
    sampler_ = CreateDefaultSampler();
  }
  // Do internal Schema generation.
  // This arg is exist in ImageFolderOp, but not externalized (in Python API).
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  TensorShape scalar = TensorShape::CreateScalar();
  // "image" is a flexible rank-1 uint8 column; "label" is an int32 scalar.
  RETURN_NULL_IF_ERROR(
    schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
  RETURN_NULL_IF_ERROR(
    schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)));
  node_ops.push_back(std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
                                                     recursive_, decode_, exts_, class_indexing_, std::move(schema),
                                                     std::move(sampler_->Build())));
  return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
// Constructor for MnistDataset.
MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler)
    : dataset_dir_(dataset_dir), sampler_(sampler) {}

// A dataset directory is the only mandatory parameter.
bool MnistDataset::ValidateParams() {
  if (dataset_dir_.empty()) {
    MS_LOG(ERROR) << "No dataset path is specified.";
    return false;
  }
  return true;
}

// Convert this logical node into a MnistOp; nullptr on schema failure.
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> MnistDataset::Build() {
  // A vector containing shared pointer to the Dataset Ops that this object will create
  std::vector<std::shared_ptr<DatasetOp>> node_ops;
  // If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
  if (sampler_ == nullptr) {
    sampler_ = CreateDefaultSampler();
  }
  // Do internal Schema generation.
  auto schema = std::make_unique<DataSchema>();
  RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
  TensorShape scalar = TensorShape::CreateScalar();
  // NOTE(review): label here is DE_UINT32, while ImageFolderDataset uses DE_INT32 —
  // presumably intentional per-source-op convention; confirm.
  RETURN_NULL_IF_ERROR(
    schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar)));
  node_ops.push_back(std::make_shared<MnistOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
                                               std::move(schema), std::move(sampler_->Build())));
  return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
// Constructor for BatchDataset.
BatchDataset::BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map,
                           std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map)
    : batch_size_(batch_size),
      drop_remainder_(drop_remainder),
      pad_(pad),
      cols_to_map_(cols_to_map),
      pad_map_(pad_map) {}

// Convert this logical node into a BatchOp. The BatchOp constructor differs
// depending on whether python support is compiled in.
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> BatchDataset::Build() {
  // A vector containing shared pointer to the Dataset Ops that this object will create
  std::vector<std::shared_ptr<DatasetOp>> node_ops;
#ifdef ENABLE_PYTHON
  // The python-enabled BatchOp takes per-batch callback hooks; the C++ API has
  // none, so pass default-constructed (no-op) py::function objects.
  py::function noop;
  node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
                                               cols_to_map_, noop, noop, pad_map_));
#else
  // Python-free build: BatchOp overload without the callback parameters.
  node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
                                               cols_to_map_, pad_map_));
#endif
  return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}

// Batch size must be strictly positive.
// NOTE(review): unlike the other nodes, this failure is silent (no MS_LOG) —
// consider logging for consistency.
bool BatchDataset::ValidateParams() {
  if (batch_size_ <= 0) {
    return false;
  }
  return true;
}
// Constructor for RepeatDataset.
// NOTE(review): Dataset::Repeat passes an int32_t; a negative count wraps to a huge
// value in this uint32_t parameter, and the `repeat_count_ <= 0` check below cannot
// catch it if the member is unsigned — confirm the member's declared type.
RepeatDataset::RepeatDataset(uint32_t count) : repeat_count_(count) {}

// Convert this logical node into a RepeatOp.
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> RepeatDataset::Build() {
  // A vector containing shared pointer to the Dataset Ops that this object will create
  std::vector<std::shared_ptr<DatasetOp>> node_ops;
  node_ops.push_back(std::make_shared<RepeatOp>(repeat_count_));
  return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}

// Repeat count must be positive (count == 1 is filtered out earlier by Dataset::Repeat).
bool RepeatDataset::ValidateParams() {
  if (repeat_count_ <= 0) {
    return false;
  }
  return true;
}
// Constructor for MapDataset.
MapDataset::MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns,
                       std::vector<std::string> output_columns, const std::vector<std::string> &project_columns)
    : operations_(operations),
      input_columns_(input_columns),
      output_columns_(output_columns),
      project_columns_(project_columns) {}

// Convert this logical node into a MapOp (optionally preceded, in the returned
// vector, by a ProjectOp when project_columns_ is set).
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> MapDataset::Build() {
  // A vector containing shared pointer to the Dataset Ops that this object will create
  std::vector<std::shared_ptr<DatasetOp>> node_ops;
  // Currently default is true, and this is not exposed to user.
  bool perf_mode = true;
  std::vector<std::shared_ptr<TensorOp>> tensor_ops;
  // Build tensorOp from tensorOperation vector
  // This is to ensure each iterator hold its own copy of the tensorOp objects.
  (void)std::transform(
    operations_.begin(), operations_.end(), std::back_inserter(tensor_ops),
    [](std::shared_ptr<TensorOperation> operation) -> std::shared_ptr<TensorOp> { return operation->Build(); });
  // This parameter will be removed with next rebase
  std::vector<std::string> col_orders;
  auto map_op =
    std::make_shared<MapOp>(input_columns_, output_columns_, tensor_ops, num_workers_, connector_que_size_, perf_mode);
  // The ProjectOp (if any) is pushed first; the tree builder chains the ops in
  // the order they appear in this vector.
  if (!project_columns_.empty()) {
    auto project_op = std::make_shared<ProjectOp>(project_columns_);
    node_ops.push_back(project_op);
  }
  node_ops.push_back(map_op);
  return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}

// At least one TensorOperation is required.
bool MapDataset::ValidateParams() {
  if (operations_.empty()) {
    return false;
  }
  return true;
}
// Constructor for ShuffleDataset. The shuffle seed is captured from the global
// random state (GetSeed()) at construction time.
ShuffleDataset::ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch)
    : shuffle_size_(shuffle_size), shuffle_seed_(GetSeed()), reset_every_epoch_(reset_every_epoch) {}

// Function to build the ShuffleOp
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ShuffleDataset::Build() {
  // A vector containing shared pointer to the Dataset Ops that this object will create
  std::vector<std::shared_ptr<DatasetOp>> node_ops;
  node_ops.push_back(std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_,
                                                 rows_per_buffer_));
  return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}

// Function to validate the parameters for ShuffleDataset.
// A shuffle buffer of size 1 (or less) cannot shuffle anything, so it is rejected.
bool ShuffleDataset::ValidateParams() {
  if (shuffle_size_ <= 1) {
    MS_LOG(ERROR) << "ShuffleDataset: Invalid input, shuffle_size: " << shuffle_size_;
    return false;
  }
  return true;
}
// Constructor for Cifar10Dataset
Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler)
    : dataset_dir_(dataset_dir), num_samples_(num_samples), sampler_(sampler) {}

// Requires a dataset directory and a non-negative sample count.
bool Cifar10Dataset::ValidateParams() {
  if (dataset_dir_.empty()) {
    MS_LOG(ERROR) << "No dataset path is specified.";
    return false;
  }
  if (num_samples_ < 0) {
    MS_LOG(ERROR) << "Number of samples cannot be negative";
    return false;
  }
  return true;
}

// Function to build CifarOp; nullptr on schema failure.
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Cifar10Dataset::Build() {
  // A vector containing shared pointer to the Dataset Ops that this object will create
  std::vector<std::shared_ptr<DatasetOp>> node_ops;
  // If user does not specify Sampler, create a default sampler based on the shuffle variable.
  if (sampler_ == nullptr) {
    sampler_ = CreateDefaultSampler();
  }
  // Do internal Schema generation.
  auto schema = std::make_unique<DataSchema>();
  RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
  TensorShape scalar = TensorShape::CreateScalar();
  // NOTE(review): label is DE_UINT32 here (matching MnistDataset), while
  // ImageFolderDataset uses DE_INT32 — confirm this asymmetry is intended.
  RETURN_NULL_IF_ERROR(
    schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar)));
  node_ops.push_back(std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, num_workers_, rows_per_buffer_,
                                               dataset_dir_, connector_que_size_, std::move(schema),
                                               std::move(sampler_->Build())));
  return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
| // Function to build ProjectOp | |||
| ProjectDataset::ProjectDataset(const std::vector<std::string> &columns) : columns_(columns) {} | |||
| bool ProjectDataset::ValidateParams() { | |||
| if (columns_.empty()) { | |||
| MS_LOG(ERROR) << "No columns are specified."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ProjectDataset::Build() { | |||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||
| node_ops.push_back(std::make_shared<ProjectOp>(columns_)); | |||
| return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops); | |||
| } | |||
| } // namespace api | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,101 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "dataset/include/iterator.h" | |||
| #include "dataset/core/client.h" | |||
| #include "dataset/include/datasets.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace api { | |||
| // Get the next row from the data pipeline. | |||
| void Iterator::GetNextRow(TensorMap *row) { | |||
| Status rc = iterator_->GetNextAsMap(row); | |||
| if (rc.IsError()) { | |||
| MS_LOG(ERROR) << "GetNextRow: Failed to get next row."; | |||
| row->clear(); | |||
| } | |||
| } | |||
// Shut down the data pipeline.
void Iterator::Stop() {
  // Releasing the iterator_ unique_ptr triggers the DatasetIterator destructor.
  iterator_.reset();
  // Then release ownership of tree_; this decrements the ExecutionTree ref count.
  tree_.reset();
}
// Function to build and launch the execution tree.
// Walks the logical Dataset tree breadth-first, converts each node to its
// runtime DatasetOp(s), wires up parent/child edges in the ExecutionTree, then
// prepares and launches the tree and creates the DatasetIterator over it.
Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds) {
  // One time init
  Status rc;
  rc = GlobalInit();
  RETURN_IF_NOT_OK(rc);
  // Instantiate the execution tree
  tree_ = std::make_shared<ExecutionTree>();
  // Iterative BFS converting Dataset tree into runtime Execution tree.
  std::queue<std::pair<std::shared_ptr<Dataset>, std::shared_ptr<DatasetOp>>> q;
  if (ds != nullptr) {
    // Convert the current root node.
    auto root_op = ds->Build()->front();
    RETURN_UNEXPECTED_IF_NULL(root_op);
    RETURN_IF_NOT_OK(tree_->AssociateNode(root_op));
    q.push(std::make_pair(ds, root_op));
    // Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes)
    while (!q.empty()) {
      auto node_pair = q.front();
      q.pop();
      // Iterate through all the direct children of the first element in our BFS queue
      for (auto child : node_pair.first->children) {
        auto child_ops = child->Build();
        RETURN_UNEXPECTED_IF_NULL(child_ops);
        auto node_op = node_pair.second;
        // Iterate through all the DatasetOps returned by calling Build on the last Dataset object, associate them
        // with the execution tree and add the child and parent relationship between the nodes
        // Note that some Dataset objects might return more than one DatasetOps
        // e.g. MapDataset will return MapOp and ProjectOp if project_columns is set for MapDataset
        for (auto child_op : *child_ops) {
          RETURN_IF_NOT_OK(tree_->AssociateNode(child_op));
          RETURN_IF_NOT_OK(node_op->AddChild(child_op));
          node_op = child_op;
        }
        // Add the child and the last element of the returned DatasetOps (which is now the leaf node in our current
        // execution tree) to the BFS queue
        q.push(std::make_pair(child, child_ops->back()));
      }
    }
    RETURN_IF_NOT_OK(tree_->AssignRoot(root_op));
  }
  // Launch the execution tree.
  // NOTE(review): when ds is nullptr the code still prepares/launches an empty
  // tree rather than erroring out — confirm this is intended.
  RETURN_IF_NOT_OK(tree_->Prepare());
  RETURN_IF_NOT_OK(tree_->Launch());
  iterator_ = std::make_unique<DatasetIterator>(tree_);
  RETURN_UNEXPECTED_IF_NULL(iterator_);
  return rc;
}
| } // namespace api | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -297,7 +297,7 @@ void bindTensor(py::module *m) { | |||
| })) | |||
| .def_buffer([](Tensor &tensor) { | |||
| py::buffer_info info; | |||
| THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); | |||
| THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); | |||
| return info; | |||
| }) | |||
| .def("__str__", &Tensor::ToString) | |||
| @@ -311,7 +311,7 @@ void bindTensor(py::module *m) { | |||
| return res; | |||
| } | |||
| py::buffer_info info; | |||
| THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); | |||
| THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); | |||
| return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t); | |||
| }); | |||
| @@ -0,0 +1,224 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "dataset/include/samplers.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/random_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/pk_sampler.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace api { | |||
// Base-class constructor; SamplerObj holds no state of its own here.
SamplerObj::SamplerObj() {}
| /// Function to create a Distributed Sampler. | |||
| std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle, | |||
| int64_t num_samples, uint32_t seed) { | |||
| auto sampler = std::make_shared<DistributedSamplerObj>(num_shards, shard_id, shuffle, num_samples, seed); | |||
| // Input validation | |||
| if (!sampler->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return sampler; | |||
| } | |||
| /// Function to create a PK Sampler. | |||
| std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle, int64_t num_samples) { | |||
| auto sampler = std::make_shared<PKSamplerObj>(num_val, shuffle, num_samples); | |||
| // Input validation | |||
| if (!sampler->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return sampler; | |||
| } | |||
| /// Function to create a Random Sampler. | |||
| std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement, int64_t num_samples) { | |||
| auto sampler = std::make_shared<RandomSamplerObj>(replacement, num_samples); | |||
| // Input validation | |||
| if (!sampler->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return sampler; | |||
| } | |||
| /// Function to create a Sequential Sampler. | |||
| std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index, int64_t num_samples) { | |||
| auto sampler = std::make_shared<SequentialSamplerObj>(start_index, num_samples); | |||
| // Input validation | |||
| if (!sampler->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return sampler; | |||
| } | |||
| /// Function to create a Subset Random Sampler. | |||
| std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices, int64_t num_samples) { | |||
| auto sampler = std::make_shared<SubsetRandomSamplerObj>(indices, num_samples); | |||
| // Input validation | |||
| if (!sampler->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return sampler; | |||
| } | |||
| /// Function to create a Weighted Random Sampler. | |||
| std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights, int64_t num_samples, | |||
| bool replacement) { | |||
| auto sampler = std::make_shared<WeightedRandomSamplerObj>(weights, num_samples, replacement); | |||
| // Input validation | |||
| if (!sampler->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return sampler; | |||
| } | |||
| /* ####################################### Derived Sampler classes ################################# */ | |||
// DistributedSampler
// Constructor: records the shard layout, shuffle flag, requested sample count and
// shuffle seed. No validation is performed here; see ValidateParams().
DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples,
                                             uint32_t seed)
    : num_shards_(num_shards), shard_id_(shard_id), shuffle_(shuffle), num_samples_(num_samples), seed_(seed) {}

// Checks the parameters captured by the constructor.
// Logs via MS_LOG(ERROR) and returns false on the first invalid value.
bool DistributedSamplerObj::ValidateParams() {
  // There must be at least one shard.
  if (num_shards_ <= 0) {
    MS_LOG(ERROR) << "DistributedSampler: invalid num_shards: " << num_shards_;
    return false;
  }
  // shard_id must index one of the num_shards_ shards.
  if (shard_id_ < 0 || shard_id_ >= num_shards_) {
    MS_LOG(ERROR) << "DistributedSampler: invalid input, shard_id: " << shard_id_ << ", num_shards: " << num_shards_;
    return false;
  }
  // Only negative counts are rejected; zero is accepted
  // (presumably meaning "all samples" — confirm against dataset::DistributedSampler).
  if (num_samples_ < 0) {
    MS_LOG(ERROR) << "DistributedSampler: invalid num_samples: " << num_samples_;
    return false;
  }
  return true;
}

// Builds the runtime sampler used by the dataset engine.
// Note: the runtime constructor takes num_samples first, unlike this class' constructor.
std::shared_ptr<Sampler> DistributedSamplerObj::Build() {
  return std::make_shared<dataset::DistributedSampler>(num_samples_, num_shards_, shard_id_, shuffle_, seed_);
}
// PKSampler
// Constructor: records the per-class value count, shuffle flag and sample count.
PKSamplerObj::PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples)
    : num_val_(num_val), shuffle_(shuffle), num_samples_(num_samples) {}

// Checks the captured parameters; logs and returns false on the first invalid one.
bool PKSamplerObj::ValidateParams() {
  // num_val must be strictly positive.
  if (num_val_ <= 0) {
    MS_LOG(ERROR) << "PKSampler: invalid num_val: " << num_val_;
    return false;
  }
  // Only negative sample counts are rejected; zero is accepted.
  if (num_samples_ < 0) {
    MS_LOG(ERROR) << "PKSampler: invalid num_samples: " << num_samples_;
    return false;
  }
  return true;
}

// Builds the runtime sampler. Argument order differs from the constructor:
// the runtime dataset::PKSampler takes num_samples first.
std::shared_ptr<Sampler> PKSamplerObj::Build() {
  return std::make_shared<dataset::PKSampler>(num_samples_, num_val_, shuffle_);
}
// RandomSampler
// Constructor: records the replacement flag and sample count.
RandomSamplerObj::RandomSamplerObj(bool replacement, int64_t num_samples)
    : replacement_(replacement), num_samples_(num_samples) {}

// Checks the captured parameters; only a negative sample count is invalid.
bool RandomSamplerObj::ValidateParams() {
  if (num_samples_ < 0) {
    MS_LOG(ERROR) << "RandomSampler: invalid num_samples: " << num_samples_;
    return false;
  }
  return true;
}

// Builds the runtime sampler.
std::shared_ptr<Sampler> RandomSamplerObj::Build() {
  // Reshuffling every epoch is always enabled; it is not exposed to the caller.
  bool reshuffle_each_epoch = true;
  auto sampler = std::make_shared<dataset::RandomSampler>(num_samples_, replacement_, reshuffle_each_epoch);
  return sampler;
}
// SequentialSampler
// Constructor: records the starting index and sample count.
SequentialSamplerObj::SequentialSamplerObj(int64_t start_index, int64_t num_samples)
    : start_index_(start_index), num_samples_(num_samples) {}

// Checks the captured parameters; logs and returns false on the first invalid one.
bool SequentialSamplerObj::ValidateParams() {
  // Only negative sample counts are rejected; zero is accepted.
  if (num_samples_ < 0) {
    MS_LOG(ERROR) << "SequentialSampler: invalid num_samples: " << num_samples_;
    return false;
  }
  // The starting index must not be negative.
  if (start_index_ < 0) {
    MS_LOG(ERROR) << "SequentialSampler: invalid start_index: " << start_index_;
    return false;
  }
  return true;
}

// Builds the runtime sampler (num_samples first, then start_index).
std::shared_ptr<Sampler> SequentialSamplerObj::Build() {
  auto sampler = std::make_shared<dataset::SequentialSampler>(num_samples_, start_index_);
  return sampler;
}
// SubsetRandomSampler
// Constructor: copies the index subset and records the sample count.
SubsetRandomSamplerObj::SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples)
    : indices_(indices), num_samples_(num_samples) {}

// Checks the captured parameters; only a negative sample count is invalid.
// NOTE(review): the indices themselves are not range-checked here.
bool SubsetRandomSamplerObj::ValidateParams() {
  if (num_samples_ < 0) {
    MS_LOG(ERROR) << "SubsetRandomSampler: invalid num_samples: " << num_samples_;
    return false;
  }
  return true;
}

// Builds the runtime sampler.
std::shared_ptr<Sampler> SubsetRandomSamplerObj::Build() {
  auto sampler = std::make_shared<dataset::SubsetRandomSampler>(num_samples_, indices_);
  return sampler;
}
// WeightedRandomSampler
// Constructor: copies the per-sample weights and records count/replacement.
WeightedRandomSamplerObj::WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples,
                                                   bool replacement)
    : weights_(weights), num_samples_(num_samples), replacement_(replacement) {}

// Checks the captured parameters; only a negative sample count is invalid.
// NOTE(review): individual weights are not checked for negativity here.
bool WeightedRandomSamplerObj::ValidateParams() {
  if (num_samples_ < 0) {
    MS_LOG(ERROR) << "WeightedRandomSampler: invalid num_samples: " << num_samples_;
    return false;
  }
  return true;
}

// Builds the runtime sampler.
std::shared_ptr<Sampler> WeightedRandomSamplerObj::Build() {
  auto sampler = std::make_shared<dataset::WeightedRandomSampler>(num_samples_, weights_, replacement_);
  return sampler;
}
| } // namespace api | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,491 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "dataset/include/transforms.h" | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include "dataset/kernels/image/normalize_op.h" | |||
| #include "dataset/kernels/image/decode_op.h" | |||
| #include "dataset/kernels/image/resize_op.h" | |||
| #include "dataset/kernels/image/random_crop_op.h" | |||
| #include "dataset/kernels/image/center_crop_op.h" | |||
| #include "dataset/kernels/image/uniform_aug_op.h" | |||
| #include "dataset/kernels/image/random_horizontal_flip_op.h" | |||
| #include "dataset/kernels/image/random_vertical_flip_op.h" | |||
| #include "dataset/kernels/image/random_rotation_op.h" | |||
| #include "dataset/kernels/image/cut_out_op.h" | |||
| #include "dataset/kernels/image/random_color_adjust_op.h" | |||
| #include "dataset/kernels/image/pad_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace api { | |||
| TensorOperation::TensorOperation() {} | |||
| // Transform operations for computer vision. | |||
| namespace vision { | |||
| // Function to create NormalizeOperation. | |||
| std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std) { | |||
| auto op = std::make_shared<NormalizeOperation>(mean, std); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create DecodeOperation. | |||
| std::shared_ptr<DecodeOperation> Decode(bool rgb) { | |||
| auto op = std::make_shared<DecodeOperation>(rgb); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create ResizeOperation. | |||
| std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size, InterpolationMode interpolation) { | |||
| auto op = std::make_shared<ResizeOperation>(size, interpolation); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create RandomCropOperation. | |||
| std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding, | |||
| bool pad_if_needed, std::vector<uint8_t> fill_value) { | |||
| auto op = std::make_shared<RandomCropOperation>(size, padding, pad_if_needed, fill_value); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create CenterCropOperation. | |||
| std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size) { | |||
| auto op = std::make_shared<CenterCropOperation>(size); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create UniformAugOperation. | |||
| std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations, | |||
| int32_t num_ops) { | |||
| auto op = std::make_shared<UniformAugOperation>(operations, num_ops); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create RandomHorizontalFlipOperation. | |||
| std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob) { | |||
| auto op = std::make_shared<RandomHorizontalFlipOperation>(prob); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create RandomVerticalFlipOperation. | |||
| std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob) { | |||
| auto op = std::make_shared<RandomVerticalFlipOperation>(prob); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create RandomRotationOperation. | |||
| std::shared_ptr<RandomRotationOperation> RandomRotation(std::vector<float> degrees, InterpolationMode resample, | |||
| bool expand, std::vector<float> center, | |||
| std::vector<uint8_t> fill_value) { | |||
| auto op = std::make_shared<RandomRotationOperation>(degrees, resample, expand, center, fill_value); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create PadOperation. | |||
| std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value, | |||
| BorderType padding_mode) { | |||
| auto op = std::make_shared<PadOperation>(padding, fill_value, padding_mode); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create CutOutOp. | |||
| std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches) { | |||
| auto op = std::make_shared<CutOutOperation>(length, num_patches); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| // Function to create RandomColorAdjustOperation. | |||
| std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness, | |||
| std::vector<float> contrast, | |||
| std::vector<float> saturation, std::vector<float> hue) { | |||
| auto op = std::make_shared<RandomColorAdjustOperation>(brightness, contrast, saturation, hue); | |||
| // Input validation | |||
| if (!op->ValidateParams()) { | |||
| return nullptr; | |||
| } | |||
| return op; | |||
| } | |||
| /* ####################################### Derived TensorOperation classes ################################# */ | |||
// NormalizeOperation
// Constructor: records the per-channel mean and standard deviation vectors.
NormalizeOperation::NormalizeOperation(std::vector<float> mean, std::vector<float> std) : mean_(mean), std_(std) {}

// Checks the captured parameters; both vectors must hold exactly three values
// (one per RGB channel, as consumed element-wise by Build()).
bool NormalizeOperation::ValidateParams() {
  if (mean_.size() != 3) {
    MS_LOG(ERROR) << "Normalize: mean vector has incorrect size: " << mean_.size();
    return false;
  }
  if (std_.size() != 3) {
    MS_LOG(ERROR) << "Normalize: std vector has incorrect size: " << std_.size();
    return false;
  }
  return true;
}

// Builds the runtime op, unpacking the three channel means and stds.
// Assumes ValidateParams() succeeded (indices 0..2 are accessed unchecked).
std::shared_ptr<TensorOp> NormalizeOperation::Build() {
  return std::make_shared<NormalizeOp>(mean_[0], mean_[1], mean_[2], std_[0], std_[1], std_[2]);
}
// DecodeOperation
// Constructor: records whether decoded output should be RGB.
DecodeOperation::DecodeOperation(bool rgb) : rgb_(rgb) {}

// A bool flag cannot be invalid, so validation always succeeds.
bool DecodeOperation::ValidateParams() { return true; }

// Builds the runtime decode op.
std::shared_ptr<TensorOp> DecodeOperation::Build() { return std::make_shared<DecodeOp>(rgb_); }
// ResizeOperation
// Constructor: records the target size ({height} or {height, width}) and interpolation mode.
ResizeOperation::ResizeOperation(std::vector<int32_t> size, InterpolationMode interpolation)
    : size_(size), interpolation_(interpolation) {}

// Checks that size holds one or two values.
bool ResizeOperation::ValidateParams() {
  if (size_.empty() || size_.size() > 2) {
    MS_LOG(ERROR) << "Resize: size vector has incorrect size: " << size_.size();
    return false;
  }
  return true;
}

// Builds the runtime resize op.
std::shared_ptr<TensorOp> ResizeOperation::Build() {
  int32_t height = size_[0];
  // Width 0 is passed when only height is given — presumably a sentinel ResizeOp
  // interprets as "preserve aspect ratio"; confirm against ResizeOp.
  int32_t width = 0;
  // User specified the width value.
  if (size_.size() == 2) {
    width = size_[1];
  }
  return std::make_shared<ResizeOp>(height, width, interpolation_);
}
| // RandomCropOperation | |||
| RandomCropOperation::RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding, bool pad_if_needed, | |||
| std::vector<uint8_t> fill_value) | |||
| : size_(size), padding_(padding), pad_if_needed_(pad_if_needed), fill_value_(fill_value) {} | |||
| bool RandomCropOperation::ValidateParams() { | |||
| if (size_.empty() || size_.size() > 2) { | |||
| MS_LOG(ERROR) << "RandomCrop: size vector has incorrect size: " << size_.size(); | |||
| return false; | |||
| } | |||
| if (padding_.empty() || padding_.size() != 4) { | |||
| MS_LOG(ERROR) << "RandomCrop: padding vector has incorrect size: padding.size()"; | |||
| return false; | |||
| } | |||
| if (fill_value_.empty() || fill_value_.size() != 3) { | |||
| MS_LOG(ERROR) << "RandomCrop: fill_value vector has incorrect size: fill_value.size()"; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| std::shared_ptr<TensorOp> RandomCropOperation::Build() { | |||
| int32_t crop_height = size_[0]; | |||
| int32_t crop_width = 0; | |||
| int32_t pad_top = padding_[0]; | |||
| int32_t pad_bottom = padding_[1]; | |||
| int32_t pad_left = padding_[2]; | |||
| int32_t pad_right = padding_[3]; | |||
| uint8_t fill_r = fill_value_[0]; | |||
| uint8_t fill_g = fill_value_[1]; | |||
| uint8_t fill_b = fill_value_[2]; | |||
| // User has specified the crop_width value. | |||
| if (size_.size() == 2) { | |||
| crop_width = size_[1]; | |||
| } | |||
| auto tensor_op = std::make_shared<RandomCropOp>(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, | |||
| BorderType::kConstant, pad_if_needed_, fill_r, fill_g, fill_b); | |||
| return tensor_op; | |||
| } | |||
| // CenterCropOperation | |||
| CenterCropOperation::CenterCropOperation(std::vector<int32_t> size) : size_(size) {} | |||
| bool CenterCropOperation::ValidateParams() { | |||
| if (size_.empty() || size_.size() > 2) { | |||
| MS_LOG(ERROR) << "CenterCrop: size vector has incorrect size."; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| std::shared_ptr<TensorOp> CenterCropOperation::Build() { | |||
| int32_t crop_height = size_[0]; | |||
| int32_t crop_width = 0; | |||
| // User has specified crop_width. | |||
| if (size_.size() == 2) { | |||
| crop_width = size_[1]; | |||
| } | |||
| std::shared_ptr<CenterCropOp> tensor_op = std::make_shared<CenterCropOp>(crop_height, crop_width); | |||
| return tensor_op; | |||
| } | |||
// UniformAugOperation
// Constructor: records the candidate transform list and how many to apply.
UniformAugOperation::UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops)
    : operations_(operations), num_ops_(num_ops) {}

// No validation is performed.
// NOTE(review): null entries in operations_ are not rejected here and would be
// dereferenced in Build(); num_ops is also unchecked.
bool UniformAugOperation::ValidateParams() { return true; }

// Builds the runtime op: each wrapped TensorOperation is built into its TensorOp.
std::shared_ptr<TensorOp> UniformAugOperation::Build() {
  std::vector<std::shared_ptr<TensorOp>> tensor_ops;
  (void)std::transform(operations_.begin(), operations_.end(), std::back_inserter(tensor_ops),
                       [](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
  std::shared_ptr<UniformAugOp> tensor_op = std::make_shared<UniformAugOp>(tensor_ops, num_ops_);
  return tensor_op;
}
// RandomHorizontalFlipOperation
// Constructor: records the flip probability.
RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability) : probability_(probability) {}

// No validation is performed.
// NOTE(review): probability is not checked to lie in [0, 1].
bool RandomHorizontalFlipOperation::ValidateParams() { return true; }

// Builds the runtime horizontal-flip op.
std::shared_ptr<TensorOp> RandomHorizontalFlipOperation::Build() {
  std::shared_ptr<RandomHorizontalFlipOp> tensor_op = std::make_shared<RandomHorizontalFlipOp>(probability_);
  return tensor_op;
}
// RandomVerticalFlipOperation
// Constructor: records the flip probability.
RandomVerticalFlipOperation::RandomVerticalFlipOperation(float probability) : probability_(probability) {}

// No validation is performed.
// NOTE(review): probability is not checked to lie in [0, 1].
bool RandomVerticalFlipOperation::ValidateParams() { return true; }

// Builds the runtime vertical-flip op.
std::shared_ptr<TensorOp> RandomVerticalFlipOperation::Build() {
  std::shared_ptr<RandomVerticalFlipOp> tensor_op = std::make_shared<RandomVerticalFlipOp>(probability_);
  return tensor_op;
}
| // Function to create RandomRotationOperation. | |||
| RandomRotationOperation::RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode, | |||
| bool expand, std::vector<float> center, | |||
| std::vector<uint8_t> fill_value) | |||
| : degrees_(degrees), | |||
| interpolation_mode_(interpolation_mode), | |||
| expand_(expand), | |||
| center_(center), | |||
| fill_value_(fill_value) {} | |||
| bool RandomRotationOperation::ValidateParams() { | |||
| if (degrees_.empty() || degrees_.size() != 2) { | |||
| MS_LOG(ERROR) << "RandomRotation: degrees vector has incorrect size: degrees.size()"; | |||
| return false; | |||
| } | |||
| if (center_.empty() || center_.size() != 2) { | |||
| MS_LOG(ERROR) << "RandomRotation: center vector has incorrect size: center.size()"; | |||
| return false; | |||
| } | |||
| if (fill_value_.empty() || fill_value_.size() != 3) { | |||
| MS_LOG(ERROR) << "RandomRotation: fill_value vector has incorrect size: fill_value.size()"; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| std::shared_ptr<TensorOp> RandomRotationOperation::Build() { | |||
| std::shared_ptr<RandomRotationOp> tensor_op = | |||
| std::make_shared<RandomRotationOp>(degrees_[0], degrees_[1], center_[0], center_[1], interpolation_mode_, expand_, | |||
| fill_value_[0], fill_value_[1], fill_value_[2]); | |||
| return tensor_op; | |||
| } | |||
| // PadOperation | |||
| PadOperation::PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value, BorderType padding_mode) | |||
| : padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {} | |||
| bool PadOperation::ValidateParams() { | |||
| if (padding_.empty() || padding_.size() == 3 || padding_.size() > 4) { | |||
| MS_LOG(ERROR) << "Pad: padding vector has incorrect size: padding.size()"; | |||
| return false; | |||
| } | |||
| if (fill_value_.empty() || (fill_value_.size() != 1 && fill_value_.size() != 3)) { | |||
| MS_LOG(ERROR) << "Pad: fill_value vector has incorrect size: fill_value.size()"; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| std::shared_ptr<TensorOp> PadOperation::Build() { | |||
| int32_t pad_top, pad_bottom, pad_left, pad_right; | |||
| switch (padding_.size()) { | |||
| case 1: | |||
| pad_left = padding_[0]; | |||
| pad_top = padding_[0]; | |||
| pad_right = padding_[0]; | |||
| pad_bottom = padding_[0]; | |||
| break; | |||
| case 2: | |||
| pad_left = padding_[0]; | |||
| pad_top = padding_[1]; | |||
| pad_right = padding_[0]; | |||
| pad_bottom = padding_[1]; | |||
| break; | |||
| default: | |||
| pad_left = padding_[0]; | |||
| pad_top = padding_[1]; | |||
| pad_right = padding_[2]; | |||
| pad_bottom = padding_[3]; | |||
| } | |||
| uint8_t fill_r, fill_g, fill_b; | |||
| fill_r = fill_value_[0]; | |||
| fill_g = fill_value_[0]; | |||
| fill_b = fill_value_[0]; | |||
| if (fill_value_.size() == 3) { | |||
| fill_r = fill_value_[0]; | |||
| fill_g = fill_value_[1]; | |||
| fill_b = fill_value_[2]; | |||
| } | |||
| std::shared_ptr<PadOp> tensor_op = | |||
| std::make_shared<PadOp>(pad_top, pad_bottom, pad_left, pad_right, padding_mode_, fill_r, fill_g, fill_b); | |||
| return tensor_op; | |||
| } | |||
// CutOutOperation
// Constructor: records the square patch side length and the number of patches.
CutOutOperation::CutOutOperation(int32_t length, int32_t num_patches) : length_(length), num_patches_(num_patches) {}

// Checks the captured parameters; logs and returns false on the first invalid one.
// NOTE(review): zero is accepted for both values — confirm that is intended.
bool CutOutOperation::ValidateParams() {
  if (length_ < 0) {
    MS_LOG(ERROR) << "CutOut: length cannot be negative";
    return false;
  }
  if (num_patches_ < 0) {
    MS_LOG(ERROR) << "CutOut: number of patches cannot be negative";
    return false;
  }
  return true;
}

// Builds the runtime op: square patches (length x length), with the remaining
// CutOutOp arguments fixed to false, 0, 0, 0 (their meaning is defined by CutOutOp).
std::shared_ptr<TensorOp> CutOutOperation::Build() {
  std::shared_ptr<CutOutOp> tensor_op = std::make_shared<CutOutOp>(length_, length_, num_patches_, false, 0, 0, 0);
  return tensor_op;
}
// RandomColorAdjustOperation.
// Constructor: records the brightness/contrast/saturation/hue adjustment ranges.
// Each vector is either {value} (degenerate range) or {lower, upper}.
RandomColorAdjustOperation::RandomColorAdjustOperation(std::vector<float> brightness, std::vector<float> contrast,
                                                       std::vector<float> saturation, std::vector<float> hue)
    : brightness_(brightness), contrast_(contrast), saturation_(saturation), hue_(hue) {}

// Checks that every range vector holds one or two values.
bool RandomColorAdjustOperation::ValidateParams() {
  // Do some input validation.
  if (brightness_.empty() || brightness_.size() > 2) {
    MS_LOG(ERROR) << "RandomColorAdjust: brightness must be a vector of one or two values";
    return false;
  }
  if (contrast_.empty() || contrast_.size() > 2) {
    MS_LOG(ERROR) << "RandomColorAdjust: contrast must be a vector of one or two values";
    return false;
  }
  if (saturation_.empty() || saturation_.size() > 2) {
    MS_LOG(ERROR) << "RandomColorAdjust: saturation must be a vector of one or two values";
    return false;
  }
  if (hue_.empty() || hue_.size() > 2) {
    MS_LOG(ERROR) << "RandomColorAdjust: hue must be a vector of one or two values";
    return false;
  }
  return true;
}

// Builds the runtime op, expanding each range to a (lower, upper) bound pair:
// a single value v becomes the degenerate range (v, v).
// Assumes ValidateParams() succeeded (index 0 is accessed unchecked).
std::shared_ptr<TensorOp> RandomColorAdjustOperation::Build() {
  float brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub;
  brightness_lb = brightness_[0];
  brightness_ub = brightness_[0];
  if (brightness_.size() == 2) brightness_ub = brightness_[1];
  contrast_lb = contrast_[0];
  contrast_ub = contrast_[0];
  if (contrast_.size() == 2) contrast_ub = contrast_[1];
  saturation_lb = saturation_[0];
  saturation_ub = saturation_[0];
  if (saturation_.size() == 2) saturation_ub = saturation_[1];
  hue_lb = hue_[0];
  hue_ub = hue_[0];
  if (hue_.size() == 2) hue_ub = hue_[1];
  std::shared_ptr<RandomColorAdjustOp> tensor_op = std::make_shared<RandomColorAdjustOp>(
    brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub);
  return tensor_op;
}
| } // namespace vision | |||
| } // namespace api | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -1,10 +1,6 @@ | |||
| ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) | |||
| ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) | |||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | |||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | |||
| add_library(core OBJECT | |||
| ${EXAMPLE_SRCS} | |||
| ${FEATURE_SRCS} | |||
| set(DATASET_CORE_SRC_FILES | |||
| client.cc | |||
| config_manager.cc | |||
| cv_tensor.cc | |||
| @@ -13,6 +9,13 @@ add_library(core OBJECT | |||
| tensor.cc | |||
| tensor_row.cc | |||
| tensor_shape.cc | |||
| ) | |||
| ) | |||
| ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) | |||
| ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) | |||
| add_library(core OBJECT ${DATASET_CORE_SRC_FILES} ${EXAMPLE_SRCS} ${FEATURE_SRCS}) | |||
| add_dependencies(core mindspore::protobuf) | |||
| target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) | |||
| if (ENABLE_PYTHON) | |||
| target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) | |||
| endif() | |||
| @@ -25,21 +25,25 @@ | |||
| #include "dataset/core/tensor_shape.h" | |||
| #include "dataset/engine/data_schema.h" | |||
| #include "dataset/engine/dataset_iterator.h" | |||
| #include "dataset/engine/datasetops/source/mindrecord_op.h" | |||
| #include "dataset/engine/datasetops/source/tf_reader_op.h" | |||
| #ifdef ENABLE_PYTHON | |||
| #include "dataset/engine/datasetops/barrier_op.h" | |||
| #include "dataset/engine/datasetops/batch_op.h" | |||
| #include "dataset/engine/datasetops/filter_op.h" | |||
| #include "dataset/engine/datasetops/source/generator_op.h" | |||
| #include "dataset/engine/datasetops/build_vocab_op.h" | |||
| #endif | |||
| #include "dataset/engine/datasetops/batch_op.h" | |||
| #include "dataset/engine/datasetops/dataset_op.h" | |||
| #include "dataset/engine/datasetops/device_queue_op.h" | |||
| #include "dataset/engine/datasetops/map_op.h" | |||
| #include "dataset/engine/datasetops/project_op.h" | |||
| #include "dataset/engine/datasetops/rename_op.h" | |||
| #include "dataset/engine/datasetops/filter_op.h" | |||
| #include "dataset/engine/datasetops/repeat_op.h" | |||
| #include "dataset/engine/datasetops/skip_op.h" | |||
| #include "dataset/engine/datasetops/shuffle_op.h" | |||
| #include "dataset/engine/datasetops/source/generator_op.h" | |||
| #include "dataset/engine/datasetops/source/mindrecord_op.h" | |||
| #include "dataset/engine/datasetops/source/tf_reader_op.h" | |||
| #include "dataset/engine/datasetops/take_op.h" | |||
| #include "dataset/engine/datasetops/zip_op.h" | |||
| #include "dataset/engine/datasetops/concat_op.h" | |||
| @@ -32,6 +32,12 @@ enum class DatasetType { kUnknown, kArrow, kTf }; | |||
| // Possible flavours of Tensor implementations | |||
| enum class TensorImpl { kNone, kFlexible, kCv, kNP }; | |||
| // Possible values for Border types | |||
| enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; | |||
| // Possible interpolation modes | |||
| enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; | |||
| // convenience functions for 32bit int bitmask | |||
| inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; } | |||
| @@ -14,11 +14,12 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "dataset/core/data_type.h" | |||
| #ifdef ENABLE_PYTHON | |||
| #include "dataset/core/pybind_support.h" | |||
| #endif | |||
| #include "utils/log_adapter.h" | |||
| #include "dataset/core/pybind_support.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -29,12 +30,14 @@ uint8_t DataType::SizeInBytes() const { | |||
| return 0; | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| py::dtype DataType::AsNumpyType() const { | |||
| if (type_ < DataType::NUM_OF_TYPES) | |||
| return py::dtype(kTypeInfo[type_].pybindType_); | |||
| else | |||
| return py::dtype("unknown"); | |||
| } | |||
| #endif | |||
| uint8_t DataType::AsCVType() const { | |||
| uint8_t res = kCVInvalidType; | |||
| @@ -112,6 +115,7 @@ std::string DataType::ToString() const { | |||
| return "unknown"; | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| DataType DataType::FromNpArray(const py::array &arr) { | |||
| if (py::isinstance<py::array_t<bool>>(arr)) { | |||
| return DataType(DataType::DE_BOOL); | |||
| @@ -156,6 +160,7 @@ std::string DataType::GetPybindFormat() const { | |||
| } | |||
| return res; | |||
| } | |||
| #endif | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -19,14 +19,16 @@ | |||
| #include <opencv2/core/hal/interface.h> | |||
| #include <string> | |||
| #ifdef ENABLE_PYTHON | |||
| #include "pybind11/numpy.h" | |||
| #include "pybind11/pybind11.h" | |||
| #include "dataset/core/constants.h" | |||
| #include "dataset/core/pybind_support.h" | |||
| namespace py = pybind11; | |||
| #else | |||
| #include "Eigen/Core" | |||
| using float16 = Eigen::half; | |||
| #endif | |||
| #include "dataset/core/constants.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -59,6 +61,7 @@ class DataType { | |||
| const uint8_t cvType_; // OpenCv matching type | |||
| }; | |||
| #ifdef ENABLE_PYTHON | |||
| static inline const TypeInfo kTypeInfo[] = { | |||
| // name, sizeInBytes, pybindTypem formatDescriptor, openCV | |||
| {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN | |||
| @@ -76,19 +79,38 @@ class DataType { | |||
| {"float64", 8, "double", py::format_descriptor<double>::format(), CV_64F}, // DE_FLOAT64 | |||
| {"string", 0, "bytes", "S", kCVInvalidType} // DE_STRING | |||
| }; | |||
| #else | |||
| static inline const TypeInfo kTypeInfo[] = { | |||
| // name, sizeInBytes, pybindTypem formatDescriptor, openCV | |||
| {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN | |||
| {"bool", 1, "bool", "", CV_8U}, // DE_BOOL | |||
| {"int8", 1, "int8", "", CV_8S}, // DE_INT8 | |||
| {"uint8", 1, "uint8", "", CV_8U}, // DE_UINT8 | |||
| {"int16", 2, "int16", "", CV_16S}, // DE_INT16 | |||
| {"uint16", 2, "uint16", "", CV_16U}, // DE_UINT16 | |||
| {"int32", 4, "int32", "", CV_32S}, // DE_INT32 | |||
| {"uint32", 4, "uint32", "", kCVInvalidType}, // DE_UINT32 | |||
| {"int64", 8, "int64", "", kCVInvalidType}, // DE_INT64 | |||
| {"uint64", 8, "uint64", "", kCVInvalidType}, // DE_UINT64 | |||
| {"float16", 2, "float16", "", CV_16F}, // DE_FLOAT16 | |||
| {"float32", 4, "float32", "", CV_32F}, // DE_FLOAT32 | |||
| {"float64", 8, "double", "", CV_64F}, // DE_FLOAT64 | |||
| {"string", 0, "bytes", "", kCVInvalidType} // DE_STRING | |||
| }; | |||
| #endif | |||
| // No arg constructor to create an unknown shape | |||
| DataType() : type_(DE_UNKNOWN) {} | |||
| // Create a type from a given string | |||
| // @param type_str | |||
| /// \param type_str | |||
| explicit DataType(const std::string &type_str); | |||
| // Default destructor | |||
| ~DataType() = default; | |||
| // Create a type from a given enum | |||
| // @param d | |||
| /// \param d | |||
| constexpr explicit DataType(Type d) : type_(d) {} | |||
| constexpr bool operator==(const DataType a) const { return type_ == a.type_; } | |||
| @@ -100,49 +122,49 @@ class DataType { | |||
| constexpr bool operator!=(const Type a) const { return type_ != a; } | |||
| // Disable this usage `if(d)` where d is of type DataType | |||
| // @return | |||
| /// \return | |||
| operator bool() = delete; | |||
| // To be used in Switch/case | |||
| // @return | |||
| /// \return | |||
| operator Type() const { return type_; } | |||
| // The number of bytes needed to store one value of this type | |||
| // @return | |||
| /// \return | |||
| uint8_t SizeInBytes() const; | |||
| // Convert from DataType to OpenCV type | |||
| // @return | |||
| /// \return | |||
| uint8_t AsCVType() const; | |||
| // Convert from OpenCV type to DataType | |||
| // @param cv_type | |||
| // @return | |||
| /// \param cv_type | |||
| /// \return | |||
| static DataType FromCVType(int cv_type); | |||
| // Returns a string representation of the type | |||
| // @return | |||
| /// \return | |||
| std::string ToString() const; | |||
| // returns true if the template type is the same as the Tensor type_ | |||
| // @tparam T | |||
| // @return true or false | |||
| /// \tparam T | |||
| /// \return true or false | |||
| template <typename T> | |||
| bool IsCompatible() const { | |||
| return type_ == FromCType<T>(); | |||
| } | |||
| // returns true if the template type is the same as the Tensor type_ | |||
| // @tparam T | |||
| // @return true or false | |||
| /// \tparam T | |||
| /// \return true or false | |||
| template <typename T> | |||
| bool IsLooselyCompatible() const; | |||
| // << Stream output operator overload | |||
| // @notes This allows you to print the info using stream operators | |||
| // @param out - reference to the output stream being overloaded | |||
| // @param rO - reference to the DataType to display | |||
| // @return - the output stream must be returned | |||
| /// \notes This allows you to print the info using stream operators | |||
| /// \param out - reference to the output stream being overloaded | |||
| /// \param rO - reference to the DataType to display | |||
| /// \return - the output stream must be returned | |||
| friend std::ostream &operator<<(std::ostream &out, const DataType &so) { | |||
| out << so.ToString(); | |||
| return out; | |||
| @@ -151,22 +173,24 @@ class DataType { | |||
| template <typename T> | |||
| static DataType FromCType(); | |||
| #ifdef ENABLE_PYTHON | |||
| // Convert from DataType to Pybind type | |||
| // @return | |||
| /// \return | |||
| py::dtype AsNumpyType() const; | |||
| // Convert from NP type to DataType | |||
| // @param type | |||
| // @return | |||
| /// \param type | |||
| /// \return | |||
| static DataType FromNpType(const py::dtype &type); | |||
| // Convert from NP array to DataType | |||
| // @param py array | |||
| // @return | |||
| /// \param py array | |||
| /// \return | |||
| static DataType FromNpArray(const py::array &arr); | |||
| #endif | |||
| // Get the buffer string format of the current type. Used in pybind buffer protocol. | |||
| // @return | |||
| /// \return | |||
| std::string GetPybindFormat() const; | |||
| bool IsSignedInt() const { | |||
| @@ -28,10 +28,12 @@ | |||
| #include "dataset/core/constants.h" | |||
| #include "dataset/core/cv_tensor.h" | |||
| #include "dataset/core/global_context.h" | |||
| #ifdef ENABLE_PYTHON | |||
| #include "dataset/core/pybind_support.h" | |||
| namespace py = pybind11; | |||
| #endif | |||
| #include "dataset/core/tensor_shape.h" | |||
| namespace py = pybind11; | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Helper macros for printing tensor elements | |||
| @@ -155,6 +157,7 @@ Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape | |||
| MS_ASSERT(num_bytes == 0); | |||
| if (shape.known()) Tensor::Reshape(shape); | |||
| } | |||
| Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape) | |||
| : Tensor(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), DataType(DataType::DE_STRING)) { | |||
| // total bytes needed = offset array + strings | |||
| @@ -194,6 +197,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape | |||
| MS_ASSERT(num_bytes == 0); | |||
| if (shape.known()) Tensor::Reshape(shape); | |||
| } | |||
| Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape, | |||
| DataType type, const unsigned char *data) { | |||
| if (!shape.known()) { | |||
| @@ -223,6 +227,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl | |||
| return Status::OK(); // returns base-class shared_ptr | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) { | |||
| std::vector<dsize_t> shape; | |||
| for (dsize_t i = 0; i < arr.ndim(); i++) { | |||
| @@ -297,6 +302,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) { | |||
| return Status::OK(); // returns base-class shared_ptr | |||
| } | |||
| #endif | |||
| Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings, | |||
| const TensorShape &shape) { | |||
| @@ -698,21 +704,24 @@ std::vector<dsize_t> Tensor::Strides() { | |||
| return strides; | |||
| } | |||
| Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); | |||
| #ifdef ENABLE_PYTHON | |||
| Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(t); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); | |||
| std::string format_desc = t.type().GetPybindFormat(); | |||
| std::string format_desc = t->type().GetPybindFormat(); | |||
| if (format_desc.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format"); | |||
| } | |||
| *out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */ | |||
| t.type().SizeInBytes(), /* Size of one scalar */ | |||
| format_desc, /* Python struct-style format descriptor */ | |||
| t.Rank(), /* Number of dimensions */ | |||
| t.shape().AsVector(), /* Buffer dimensions */ | |||
| t.Strides()); | |||
| *out = py::buffer_info(t->GetMutableBuffer(), /* Pointer to buffer */ | |||
| t->type().SizeInBytes(), /* Size of one scalar */ | |||
| format_desc, /* Python struct-style format descriptor */ | |||
| t->Rank(), /* Number of dimensions */ | |||
| t->shape().AsVector(), /* Buffer dimensions */ | |||
| t->Strides()); | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| template <typename T> | |||
| Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const { | |||
| @@ -752,6 +761,8 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) | |||
| o->swap(sv); | |||
| return Status::OK(); | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| // return data as numpy, should return status | |||
| Status Tensor::GetDataAsNumpy(py::array *data) { | |||
| RETURN_UNEXPECTED_IF_NULL(data_); | |||
| @@ -815,6 +826,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) { | |||
| data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data)); | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| void Tensor::Squeeze() { shape_ = shape_.Squeeze(); } | |||
| @@ -26,20 +26,27 @@ | |||
| #undef HAVE_STDDEF_H | |||
| #undef HAVE_STDLIB_H | |||
| #endif | |||
| #ifdef ENABLE_PYTHON | |||
| #include "pybind11/numpy.h" | |||
| #include "pybind11/pybind11.h" | |||
| #include "pybind11/stl.h" | |||
| #endif | |||
| #include "dataset/core/constants.h" | |||
| #include "dataset/core/data_type.h" | |||
| #include "dataset/core/tensor_shape.h" | |||
| #include "dataset/util/allocator.h" | |||
| #include "dataset/util/status.h" | |||
| #include "proto/example.pb.h" | |||
| #ifdef ENABLE_PYTHON | |||
| namespace py = pybind11; | |||
| #endif | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| class Tensor; | |||
| template <typename T> | |||
| class Allocator; | |||
| using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>; | |||
| using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>; // An allocator shared_ptr for Tensors | |||
| @@ -114,16 +121,17 @@ class Tensor { | |||
| static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type, | |||
| const unsigned char *data = nullptr); | |||
| /// Create a copy of the input tensor | |||
| /// \param out [out] output tensor to be generated | |||
| /// \param in [in] orginal tensor to be copied | |||
| /// \return Status | |||
| // Create a copy of the input tensor | |||
| // @param out [out] output tensor to be generated | |||
| // @param in [in] orginal tensor to be copied | |||
| // @return Status | |||
| static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) { | |||
| const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); | |||
| *out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes()); | |||
| return Status::OK(); | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| // A static factory method to create a Tensor from a given py::array. | |||
| // @param ptr output argument to hold the created Tensor | |||
| // @param arr py::array | |||
| @@ -132,6 +140,7 @@ class Tensor { | |||
| // Helper function to create a tensor from Numpy of strings | |||
| static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr); | |||
| #endif | |||
| // A static factory method to create a Tensor from a given list of strings. | |||
| // @param ptr output argument to hold the created Tensor | |||
| @@ -170,6 +179,7 @@ class Tensor { | |||
| static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) { | |||
| return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar()); | |||
| } | |||
| // Create tensor from protobuf bytelist with uint8 or int8 types | |||
| static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list, | |||
| const TensorShape &shape, const DataType &type, dsize_t pad_size); | |||
| @@ -346,12 +356,12 @@ class Tensor { | |||
| virtual void Squeeze(); | |||
| /// Calculates the strides of the Tensor | |||
| /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) | |||
| /// The strides will be {6,2,1}. | |||
| /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) | |||
| /// The strides will be {24,8,4}. | |||
| /// @return vector of integers | |||
| // Calculates the strides of the Tensor | |||
| // Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) | |||
| // The strides will be {6,2,1}. | |||
| // Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) | |||
| // The strides will be {24,8,4}. | |||
| // @return vector of integers | |||
| std::vector<dsize_t> Strides(); | |||
| std::string ToString() { | |||
| @@ -376,6 +386,7 @@ class Tensor { | |||
| // Slice string tensors | |||
| Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices); | |||
| #ifdef ENABLE_PYTHON | |||
| // Constructs numpy array from input tensor | |||
| // @param data this data is the location of python data | |||
| // @return Status code | |||
| @@ -383,7 +394,8 @@ class Tensor { | |||
| Status GetDataAsNumpyStrings(py::array *data); | |||
| static Status GetBufferInfo(Tensor &t, py::buffer_info *out); | |||
| static Status GetBufferInfo(Tensor *t, py::buffer_info *out); | |||
| #endif | |||
| // Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor | |||
| Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); | |||
| @@ -570,7 +582,7 @@ class Tensor { | |||
| // Return a TensorIterator that points to the start of the Tensor. | |||
| // It's the user responsibility to use the correct type that matches the Tensor type | |||
| // @tparam T The type of values in the Tensor | |||
| // @param T The type of values in the Tensor | |||
| // @return TensorIterator | |||
| template <typename T> | |||
| TensorIterator<T> begin() { | |||
| @@ -18,7 +18,6 @@ | |||
| #include "dataset/core/tensor_row.h" | |||
| namespace py = pybind11; | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -77,6 +77,7 @@ TensorShape::TensorShape(const TensorShape &shape) | |||
| known_ = shape.known_; // override with the input shape in case of unknown-rank tensor shape. | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| TensorShape::TensorShape(py::list l) | |||
| : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { | |||
| std::vector<dsize_t> list_c; | |||
| @@ -89,6 +90,7 @@ TensorShape::TensorShape(py::list l) | |||
| } | |||
| AddListToShape(list_c); | |||
| } | |||
| #endif | |||
| TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type) | |||
| : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { | |||
| @@ -197,6 +199,7 @@ TensorShape TensorShape::AppendDim(dsize_t dim) const { | |||
| return TensorShape(vec); | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| py::list TensorShape::AsPyList() { | |||
| py::list list; | |||
| for (auto i : raw_shape_) { | |||
| @@ -204,6 +207,7 @@ py::list TensorShape::AsPyList() { | |||
| } | |||
| return list; | |||
| } | |||
| #endif | |||
| TensorShape TensorShape::Squeeze() const { | |||
| std::vector<dsize_t> new_shape; | |||
| @@ -24,13 +24,16 @@ | |||
| #include <opencv2/core/mat.hpp> | |||
| #ifdef ENABLE_PYTHON | |||
| #include "pybind11/pybind11.h" | |||
| namespace py = pybind11; | |||
| #endif | |||
| #include "dataset/core/constants.h" | |||
| #include "dataset/util/status.h" | |||
| #include "dataset/core/global_context.h" | |||
| #include "dataset/util/allocator.h" | |||
| namespace py = pybind11; | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Class that represents a shape of a Tensor. A shape can be: | |||
| @@ -43,7 +46,8 @@ namespace dataset { | |||
| // -# one or more dim is unknown --> not empty vector --> <d1, d2, d2, d3, ...> where di is unknown\n | |||
| // Example: <3,?> (the 1st dim is unknown)\n | |||
| // <2,?,?,?> (all dims but the 0th dim are unknown) | |||
| // TensorShape supports any dim > 0 and < 2^31-1 | |||
| /// \brief TensorShape supports any dim > 0 and < 2^31-1 | |||
| class TensorShape { | |||
| public: | |||
| static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension | |||
| @@ -51,57 +55,59 @@ class TensorShape { | |||
| // Force the compiler to not create a no-arg constructor | |||
| TensorShape() = delete; | |||
| // Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). | |||
| // If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown | |||
| // @param list | |||
| /// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). | |||
| /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown | |||
| /// \param[in] list | |||
| explicit TensorShape(const std::initializer_list<dsize_t> &list); | |||
| // Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ). | |||
| // If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown | |||
| // @param list | |||
| /// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ). | |||
| /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown | |||
| /// \param[in] list | |||
| explicit TensorShape(const std::vector<dsize_t> &list); | |||
| // Copy constructor | |||
| // @param shape | |||
| /// \brief Copy constructor | |||
| /// \param[in] shape | |||
| TensorShape(const TensorShape &shape); | |||
| // construct a TensorShape via a python list | |||
| // @param py::list l - a list object from python | |||
| #ifdef ENABLE_PYTHON | |||
| /// \brief construct a TensorShape via a python list | |||
| /// \param[in] py::list l - a list object from python | |||
| explicit TensorShape(py::list l); | |||
| #endif | |||
| ~TensorShape() = default; | |||
| // Create a scalar Shape (i.e., empty shape with mKnown = true) | |||
| // @return TensorShape | |||
| /// \brief Create a scalar Shape (i.e., empty shape with mKnown = true) | |||
| /// \return TensorShape | |||
| static TensorShape CreateScalar() { return TensorShape({}); } | |||
| // Create a shape with an unknown rank. | |||
| // @return TensorShape | |||
| /// \brief Create a shape with an unknown rank. | |||
| /// \return TensorShape | |||
| static TensorShape CreateUnknownRankShape(); | |||
| // Create a shape with a known rank . | |||
| // @return TensorShape | |||
| /// \brief Create a shape with a known rank . | |||
| /// \return TensorShape | |||
| static TensorShape CreateUnknownShapeWithRank(dsize_t rank); | |||
| // Insert a new dim into a copy of the current shape. | |||
| // @param dim to be added | |||
| // @param axis the index where dim should be added | |||
| // @return New modified shape | |||
| /// \brief Insert a new dim into a copy of the current shape. | |||
| /// \param[in] dim to be added | |||
| /// \param[in] axis the index where dim should be added | |||
| /// \return New modified shape | |||
| TensorShape InsertDim(dsize_t axis, dsize_t dim) const; | |||
| // Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> | |||
| // @param dim | |||
| // @return | |||
| /// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> | |||
| /// \param[in] dim | |||
| /// \return | |||
| TensorShape PrependDim(dsize_t dim) const; | |||
| // Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> | |||
| // @param dim | |||
| // @return | |||
| /// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> | |||
| /// \param[in] dim | |||
| /// \return | |||
| TensorShape AppendDim(dsize_t dim) const; | |||
| // Create a shape based on OpenCV shape and type | |||
| // @param cv_size | |||
| // @param type int that represent the type in OpenCV, example CV_8U, CV_64S | |||
| /// \brief Create a shape based on OpenCV shape and type | |||
| /// \param[in] cv_size | |||
| /// \param[in] type int that represent the type in OpenCV, example CV_8U, CV_64S | |||
| TensorShape(cv::MatSize cv_size, uint32_t type); | |||
| dsize_t Size() const { return raw_shape_.size(); } | |||
| @@ -123,47 +129,50 @@ class TensorShape { | |||
| return raw_shape_[index]; | |||
| } | |||
| // Return the Shape as a vector | |||
| // @return | |||
| /// \brief Return the Shape as a vector | |||
| /// \return | |||
| std::vector<dsize_t> AsVector() const; | |||
| // Returns the class info as a string | |||
| // @return | |||
| /// \brief Returns the class info as a string | |||
| /// \return | |||
| std::string ToString() const { | |||
| std::stringstream ss; | |||
| ss << *this; | |||
| return ss.str(); | |||
| } | |||
| // Actual print function used by operator<< | |||
| // @param out output string stream | |||
| /// \brief Actual print function used by operator<< | |||
| /// \param out output string stream | |||
| void Print(std::ostream &out) const; | |||
| // << Stream output operator overload | |||
| // @notes This allows you to print the info using stream operators | |||
| // @param out - reference to the output stream being overloaded | |||
| // @param rO - reference to the TensorShape to display | |||
| // @return - the output stream must be returned | |||
| /// \brief << Stream output operator overload | |||
| /// This allows you to print the info using stream operators | |||
| /// \param[in] out - reference to the output stream being overloaded | |||
| /// \param[in] rO - reference to the TensorShape to display | |||
| /// \return - the output stream must be returned | |||
| friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) { | |||
| so.Print(out); | |||
| return out; | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| py::list AsPyList(); | |||
| #endif | |||
| // Checks if the given index is a valid index for this tensor. | |||
| // For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. | |||
| // @param index | |||
| // @return bool | |||
| /// \brief Checks if the given index is a valid index for this tensor. | |||
| /// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. | |||
| /// \param[in] index | |||
| /// \return bool | |||
| bool IsValidIndex(const std::vector<dsize_t> &index) const; | |||
| TensorShape Squeeze() const; | |||
| std::vector<dsize_t> Strides() const; | |||
| // Returns the location of the item assuming row major memory layout. | |||
| // @param index | |||
| // @return | |||
| /// \brief Returns the location of the item assuming row major memory layout. | |||
| /// \param[in] index | |||
| /// \param[out] flat_index | |||
| /// \return | |||
| Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const; | |||
| private: | |||
| @@ -174,11 +183,11 @@ class TensorShape { | |||
| // Vector to keep the strides of the shape. The size is rank+1 | |||
| std::vector<dsize_t, IntAlloc> strides_; | |||
| // Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape. | |||
| // @tparam T list | |||
| // @param list Iterable list | |||
| // @return true if the shape is valid and no overflow would be generated when counting the number of elements. | |||
| // False otherwise. | |||
| /// \brief Internal utility function to iterate over a list, | |||
| /// check if the dim is valid and then insert it into the shape. | |||
| /// \param[in] list Iterable list | |||
| /// \return true if the shape is valid and no overflow would be generated when counting the number of elements. | |||
| /// False otherwise. | |||
| template <typename T> | |||
| void AddListToShape(const T &list); | |||
| }; | |||
| @@ -2,13 +2,12 @@ add_subdirectory(source) | |||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | |||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | |||
| add_library(engine-datasetops OBJECT | |||
| set(DATASET_ENGINE_DATASETOPS_SRC_FILES | |||
| dataset_op.cc | |||
| parallel_op.cc | |||
| pipeline_op.cc | |||
| barrier_op.cc | |||
| batch_op.cc | |||
| bucket_batch_by_length_op.cc | |||
| device_queue_op.cc | |||
| map_op.cc | |||
| project_op.cc | |||
| @@ -18,8 +17,18 @@ add_library(engine-datasetops OBJECT | |||
| take_op.cc | |||
| shuffle_op.cc | |||
| zip_op.cc | |||
| concat_op.cc | |||
| filter_op.cc | |||
| build_vocab_op.cc | |||
| concat_op.cc | |||
| ) | |||
| if (ENABLE_PYTHON) | |||
| set(DATASET_ENGINE_DATASETOPS_SRC_FILES | |||
| ${DATASET_ENGINE_DATASETOPS_SRC_FILES} | |||
| bucket_batch_by_length_op.cc | |||
| barrier_op.cc | |||
| filter_op.cc | |||
| build_vocab_op.cc | |||
| ) | |||
| endif() | |||
| add_library(engine-datasetops OBJECT ${DATASET_ENGINE_DATASETOPS_SRC_FILES}) | |||
| @@ -19,7 +19,9 @@ | |||
| #include <iomanip> | |||
| #include "common/utils.h" | |||
| #ifdef ENABLE_PYTHON | |||
| #include "dataset/core/pybind_support.h" | |||
| #endif | |||
| #include "dataset/engine/data_buffer.h" | |||
| #include "dataset/engine/db_connector.h" | |||
| #include "dataset/engine/opt/pass.h" | |||
| @@ -38,9 +40,14 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa | |||
| Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) { | |||
| RETURN_IF_NOT_OK(SanityCheck()); | |||
| #ifdef ENABLE_PYTHON | |||
| *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, | |||
| builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_, | |||
| builder_batch_map_func_, builder_pad_map_); | |||
| #else | |||
| *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, | |||
| builder_num_workers_, builder_cols_to_map_, builder_pad_map_); | |||
| #endif | |||
| return Status::OK(); | |||
| } | |||
| @@ -52,6 +59,7 @@ Status BatchOp::Builder::SanityCheck() { | |||
| return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err)); | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | |||
| const std::vector<std::string> &cols_to_map, py::function batch_size_func, py::function batch_map_func, | |||
| PadInfo pad_map) | |||
| @@ -65,6 +73,18 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, | |||
| pad_info_(pad_map) { | |||
| worker_queues_.Init(num_workers, op_queue_size); | |||
| } | |||
| #else | |||
| BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | |||
| const std::vector<std::string> &cols_to_map, PadInfo pad_map) | |||
| : ParallelOp(num_workers, op_queue_size), | |||
| start_batch_size_(batch_size), | |||
| drop_(drop), | |||
| pad_(pad), | |||
| pyfunc_column_names_(cols_to_map), | |||
| pad_info_(pad_map) { | |||
| worker_queues_.Init(num_workers, op_queue_size); | |||
| } | |||
| #endif | |||
| Status BatchOp::operator()() { | |||
| Status rc = LaunchThreadsAndInitOp(); | |||
| @@ -206,7 +226,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) { | |||
| Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, | |||
| std::unique_ptr<DataBuffer> *db) { | |||
| RETURN_UNEXPECTED_IF_NULL(table_pair.first); | |||
| if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc | |||
| #ifdef ENABLE_PYTHON | |||
| if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc | |||
| #endif | |||
| if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed | |||
| (*db) = std::make_unique<DataBuffer>(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<TensorQTable> dest_table = std::make_unique<TensorQTable>(); | |||
| @@ -229,6 +251,7 @@ Status BatchOp::EoeReceived(int32_t) { | |||
| return Status::OK(); | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) { | |||
| TensorBatchTable input_table; | |||
| input_table.reserve(pyfunc_column_names_.size()); | |||
| @@ -259,16 +282,22 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { | |||
| #ifdef ENABLE_PYTHON | |||
| if (batch_size_func_ != nullptr) { | |||
| RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); | |||
| } else { | |||
| (*batch_size) = start_batch_size_; | |||
| } | |||
| #else | |||
| (*batch_size) = start_batch_size_; | |||
| #endif | |||
| return Status::OK(); | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { | |||
| { | |||
| // Acquire Python GIL | |||
| @@ -336,6 +365,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou | |||
| } | |||
| return Status(StatusCode::kOK); | |||
| } | |||
| #endif | |||
| Status BatchOp::PadColumns(std::unique_ptr<TensorQTable> *table, const PadInfo &pad_info, | |||
| const std::unordered_map<std::string, int32_t> &column_name_id_map) { | |||
| @@ -89,6 +89,7 @@ class BatchOp : public ParallelOp { | |||
| return *this; | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| // set columns to perform map on | |||
| // @param const std::vector<std::string> & cols_to_map - name of columns to perform map on | |||
| // @return Builder & reference to builder class object | |||
| @@ -104,6 +105,7 @@ class BatchOp : public ParallelOp { | |||
| builder_batch_size_func_ = batch_size_func; | |||
| return *this; | |||
| } | |||
| #endif | |||
| // @param std::shared_ptr<BatchOp> *ptr pointer to shared_ptr, actual return arg | |||
| // @return Status - The error code return | |||
| @@ -121,8 +123,10 @@ class BatchOp : public ParallelOp { | |||
| int32_t builder_op_connector_size_; | |||
| std::vector<std::string> builder_cols_to_map_; | |||
| PadInfo builder_pad_map_; | |||
| #ifdef ENABLE_PYTHON | |||
| py::function builder_batch_size_func_; | |||
| py::function builder_batch_map_func_; | |||
| #endif | |||
| }; | |||
| enum batchCtrl : int8_t { kNoCtrl = 0, kEOE = 1, kEOF = 2, kQuit = 3 }; | |||
| @@ -144,6 +148,7 @@ class BatchOp : public ParallelOp { | |||
| const int64_t get_epoch_num() const { return epoch_num_; } | |||
| }; | |||
| #ifdef ENABLE_PYTHON | |||
| // BatchOp constructor | |||
| // @param int32_t batch_size | |||
| // @param bool drop | |||
| @@ -152,6 +157,10 @@ class BatchOp : public ParallelOp { | |||
| // @param int32_t num_workers | |||
| BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | |||
| const std::vector<std::string> &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map); | |||
| #else | |||
| BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | |||
| const std::vector<std::string> &, PadInfo pad_map); | |||
| #endif | |||
| // BatchOp destructor | |||
| ~BatchOp() {} | |||
| @@ -219,10 +228,13 @@ class BatchOp : public ParallelOp { | |||
| // @return Status - The error code return | |||
| Status MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, | |||
| std::unique_ptr<DataBuffer> *db); | |||
| #ifdef ENABLE_PYTHON | |||
| // Function that calls pyfunc to perform map on batch | |||
| // @param (std::pair<std::unique_ptr<TensorQTable>, batch_stats> *table_pair - contains un-batched tensor | |||
| // @return Status - The error code return | |||
| Status MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair); | |||
| #endif | |||
| // @param const PadInfo &pad_info pad info to unpack | |||
| // @param const std::unordered_map<std::string, int32_t>& column_name_id_map - column names to index mapping | |||
| @@ -247,6 +259,7 @@ class BatchOp : public ParallelOp { | |||
| // @return Status - The error code return | |||
| Status LaunchThreadsAndInitOp(); | |||
| #ifdef ENABLE_PYTHON | |||
| // Invoke batch size function with current BatchInfo to generate batch size. | |||
| // @return Status - The error code return | |||
| Status InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info); | |||
| @@ -254,6 +267,7 @@ class BatchOp : public ParallelOp { | |||
| // Invoke batch map function with current BatchInfo to generate tensors to batch. | |||
| // @return Status - The error code return | |||
| Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info); | |||
| #endif | |||
| int32_t start_batch_size_; | |||
| bool drop_; // bool for whether to drop remainder or not | |||
| @@ -262,8 +276,10 @@ class BatchOp : public ParallelOp { | |||
| PadInfo pad_info_; // column names to perform padding on | |||
| std::unique_ptr<ChildIterator> child_iterator_; // child iterator for fetching TensorRows 1 by 1 | |||
| QueueList<std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>> worker_queues_; // internal queue for syncing worker | |||
| #ifdef ENABLE_PYTHON | |||
| py::function batch_size_func_; // Function pointer of batch size function | |||
| py::function batch_map_func_; // Function pointer of per batch map function | |||
| #endif | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -1,19 +1,32 @@ | |||
| add_subdirectory(sampler) | |||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | |||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | |||
| add_library(engine-datasetops-source OBJECT | |||
| generator_op.cc | |||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||
| io_block.cc | |||
| mindrecord_op.cc | |||
| tf_reader_op.cc | |||
| image_folder_op.cc | |||
| mnist_op.cc | |||
| voc_op.cc | |||
| coco_op.cc | |||
| manifest_op.cc | |||
| cifar_op.cc | |||
| random_data_op.cc | |||
| celeba_op.cc | |||
| text_file_op.cc | |||
| clue_op.cc | |||
| ) | |||
| ) | |||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||
| ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} | |||
| mindrecord_op.cc | |||
| tf_reader_op.cc | |||
| ) | |||
| if (ENABLE_PYTHON) | |||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||
| ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} | |||
| generator_op.cc | |||
| voc_op.cc | |||
| manifest_op.cc | |||
| ) | |||
| endif() | |||
| add_library(engine-datasetops-source OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}) | |||
| @@ -1,12 +1,21 @@ | |||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | |||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | |||
| add_library(engine-datasetops-source-sampler OBJECT | |||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES | |||
| distributed_sampler.cc | |||
| pk_sampler.cc | |||
| python_sampler.cc | |||
| random_sampler.cc | |||
| sampler.cc | |||
| sequential_sampler.cc | |||
| subset_random_sampler.cc | |||
| weighted_random_sampler.cc | |||
| ) | |||
| if (ENABLE_PYTHON) | |||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES | |||
| ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} | |||
| python_sampler.cc | |||
| ) | |||
| endif() | |||
| add_library(engine-datasetops-source-sampler OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES}) | |||
| @@ -89,6 +89,7 @@ void Sampler::Print(std::ostream &out, bool show_all) const { | |||
| } | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| Status Sampler::GetAllIdsThenReset(py::array *data) { | |||
| std::unique_ptr<DataBuffer> db; | |||
| std::shared_ptr<Tensor> sample_ids; | |||
| @@ -120,6 +121,7 @@ Status Sampler::GetAllIdsThenReset(py::array *data) { | |||
| RETURN_IF_NOT_OK(ResetSampler()); | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| Status Sampler::SetNumSamples(int64_t num_samples) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative"); | |||
| @@ -74,8 +74,11 @@ class Sampler { | |||
| // @return - The error code return | |||
| virtual Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) = 0; | |||
| // This function only called by python layer. Not needed by Android. | |||
| #ifdef ENABLE_PYTHON | |||
| // return all ids in one epoch as a numpy array, then call reset | |||
| Status GetAllIdsThenReset(py::array *data); | |||
| #endif | |||
| // for next epoch of sampleIds | |||
| // @return - The error code return | |||
| @@ -155,5 +158,4 @@ class Sampler { | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SAMPLER_H_ | |||
| @@ -429,6 +429,7 @@ Status Graph::GetMetaInfo(MetaInfo *meta_info) { | |||
| return Status::OK(); | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| Status Graph::GraphInfo(py::dict *out) { | |||
| MetaInfo meta_info; | |||
| RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); | |||
| @@ -440,6 +441,7 @@ Status Graph::GraphInfo(py::dict *out) { | |||
| (*out)["edge_feature_type"] = py::cast(meta_info.edge_feature_type); | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| Status Graph::LoadNodeAndEdge() { | |||
| GraphLoader gl(dataset_file_, num_workers_); | |||
| @@ -140,8 +140,10 @@ class Graph { | |||
| // @return Status - The error code return | |||
| Status GetMetaInfo(MetaInfo *meta_info); | |||
| #ifdef ENABLE_PYTHON | |||
| // Return meta information to python layer | |||
| Status GraphInfo(py::dict *out); | |||
| #endif | |||
| Status Init(); | |||
| @@ -21,13 +21,15 @@ | |||
| #include "dataset/engine/datasetops/map_op.h" | |||
| #include "dataset/engine/datasetops/project_op.h" | |||
| #include "dataset/engine/datasetops/rename_op.h" | |||
| #include "dataset/engine/datasetops/filter_op.h" | |||
| #include "dataset/engine/datasetops/repeat_op.h" | |||
| #include "dataset/engine/datasetops/skip_op.h" | |||
| #include "dataset/engine/datasetops/shuffle_op.h" | |||
| #include "dataset/engine/datasetops/source/generator_op.h" | |||
| #include "dataset/engine/datasetops/source/mindrecord_op.h" | |||
| #include "dataset/engine/datasetops/source/tf_reader_op.h" | |||
| #ifdef ENABLE_PYTHON | |||
| #include "dataset/engine/datasetops/filter_op.h" | |||
| #include "dataset/engine/datasetops/source/generator_op.h" | |||
| #endif | |||
| #include "dataset/engine/datasetops/source/image_folder_op.h" | |||
| #include "dataset/engine/datasetops/take_op.h" | |||
| #include "dataset/engine/datasetops/zip_op.h" | |||
| @@ -111,35 +113,37 @@ Status NodePass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) { | |||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | |||
| } | |||
| Status NodePass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) { | |||
| Status NodePass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) { | |||
| // Fallback to base class visitor by default | |||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | |||
| } | |||
| Status NodePass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) { | |||
| Status NodePass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) { | |||
| // Fallback to base class visitor by default | |||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | |||
| } | |||
| Status NodePass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) { | |||
| Status NodePass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) { | |||
| // Fallback to base class visitor by default | |||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | |||
| } | |||
| Status NodePass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) { | |||
| Status NodePass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) { | |||
| // Fallback to base class visitor by default | |||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | |||
| } | |||
| Status NodePass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) { | |||
| #ifdef ENABLE_PYTHON | |||
| Status NodePass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) { | |||
| // Fallback to base class visitor by default | |||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | |||
| } | |||
| Status NodePass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) { | |||
| Status NodePass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) { | |||
| // Fallback to base class visitor by default | |||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | |||
| } | |||
| #endif | |||
| Status NodePass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) { | |||
| // Fallback to base class visitor by default | |||
| @@ -33,18 +33,20 @@ class ProjectOp; | |||
| class RenameOp; | |||
| class FilterOp; | |||
| class SkipOp; | |||
| class ShuffleOp; | |||
| class GeneratorOp; | |||
| class MindRecordOp; | |||
| class TFReaderOp; | |||
| #ifdef ENABLE_PYTHON | |||
| class FilterOp; | |||
| class GeneratorOp; | |||
| #endif | |||
| class TakeOp; | |||
| class ZipOp; | |||
| @@ -122,18 +124,20 @@ class NodePass : public Pass { | |||
| virtual Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified); | |||
| virtual Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified); | |||
| virtual Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified); | |||
| virtual Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified); | |||
| virtual Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified); | |||
| virtual Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified); | |||
| virtual Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified); | |||
| #ifdef ENABLE_PYTHON | |||
| virtual Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified); | |||
| virtual Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified); | |||
| #endif | |||
| virtual Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified); | |||
| virtual Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified); | |||
| @@ -50,12 +50,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) { | |||
| return Status::OK(); | |||
| } | |||
| Status PrinterPass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) { | |||
| *modified = false; | |||
| std::cout << "Visiting FilterOp" << '\n'; | |||
| return Status::OK(); | |||
| } | |||
| Status PrinterPass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) { | |||
| *modified = false; | |||
| std::cout << "Visiting SkipOp" << '\n'; | |||
| @@ -67,11 +61,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) { | |||
| return Status::OK(); | |||
| } | |||
| Status PrinterPass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) { | |||
| *modified = false; | |||
| std::cout << "Visiting GeneratorOp" << '\n'; | |||
| return Status::OK(); | |||
| } | |||
| Status PrinterPass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) { | |||
| *modified = false; | |||
| std::cout << "Visiting MindRecordOp" << '\n'; | |||
| @@ -84,6 +73,20 @@ Status PrinterPass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) | |||
| return Status::OK(); | |||
| } | |||
| #ifdef ENABLE_PYTHON | |||
| Status PrinterPass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) { | |||
| *modified = false; | |||
| std::cout << "Visiting FilterOp" << '\n'; | |||
| return Status::OK(); | |||
| } | |||
| Status PrinterPass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) { | |||
| *modified = false; | |||
| std::cout << "Visiting GeneratorOp" << '\n'; | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| Status PrinterPass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) { | |||
| *modified = false; | |||
| std::cout << "Visiting TakeOp" << '\n'; | |||
| @@ -35,18 +35,20 @@ class PrinterPass : public NodePass { | |||
| Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) override; | |||
| Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) override; | |||
| Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) override; | |||
| Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) override; | |||
| Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) override; | |||
| Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) override; | |||
| Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) override; | |||
| #ifdef ENABLE_PYTHON | |||
| Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) override; | |||
| Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) override; | |||
| #endif | |||
| Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) override; | |||
| Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified) override; | |||
| @@ -0,0 +1 @@ | |||
| ../../../core/constants.h | |||
| @@ -0,0 +1 @@ | |||
| ../../../core/data_type.h | |||
| @@ -0,0 +1 @@ | |||
| ../../../core/tensor_shape.h | |||
| @@ -0,0 +1 @@ | |||
| ../../../util/status.h | |||
| @@ -0,0 +1,357 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_INCLUDE_DATASETS_H_ | |||
| #define DATASET_INCLUDE_DATASETS_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <set> | |||
| #include <map> | |||
| #include <utility> | |||
| #include <string> | |||
| #include "dataset/include/tensor.h" | |||
| #include "dataset/include/iterator.h" | |||
| #include "dataset/include/samplers.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Forward declare | |||
| class DatasetOp; | |||
| class DataSchema; | |||
| class Tensor; | |||
| class TensorShape; | |||
| namespace api { | |||
| class TensorOperation; | |||
| class SamplerObj; | |||
| class ImageFolderDataset; | |||
| class MnistDataset; | |||
| class BatchDataset; | |||
| class RepeatDataset; | |||
| class MapDataset; | |||
| class ShuffleDataset; | |||
| class Cifar10Dataset; | |||
| class ProjectDataset; | |||
/// \brief Function to create an ImageFolderDataset
/// \notes A source dataset that reads images from a tree of directories
///     All images within one folder have the same label
///     The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] decode A flag to decode in ImageFolder
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///     A `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] extensions File extensions to be read
/// \param[in] class_indexing a class name to label map
/// \return Shared pointer to the current ImageFolderDataset
std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool decode = false,
                                                std::shared_ptr<SamplerObj> sampler = nullptr,
                                                std::set<std::string> extensions = {},
                                                std::map<std::string, int32_t> class_indexing = {});

/// \brief Function to create a MnistDataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///     A `RandomSampler` will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current MnistDataset
std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler = nullptr);

/// \brief Function to create a Cifar10 Dataset
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] num_samples The number of images to be included in the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
///     will be used to randomly iterate the entire dataset
/// \note Unlike ImageFolder/Mnist above, `sampler` here has no default — callers must pass one explicitly
/// \return Shared pointer to the current Cifar10Dataset
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples,
                                        std::shared_ptr<SamplerObj> sampler);
/// \class Dataset datasets.h
/// \brief A base class to represent a dataset in the data pipeline.
class Dataset : public std::enable_shared_from_this<Dataset> {
 public:
  friend class Iterator;

  /// \brief Constructor
  Dataset();

  /// \brief Destructor
  ~Dataset() = default;

  /// \brief Pure virtual function to convert a Dataset class into a runtime dataset object
  /// \return shared pointer to the list of newly created DatasetOps
  virtual std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() = 0;

  /// \brief Pure virtual function for derived class to implement parameters validation
  /// \return bool True if all the params are valid
  virtual bool ValidateParams() = 0;

  /// \brief Setter function for runtime number of workers
  /// \param[in] num_workers The number of threads in this operator
  /// \return Shared pointer to the original object
  std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers) {
    num_workers_ = num_workers;
    return shared_from_this();
  }

  /// \brief Function to create an Iterator over the Dataset pipeline
  /// \return Shared pointer to the Iterator
  std::shared_ptr<Iterator> CreateIterator();

  /// \brief Function to create a BatchDataset
  /// \notes Combines batch_size number of consecutive rows into batches
  /// \param[in] batch_size The number of consecutive rows each batch is created with
  /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete
  ///     batch. If true, and if there are less than batch_size rows
  ///     available to make the last batch, then those rows will
  ///     be dropped and not propagated to the next node
  /// \return Shared pointer to the current BatchDataset
  std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);

  /// \brief Function to create a RepeatDataset
  /// \notes Repeats this dataset count times. Repeat indefinitely if count is -1
  /// \param[in] count Number of times the dataset should be repeated
  /// \return Shared pointer to the current Dataset
  /// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset`
  ///     due to a limitation in the current implementation
  std::shared_ptr<Dataset> Repeat(int32_t count = -1);

  /// \brief Function to create a MapDataset
  /// \notes Applies each operation in operations to this dataset
  /// \param[in] operations Vector of operations to be applied on the dataset. Operations are
  ///     applied in the order they appear in this list
  /// \param[in] input_columns Vector of the names of the columns that will be passed to the first
  ///     operation as input. The size of this list must match the number of
  ///     input columns expected by the first operator. The default input_columns
  ///     is the first column
  /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation
  ///     This parameter is mandatory if len(input_columns) != len(output_columns)
  ///     The size of this list must match the number of output columns of the
  ///     last operation. The default output_columns will have the same
  ///     name as the input columns, i.e., the columns will be replaced
  /// \param[in] project_columns A list of column names to project
  /// \return Shared pointer to the current MapDataset
  std::shared_ptr<MapDataset> Map(std::vector<std::shared_ptr<TensorOperation>> operations,
                                  std::vector<std::string> input_columns = {},
                                  std::vector<std::string> output_columns = {},
                                  const std::vector<std::string> &project_columns = {});

  /// \brief Function to create a Shuffle Dataset
  /// \notes Randomly shuffles the rows of this dataset
  /// \param[in] shuffle_size The size of the buffer (must be larger than 1) for shuffling
  /// \return Shared pointer to the current ShuffleDataset
  std::shared_ptr<ShuffleDataset> Shuffle(int32_t shuffle_size);

  /// \brief Function to create a Project Dataset
  /// \notes Applies project to the dataset
  /// \param[in] columns The name of columns to project
  /// \return Shared pointer to the current Dataset
  std::shared_ptr<ProjectDataset> Project(const std::vector<std::string> &columns);

 protected:
  // NOTE(review): `children` and `parent` lack the trailing-underscore convention
  // used by the other members — consider renaming for consistency.
  std::vector<std::shared_ptr<Dataset>> children;  // child nodes of this dataset in the pipeline
  std::shared_ptr<Dataset> parent;                 // parent node of this dataset in the pipeline
  int32_t num_workers_;                            // runtime number of worker threads
  int32_t rows_per_buffer_;                        // rows per data buffer at runtime
  int32_t connector_que_size_;                     // output connector queue size at runtime
};
| /* ####################################### Derived Dataset classes ################################# */ | |||
/// \class ImageFolderDataset
/// \brief A Dataset derived class to represent ImageFolder dataset
class ImageFolderDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] dataset_dir Path to the root directory that contains the dataset
  /// \param[in] decode A flag to decode the images
  /// \param[in] sampler Object used to choose samples from the dataset
  /// \param[in] recursive Whether to read sub-directories recursively
  ///     (NOTE(review): the `ImageFolder` factory above exposes no matching argument — confirm how it is set)
  /// \param[in] extensions Set of file extensions to be read
  /// \param[in] class_indexing A class name to label map
  ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
                     std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing);

  /// \brief Destructor
  ~ImageFolderDataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::string dataset_dir_;                      // root directory of the dataset
  bool decode_;                                  // whether images are decoded
  bool recursive_;                               // whether sub-directories are traversed
  std::shared_ptr<SamplerObj> sampler_;          // sampler builder, may be nullptr
  std::map<std::string, int32_t> class_indexing_;  // class name -> label map
  std::set<std::string> exts_;                   // file extensions to read
};
/// \class MnistDataset
/// \brief A Dataset derived class to represent the Mnist dataset
class MnistDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] dataset_dir Path to the root directory that contains the dataset
  /// \param[in] sampler Object used to choose samples from the dataset, may be nullptr
  MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler);

  /// \brief Destructor
  ~MnistDataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::string dataset_dir_;              // root directory of the dataset
  std::shared_ptr<SamplerObj> sampler_;  // sampler builder, may be nullptr
};
/// \class BatchDataset
/// \brief A Dataset derived class to represent a Batch operation in the pipeline
class BatchDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] batch_size The number of consecutive rows combined into each batch
  /// \param[in] drop_remainder Whether to drop the final, possibly incomplete batch
  /// \param[in] pad Whether padding is enabled
  ///     (NOTE(review): presumably pads columns listed in cols_to_map using pad_map — confirm against BatchOp)
  /// \param[in] cols_to_map Names of the columns involved in the batch mapping/padding
  /// \param[in] pad_map Mapping from column name to the pad shape and pad value tensor
  BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map,
               std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map);

  /// \brief Destructor
  ~BatchDataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  int32_t batch_size_;                  // rows combined per batch
  bool drop_remainder_;                 // drop final incomplete batch when true
  bool pad_;                            // padding enabled flag
  std::vector<std::string> cols_to_map_;  // columns the mapping/padding applies to
  std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_;  // per-column pad shape/value
};
/// \class RepeatDataset
/// \brief A Dataset derived class to represent a Repeat operation in the pipeline
class RepeatDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] count Number of times the dataset is repeated
  /// \note NOTE(review): `Dataset::Repeat` takes `int32_t count = -1` (repeat indefinitely), but this
  ///     constructor takes `uint32_t`; passing -1 wraps to UINT32_MAX — confirm the intended sentinel handling.
  explicit RepeatDataset(uint32_t count);

  /// \brief Destructor
  ~RepeatDataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  uint32_t repeat_count_;  // number of repetitions requested
};
/// \class ShuffleDataset
/// \brief A Dataset derived class to represent a Shuffle operation in the pipeline
class ShuffleDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] shuffle_size Size of the shuffle buffer
  /// \param[in] reset_every_epoch Presumably controls whether the shuffle seed is reset each
  ///     epoch — TODO confirm against the runtime ShuffleOp
  ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch);

  /// \brief Destructor
  ~ShuffleDataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  int32_t shuffle_size_;    // requested shuffle buffer size
  uint32_t shuffle_seed_;   // seed used by the shuffle operation
  bool reset_every_epoch_;  // see constructor note
};
/// \class MapDataset
/// \brief A Dataset derived class to represent a Map operation in the pipeline
class MapDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] operations Vector of tensor operations, applied in order
  /// \param[in] input_columns Names of the columns passed to the first operation (default: first column)
  /// \param[in] output_columns Names assigned to the columns produced by the last operation
  /// \param[in] columns Column names to project after the map
  MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns = {},
             std::vector<std::string> output_columns = {}, const std::vector<std::string> &columns = {});

  /// \brief Destructor
  ~MapDataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::vector<std::shared_ptr<TensorOperation>> operations_;  // operations applied in order
  std::vector<std::string> input_columns_;                    // columns fed to the first operation
  std::vector<std::string> output_columns_;                   // names of the produced columns
  std::vector<std::string> project_columns_;                  // columns projected after the map
};
/// \class Cifar10Dataset
/// \brief A Dataset derived class to represent the Cifar10 dataset
class Cifar10Dataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] dataset_dir Path to the root directory that contains the dataset
  /// \param[in] num_samples The number of images to be included in the dataset
  /// \param[in] sampler Object used to choose samples from the dataset
  Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler);

  /// \brief Destructor
  ~Cifar10Dataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::string dataset_dir_;              // root directory of the dataset
  int32_t num_samples_;                  // number of images to include
  std::shared_ptr<SamplerObj> sampler_;  // sampler builder, may be nullptr
};
/// \class ProjectDataset
/// \brief A Dataset derived class to represent a Project operation in the pipeline
class ProjectDataset : public Dataset {
 public:
  /// \brief Constructor
  /// \param[in] columns The names of the columns to project
  explicit ProjectDataset(const std::vector<std::string> &columns);

  /// \brief Destructor
  ~ProjectDataset() = default;

  /// \brief a base class override function to create the required runtime dataset op objects for this class
  /// \return shared pointer to the list of newly created DatasetOps
  std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;

  /// \brief Parameters validation
  /// \return bool true if all the params are valid
  bool ValidateParams() override;

 private:
  std::vector<std::string> columns_;  // column names to keep
};
| } // namespace api | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_INCLUDE_DATASETS_H_ | |||
| @@ -0,0 +1,115 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_INCLUDE_ITERATOR_H_ | |||
| #define DATASET_INCLUDE_ITERATOR_H_ | |||
| #include <unordered_map> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "dataset/include/status.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Forward declare | |||
| class ExecutionTree; | |||
| class DatasetIterator; | |||
| class DatasetOp; | |||
| class Tensor; | |||
| namespace api { | |||
| class Dataset; | |||
| using TensorMap = std::unordered_map<std::string, std::shared_ptr<Tensor>>; | |||
| // Abstract class for iterating over the dataset. | |||
| class Iterator { | |||
| public: | |||
| /// \brief Constructor | |||
| Iterator() = default; | |||
| /// \brief Destructor | |||
| ~Iterator() = default; | |||
| /// \brief Method for building and launching the pipeline. | |||
| /// \param[in] ops - a vector of DatasetOp in the data pipeline. | |||
| /// \return - a Status error code, returns OK if no error encountered. | |||
| Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds); | |||
| /// \brief Function to get the next row from the data pipeline. | |||
| /// \param[out] row - the output tensor row. | |||
| void GetNextRow(TensorMap *row); | |||
| /// \brief Function to shut down the data pipeline. | |||
| void Stop(); | |||
| class _Iterator { | |||
| public: | |||
| explicit _Iterator(Iterator *lt) : lt_{lt}, cur_row_{nullptr} { | |||
| if (lt_) { | |||
| cur_row_ = new TensorMap(); | |||
| lt_->GetNextRow(cur_row_); | |||
| } | |||
| } | |||
| // Destructor | |||
| ~_Iterator() { | |||
| if (cur_row_) { | |||
| delete cur_row_; | |||
| } | |||
| } | |||
| _Iterator &operator++() { | |||
| if (lt_) { | |||
| ++ind_; | |||
| lt_->GetNextRow(cur_row_); | |||
| } | |||
| if (cur_row_ && cur_row_->size() == 0) { | |||
| delete cur_row_; | |||
| cur_row_ = nullptr; | |||
| } | |||
| return *this; | |||
| } // prefix ++ overload | |||
| TensorMap &operator*() { return *cur_row_; } // dereference operator | |||
| TensorMap *operator->() { return cur_row_; } | |||
| bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; } | |||
| private: | |||
| int ind_; // the cur node our Iterator points to | |||
| Iterator *lt_; | |||
| TensorMap *cur_row_; | |||
| }; | |||
| _Iterator begin() { return _Iterator(this); } | |||
| _Iterator end() { return _Iterator(nullptr); } | |||
| private: | |||
| // Runtime tree. | |||
| // Use shared_ptr instead of unique_ptr because the DatasetIterator constructor takes in a shared_ptr type. | |||
| std::shared_ptr<ExecutionTree> tree_; | |||
| // Runtime iterator | |||
| std::unique_ptr<DatasetIterator> iterator_; | |||
| }; | |||
| } // namespace api | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_INCLUDE_ITERATOR_H_ | |||
| @@ -0,0 +1,199 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_API_SAMPLERS_H_ | |||
| #define DATASET_API_SAMPLERS_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Internal Sampler class forward declaration | |||
| class Sampler; | |||
| namespace api { | |||
/// \class SamplerObj
/// \brief Abstract base class for sampler builders in the dataset C++ API;
///     each derived object builds a runtime Sampler.
class SamplerObj : public std::enable_shared_from_this<SamplerObj> {
 public:
  /// \brief Constructor
  SamplerObj();

  /// \brief Destructor
  ~SamplerObj() = default;

  /// \brief Pure virtual function to convert a SamplerObj into a runtime Sampler object
  /// \return Shared pointer to the newly created Sampler
  virtual std::shared_ptr<Sampler> Build() = 0;

  /// \brief Pure virtual function for derived classes to implement parameter validation
  /// \return bool true if all the params are valid
  virtual bool ValidateParams() = 0;
};
| class DistributedSamplerObj; | |||
| class PKSamplerObj; | |||
| class RandomSamplerObj; | |||
| class SequentialSamplerObj; | |||
| class SubsetRandomSamplerObj; | |||
| class WeightedRandomSamplerObj; | |||
| /// Function to create a Distributed Sampler. | |||
| /// \notes A Sampler that access a shard of the dataset. | |||
| /// \param[in] num_shards - Number of shards to divide the dataset into. | |||
| /// \param[in] shard_id - Shard ID of the current shard within num_shards. | |||
| /// \param[in] shuffle - If true, the indices are shuffled. | |||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||
| /// \param[in] seed - The seed in use when shuffle is true. | |||
| /// \return Shared pointer to the current Sampler. | |||
| std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, | |||
| int64_t num_samples = 0, uint32_t seed = 1); | |||
| /// Function to create a PK Sampler. | |||
| /// \notes Samples K elements for each P class in the dataset. | |||
| /// This will sample all classes. | |||
| /// \param[in] num_val - Number of elements to sample for each class. | |||
| /// \param[in] shuffle - If true, the class IDs are shuffled. | |||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||
| /// \return Shared pointer to the current Sampler. | |||
| std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0); | |||
| /// Function to create a Random Sampler. | |||
| /// \notes Samples the elements randomly. | |||
| /// \param[in] replacement - If True, put the sample ID back for the next draw. | |||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||
| /// \return Shared pointer to the current Sampler. | |||
| std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement = false, int64_t num_samples = 0); | |||
| /// Function to create a Sequential Sampler. | |||
| /// \notes Samples the dataset elements sequentially, same as not having a sampler. | |||
| /// \param[in] start_index - Index to start sampling at (default to start at first id). | |||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||
| /// \return Shared pointer to the current Sampler. | |||
| std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0); | |||
| /// Function to create a Subset Random Sampler. | |||
| /// \notes Samples the elements randomly from a sequence of indices. | |||
| /// \param[in] indices - A vector sequence of indices. | |||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||
| /// \return Shared pointer to the current Sampler. | |||
| std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices, | |||
| int64_t num_samples = 0); | |||
| /// Function to create a Weighted Random Sampler. | |||
| /// \notes Samples the elements from [0, len(weights) - 1] randomly with the given | |||
| /// weights (probabilities). | |||
| /// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1. | |||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||
| /// \param[in] replacement - If True, put the sample ID back for the next draw. | |||
| /// \return Shared pointer to the current Sampler. | |||
| std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights, | |||
| int64_t num_samples = 0, bool replacement = true); | |||
| /* ####################################### Derived Sampler classes ################################# */ | |||
| class DistributedSamplerObj : public SamplerObj { | |||
| public: | |||
| DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed); | |||
| ~DistributedSamplerObj() = default; | |||
| std::shared_ptr<Sampler> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| int64_t num_shards_; | |||
| int64_t shard_id_; | |||
| bool shuffle_; | |||
| int64_t num_samples_; | |||
| uint32_t seed_; | |||
| }; | |||
| class PKSamplerObj : public SamplerObj { | |||
| public: | |||
| PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples); | |||
| ~PKSamplerObj() = default; | |||
| std::shared_ptr<Sampler> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| int64_t num_val_; | |||
| bool shuffle_; | |||
| int64_t num_samples_; | |||
| }; | |||
| class RandomSamplerObj : public SamplerObj { | |||
| public: | |||
| RandomSamplerObj(bool replacement, int64_t num_samples); | |||
| ~RandomSamplerObj() = default; | |||
| std::shared_ptr<Sampler> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| bool replacement_; | |||
| int64_t num_samples_; | |||
| }; | |||
| class SequentialSamplerObj : public SamplerObj { | |||
| public: | |||
| SequentialSamplerObj(int64_t start_index, int64_t num_samples); | |||
| ~SequentialSamplerObj() = default; | |||
| std::shared_ptr<Sampler> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| int64_t start_index_; | |||
| int64_t num_samples_; | |||
| }; | |||
| class SubsetRandomSamplerObj : public SamplerObj { | |||
| public: | |||
| SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples); | |||
| ~SubsetRandomSamplerObj() = default; | |||
| std::shared_ptr<Sampler> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| const std::vector<int64_t> &indices_; | |||
| int64_t num_samples_; | |||
| }; | |||
| class WeightedRandomSamplerObj : public SamplerObj { | |||
| public: | |||
| explicit WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples = 0, | |||
| bool replacement = true); | |||
| ~WeightedRandomSamplerObj() = default; | |||
| std::shared_ptr<Sampler> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| const std::vector<double> &weights_; | |||
| int64_t num_samples_; | |||
| bool replacement_; | |||
| }; | |||
| } // namespace api | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_API_SAMPLERS_H_ | |||
| @@ -0,0 +1 @@ | |||
| ../util/status.h | |||
| @@ -0,0 +1 @@ | |||
| ../core/tensor.h | |||
| @@ -0,0 +1,380 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_API_TRANSFORMS_H_ | |||
| #define DATASET_API_TRANSFORMS_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "dataset/core/constants.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| class TensorOp; | |||
| namespace api { | |||
| // Abstract class to represent a dataset in the data pipeline. | |||
| class TensorOperation : public std::enable_shared_from_this<TensorOperation> { | |||
| public: | |||
| /// \brief Constructor | |||
| TensorOperation(); | |||
| /// \brief Destructor | |||
| ~TensorOperation() = default; | |||
| /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object. | |||
| /// \return shared pointer to the newly created TensorOp. | |||
| virtual std::shared_ptr<TensorOp> Build() = 0; | |||
| virtual bool ValidateParams() = 0; | |||
| }; | |||
| // Transform operations for performing computer vision. | |||
| namespace vision { | |||
| class NormalizeOperation; | |||
| class DecodeOperation; | |||
| class ResizeOperation; | |||
| class RandomCropOperation; | |||
| class CenterCropOperation; | |||
| class UniformAugOperation; | |||
| class RandomHorizontalFlipOperation; | |||
| class RandomVerticalFlipOperation; | |||
| class RandomRotationOperation; | |||
| class PadOperation; | |||
| class CutOutOperation; | |||
| class RandomColorAdjustOperation; | |||
| /// \brief Function to create a Normalize TensorOperation. | |||
| /// \notes Normalize the input image with respect to mean and standard deviation. | |||
| /// \param[in] mean - a vector of mean values for each channel, w.r.t channel order. | |||
| /// \param[in] std - a vector of standard deviations for each channel, w.r.t. channel order. | |||
| /// \return Shared pointer to the current TensorOperation. | |||
| std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std); | |||
| /// \brief Function to create a Decode TensorOperation. | |||
| /// \notes Decode the input image in RGB mode. | |||
| /// \param[in] rgb - a boolean of whether to decode in RGB mode or not. | |||
| /// \return Shared pointer to the current TensorOperation. | |||
| std::shared_ptr<DecodeOperation> Decode(bool rgb = true); | |||
| /// \brief Function to create a Resize TensorOperation. | |||
| /// \notes Resize the input image to the given size. | |||
| /// \param[in] size - a vector representing the output size of the resized image. | |||
| /// If size is a single value, the image will be resized to this value with | |||
| /// the same image aspect ratio. If size has 2 values, it should be (height, width). | |||
| /// \param[in] interpolation An enum for the mode of interpolation | |||
| /// \return Shared pointer to the current TensorOperation. | |||
| std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size, | |||
| InterpolationMode interpolation = InterpolationMode::kLinear); | |||
| /// \brief Function to create a RandomCrop TensorOperation. | |||
| /// \notes Crop the input image at a random location. | |||
| /// \param[in] size - a vector representing the output size of the cropped image. | |||
| /// If size is a single value, a square crop of size (size, size) is returned. | |||
| /// If size has 2 values, it should be (height, width). | |||
| /// \param[in] padding - a vector with the value of pixels to pad the image. If 4 values are provided, | |||
| /// it pads the left, top, right and bottom respectively. | |||
| /// \param[in] pad_if_needed - a boolean whether to pad the image if either side is smaller than | |||
| /// the given output size. | |||
| /// \param[in] fill_value - a vector representing the pixel intensity of the borders, it is used to | |||
| /// fill R, G, B channels respectively. | |||
| /// \return Shared pointer to the current TensorOperation. | |||
| std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0}, | |||
| bool pad_if_needed = false, | |||
| std::vector<uint8_t> fill_value = {0, 0, 0}); | |||
| /// \brief Function to create a CenterCrop TensorOperation. | |||
| /// \notes Crops the input image at the center to the given size. | |||
| /// \param[in] size - a vector representing the output size of the cropped image. | |||
| /// If size is a single value, a square crop of size (size, size) is returned. | |||
| /// If size has 2 values, it should be (height, width). | |||
| /// \return Shared pointer to the current TensorOperation. | |||
| std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size); | |||
| /// \brief Function to create a UniformAugment TensorOperation. | |||
| /// \notes Tensor operation to perform randomly selected augmentation. | |||
| /// \param[in] operations - a vector of TensorOperation operations. | |||
| /// \param[in] num_ops - integer representing the number of OPs to be selected and applied. | |||
| /// \return Shared pointer to the current TensorOperation. | |||
| std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations, | |||
| int32_t num_ops = 2); | |||
| /// \brief Function to create a RandomHorizontalFlip TensorOperation. | |||
| /// \notes Tensor operation to perform random horizontal flip. | |||
| /// \param[in] prob - float representing the probability of flip. | |||
| /// \return Shared pointer to the current TensorOperation. | |||
| std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob = 0.5); | |||
| /// \brief Function to create a RandomVerticalFlip TensorOperation. | |||
| /// \notes Tensor operation to perform random vertical flip. | |||
| /// \param[in] prob - float representing the probability of flip. | |||
| /// \return Shared pointer to the current TensorOperation. | |||
| std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob = 0.5); | |||
| /// \brief Function to create a RandomRotation TensorOp | |||
| /// \notes Rotates the image according to parameters | |||
| /// \param[in] degrees A float vector size 2, representing the starting and ending degree | |||
| /// \param[in] resample An enum for the mode of interpolation | |||
| /// \param[in] expand A boolean representing whether the image is expanded after rotation | |||
| /// \param[in] center A float vector size 2, representing the x and y center of rotation. | |||
| /// \param[in] fill_value A uint8_t vector size 3, representing the rgb value of the fill color | |||
| /// \return Shared pointer to the current TensorOp | |||
| std::shared_ptr<RandomRotationOperation> RandomRotation( | |||
| std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false, | |||
| std::vector<float> center = {-1, -1}, std::vector<uint8_t> fill_value = {0, 0, 0}); | |||
| /// \brief Function to create a Pad TensorOp | |||
| /// \notes Pads the image according to padding parameters | |||
| /// \param[in] padding A vector representing the number of pixels to pad the image | |||
| /// If vector has one value, it pads all sides of the image with that value | |||
| /// If vector has two values, it pads left and right with the first and | |||
| /// top and bottom with the second value | |||
| /// If vector has four values, it pads left, top, right, and bottom with | |||
| /// those values respectively | |||
| /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is | |||
| /// BorderType.kConstant. If 3 values are provided, | |||
| /// it is used to fill R, G, B channels respectively | |||
| /// \param[in] padding_mode The method of padding (default=BorderType.kConstant) | |||
| /// Can be any of | |||
| /// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric] | |||
| /// - BorderType.kConstant, means it fills the border with constant values | |||
| /// - BorderType.kEdge, means it pads with the last value on the edge | |||
| /// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge | |||
| /// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge | |||
| /// \return Shared pointer to the current TensorOp | |||
| std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0}, | |||
| BorderType padding_mode = BorderType::kConstant); | |||
| /// \brief Function to create a CutOut TensorOp | |||
| /// \notes Randomly cut (mask) out a given number of square patches from the input image | |||
| /// \param[in] length Integer representing the side length of each square patch | |||
| /// \param[in] num_patches Integer representing the number of patches to be cut out of an image | |||
| /// \return Shared pointer to the current TensorOp | |||
| std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches = 1); | |||
| /// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image | |||
| /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values | |||
| /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} | |||
| /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values | |||
| /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} | |||
| /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values | |||
| /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} | |||
| /// \param[in] hue Hue adjustment factor. Must be a vector of one or two values | |||
| /// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5 | |||
| /// Default value is {0, 0} | |||
| /// \return Shared pointer to the current TensorOp | |||
| std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0}, | |||
| std::vector<float> contrast = {1.0, 1.0}, | |||
| std::vector<float> saturation = {1.0, 1.0}, | |||
| std::vector<float> hue = {0.0, 0.0}); | |||
| /* ####################################### Derived TensorOperation classes ################################# */ | |||
| class NormalizeOperation : public TensorOperation { | |||
| public: | |||
| NormalizeOperation(std::vector<float> mean, std::vector<float> std); | |||
| ~NormalizeOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| std::vector<float> mean_; | |||
| std::vector<float> std_; | |||
| }; | |||
| class DecodeOperation : public TensorOperation { | |||
| public: | |||
| explicit DecodeOperation(bool rgb = true); | |||
| ~DecodeOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| bool rgb_; | |||
| }; | |||
| class ResizeOperation : public TensorOperation { | |||
| public: | |||
| explicit ResizeOperation(std::vector<int32_t> size, | |||
| InterpolationMode interpolation_mode = InterpolationMode::kLinear); | |||
| ~ResizeOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| std::vector<int32_t> size_; | |||
| InterpolationMode interpolation_; | |||
| }; | |||
| class RandomCropOperation : public TensorOperation { | |||
| public: | |||
| RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0}, | |||
| bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0}); | |||
| ~RandomCropOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| std::vector<int32_t> size_; | |||
| std::vector<int32_t> padding_; | |||
| bool pad_if_needed_; | |||
| std::vector<uint8_t> fill_value_; | |||
| }; | |||
| class CenterCropOperation : public TensorOperation { | |||
| public: | |||
| explicit CenterCropOperation(std::vector<int32_t> size); | |||
| ~CenterCropOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| std::vector<int32_t> size_; | |||
| }; | |||
| class UniformAugOperation : public TensorOperation { | |||
| public: | |||
| explicit UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops = 2); | |||
| ~UniformAugOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| std::vector<std::shared_ptr<TensorOperation>> operations_; | |||
| int32_t num_ops_; | |||
| }; | |||
| class RandomHorizontalFlipOperation : public TensorOperation { | |||
| public: | |||
| explicit RandomHorizontalFlipOperation(float probability = 0.5); | |||
| ~RandomHorizontalFlipOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| float probability_; | |||
| }; | |||
| class RandomVerticalFlipOperation : public TensorOperation { | |||
| public: | |||
| explicit RandomVerticalFlipOperation(float probability = 0.5); | |||
| ~RandomVerticalFlipOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| float probability_; | |||
| }; | |||
| class RandomRotationOperation : public TensorOperation { | |||
| public: | |||
| RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode, bool expand, | |||
| std::vector<float> center, std::vector<uint8_t> fill_value); | |||
| ~RandomRotationOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| std::vector<float> degrees_; | |||
| InterpolationMode interpolation_mode_; | |||
| std::vector<float> center_; | |||
| bool expand_; | |||
| std::vector<uint8_t> fill_value_; | |||
| }; | |||
| class PadOperation : public TensorOperation { | |||
| public: | |||
| PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0}, | |||
| BorderType padding_mode = BorderType::kConstant); | |||
| ~PadOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| std::vector<int32_t> padding_; | |||
| std::vector<uint8_t> fill_value_; | |||
| BorderType padding_mode_; | |||
| }; | |||
| class CutOutOperation : public TensorOperation { | |||
| public: | |||
| explicit CutOutOperation(int32_t length, int32_t num_patches = 1); | |||
| ~CutOutOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| int32_t length_; | |||
| int32_t num_patches_; | |||
| }; | |||
| class RandomColorAdjustOperation : public TensorOperation { | |||
| public: | |||
| RandomColorAdjustOperation(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0}, | |||
| std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0}); | |||
| ~RandomColorAdjustOperation() = default; | |||
| std::shared_ptr<TensorOp> Build() override; | |||
| bool ValidateParams() override; | |||
| private: | |||
| std::vector<float> brightness_; | |||
| std::vector<float> contrast_; | |||
| std::vector<float> saturation_; | |||
| std::vector<float> hue_; | |||
| }; | |||
| } // namespace vision | |||
| } // namespace api | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // DATASET_API_TRANSFORMS_H_ | |||
| @@ -0,0 +1 @@ | |||
| ../../../utils/log_adapter.h | |||
| @@ -0,0 +1 @@ | |||
| ../../../utils/overload.h | |||
| @@ -2,7 +2,13 @@ add_subdirectory(image) | |||
| add_subdirectory(data) | |||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | |||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | |||
| add_library(kernels OBJECT | |||
| py_func_op.cc | |||
| tensor_op.cc) | |||
| target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) | |||
| if (ENABLE_PYTHON) | |||
| add_library(kernels OBJECT | |||
| py_func_op.cc | |||
| tensor_op.cc) | |||
| target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) | |||
| else() | |||
| add_library(kernels OBJECT | |||
| tensor_op.cc) | |||
| endif() | |||
| @@ -23,7 +23,9 @@ | |||
| #include "dataset/core/constants.h" | |||
| #include "dataset/core/data_type.h" | |||
| #ifdef ENABLE_PYTHON | |||
| #include "dataset/core/pybind_support.h" | |||
| #endif | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/core/tensor_shape.h" | |||
| #include "dataset/kernels/data/type_cast_op.h" | |||
| @@ -729,7 +729,6 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output | |||
| int num_channels = input_cv->shape()[2]; | |||
| if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2); | |||
| *output = std::static_pointer_cast<Tensor>(output_cv); | |||
| return Status::OK(); | |||
| } catch (const cv::Exception &e) { | |||
| RETURN_STATUS_UNEXPECTED("Unexpected error in pad"); | |||
| @@ -35,10 +35,6 @@ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; | |||
| enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; | |||
| void JpegErrorExitCustom(j_common_ptr cinfo); | |||
| struct JpegErrorManagerCustom { | |||
| @@ -16,6 +16,7 @@ | |||
| #include "dataset/kernels/image/pad_op.h" | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include "dataset/core/constants.h" | |||
| #include "dataset/util/status.h" | |||
| namespace mindspore { | |||
| @@ -21,7 +21,7 @@ | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/kernels/tensor_op.h" | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include "dataset/core/constants.h" | |||
| #include "dataset/util/status.h" | |||
| namespace mindspore { | |||
| @@ -18,7 +18,6 @@ | |||
| #include "dataset/kernels/image/image_utils.h" | |||
| #include "dataset/util/status.h" | |||
| #include "dataset/core/cv_tensor.h" | |||
| #include "dataset/core/pybind_support.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -16,8 +16,6 @@ | |||
| #ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ | |||
| #define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ | |||
| #include <pybind11/numpy.h> | |||
| #include <pybind11/stl.h> | |||
| #include <memory> | |||
| #include <random> | |||
| #include <cstdlib> | |||
| @@ -26,8 +24,6 @@ | |||
| #include "dataset/kernels/tensor_op.h" | |||
| #include "dataset/util/random.h" | |||
| #include "dataset/util/status.h" | |||
| #include "pybind11/pybind11.h" | |||
| #include "pybind11/stl_bind.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -27,7 +27,6 @@ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| namespace py = pybind11; | |||
| class NgramOp : public TensorOp { | |||
| public: | |||
| @@ -32,7 +32,15 @@ if(ENABLE_MINDDATA) | |||
| endif() | |||
| # fetch ut test files | |||
| if(ENABLE_MINDDATA) | |||
| file(GLOB_RECURSE UT_SRCS ./*.cc) | |||
| file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./*.cc) | |||
| if(NOT ENABLE_PYTHON) | |||
| set(PYTHON_RELATED_SRCS | |||
| dataset/filter_op_test.cc | |||
| dataset/voc_op_test.cc | |||
| dataset/manifest_op_test.cc | |||
| ) | |||
| list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS}) | |||
| endif() | |||
| else() | |||
| file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc) | |||
| foreach(OBJ ${TEMP_UT_SRCS}) | |||
| @@ -90,6 +90,7 @@ SET(DE_UT_SRCS | |||
| concatenate_op_test.cc | |||
| cyclic_array_test.cc | |||
| perf_data_test.cc | |||
| c_api_test.cc | |||
| ) | |||
| add_executable(de_ut_tests ${DE_UT_SRCS}) | |||
| @@ -0,0 +1,771 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <fstream> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "utils/log_adapter.h" | |||
| #include "common/utils.h" | |||
| #include "common/common.h" | |||
| #include "gtest/gtest.h" | |||
| #include "securec.h" | |||
| #include "dataset/include/datasets.h" | |||
| #include "dataset/include/status.h" | |||
| #include "dataset/include/transforms.h" | |||
| #include "dataset/include/iterator.h" | |||
| #include "dataset/core/constants.h" | |||
| #include "dataset/include/samplers.h" | |||
| using namespace mindspore::dataset::api; | |||
| using mindspore::MsLogLevel::ERROR; | |||
| using mindspore::ExceptionType::NoExceptionType; | |||
| using mindspore::LogStream; | |||
| using mindspore::dataset::Tensor; | |||
| using mindspore::dataset::Status; | |||
| using mindspore::dataset::BorderType; | |||
| class MindDataTestPipeline : public UT::DatasetOpTesting { | |||
| protected: | |||
| }; | |||
| TEST_F(MindDataTestPipeline, TestBatchAndRepeat) { | |||
| // Create a Mnist Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testMnistData/"; | |||
| std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 2; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 10); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { | |||
| // Create a Mnist Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testMnistData/"; | |||
| std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> resize_op = vision::Resize({30, 30}); | |||
| EXPECT_TRUE(resize_op != nullptr); | |||
| std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop({16, 16}); | |||
| EXPECT_TRUE(center_crop_op != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({resize_op, center_crop_op}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 1; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 40); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestUniformAugWithOps) { | |||
| // Create a Mnist Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testMnistData/"; | |||
| std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 1; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> resize_op = vision::Resize({30, 30}); | |||
| EXPECT_TRUE(resize_op != nullptr); | |||
| std::shared_ptr<TensorOperation> random_crop_op = vision::RandomCrop({28, 28}); | |||
| EXPECT_TRUE(random_crop_op != nullptr); | |||
| std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop({16, 16}); | |||
| EXPECT_TRUE(center_crop_op != nullptr); | |||
| std::shared_ptr<TensorOperation> uniform_aug_op = vision::UniformAugment({random_crop_op, center_crop_op}, 2); | |||
| EXPECT_TRUE(uniform_aug_op != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({resize_op, uniform_aug_op}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 20); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestRandomFlip) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> random_vertical_flip_op = vision::RandomVerticalFlip(0.5); | |||
| EXPECT_TRUE(random_vertical_flip_op != nullptr); | |||
| std::shared_ptr<TensorOperation> random_horizontal_flip_op = vision::RandomHorizontalFlip(0.5); | |||
| EXPECT_TRUE(random_horizontal_flip_op != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({random_vertical_flip_op, random_horizontal_flip_op}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 1; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 20); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 2; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 10); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { | |||
| std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1); | |||
| EXPECT_NE(sampl, nullptr); | |||
| sampl = PKSampler(3); | |||
| EXPECT_NE(sampl, nullptr); | |||
| sampl = RandomSampler(false, 12); | |||
| EXPECT_NE(sampl, nullptr); | |||
| sampl = SequentialSampler(0, 12); | |||
| EXPECT_NE(sampl, nullptr); | |||
| std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1}; | |||
| sampl = WeightedRandomSampler(weights, 12); | |||
| EXPECT_NE(sampl, nullptr); | |||
| std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23}; | |||
| sampl = SubsetRandomSampler(indices); | |||
| EXPECT_NE(sampl, nullptr); | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 2; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_NE(ds, nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_NE(iter, nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 12); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestPad) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> pad_op1 = vision::Pad({1, 2, 3, 4}, {0}, BorderType::kSymmetric); | |||
| EXPECT_TRUE(pad_op1 != nullptr); | |||
| std::shared_ptr<TensorOperation> pad_op2 = vision::Pad({1}, {1, 1, 1}, BorderType::kEdge); | |||
| EXPECT_TRUE(pad_op2 != nullptr); | |||
| std::shared_ptr<TensorOperation> pad_op3 = vision::Pad({1, 4}); | |||
| EXPECT_TRUE(pad_op3 != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({pad_op1, pad_op2, pad_op3}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 1; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 20); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestCutOut) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> cut_out1 = vision::CutOut(30, 5); | |||
| EXPECT_TRUE(cut_out1!= nullptr); | |||
| std::shared_ptr<TensorOperation> cut_out2 = vision::CutOut(30); | |||
| EXPECT_TRUE(cut_out2 != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({cut_out1, cut_out2}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 1; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 20); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestNormalize) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> normalize = vision::Normalize({121.0, 115.0, 100.0}, {70.0, 68.0, 71.0}); | |||
| EXPECT_TRUE(normalize != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({normalize}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 1; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 20); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestDecode) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> decode = vision::Decode(true); | |||
| EXPECT_TRUE(decode != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({decode}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 1; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_EQ(i, 20); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestShuffleDataset) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Shuffle operation on ds | |||
| int32_t shuffle_size = 10; | |||
| ds = ds->Shuffle(shuffle_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 2; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 10); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestCifar10Dataset) { | |||
| // Create a Cifar10 Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; | |||
| std::shared_ptr<Dataset> ds = Cifar10(folder_path, 0, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 2; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 10); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> random_color_adjust1 = vision::RandomColorAdjust({1.0}, {0.0}, {0.5}, {0.5}); | |||
| EXPECT_TRUE(random_color_adjust1 != nullptr); | |||
| std::shared_ptr<TensorOperation> random_color_adjust2 = vision::RandomColorAdjust({1.0, 1.0}, {0.0, 0.0}, {0.5, 0.5}, | |||
| {0.5, 0.5}); | |||
| EXPECT_TRUE(random_color_adjust2 != nullptr); | |||
| std::shared_ptr<TensorOperation> random_color_adjust3 = vision::RandomColorAdjust({0.5, 1.0}, {0.0, 0.5}, {0.25, 0.5}, | |||
| {0.25, 0.5}); | |||
| EXPECT_TRUE(random_color_adjust3 != nullptr); | |||
| std::shared_ptr<TensorOperation> random_color_adjust4 = vision::RandomColorAdjust(); | |||
| EXPECT_TRUE(random_color_adjust4 != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({random_color_adjust1, random_color_adjust2, random_color_adjust3, random_color_adjust4}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 1; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 20); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestRandomRotation) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> random_rotation_op = vision::RandomRotation({-180, 180}); | |||
| EXPECT_TRUE(random_rotation_op != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({random_rotation_op}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 1; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 20); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| TEST_F(MindDataTestPipeline, TestProjectMap) { | |||
| // Create an ImageFolder Dataset | |||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 2; | |||
| ds = ds->Repeat(repeat_num); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| std::shared_ptr<TensorOperation> random_vertical_flip_op = vision::RandomVerticalFlip(0.5); | |||
| EXPECT_TRUE(random_vertical_flip_op != nullptr); | |||
| // Create a Map operation on ds | |||
| ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"}); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Project operation on ds | |||
| std::vector<std::string> column_project = {"label"}; | |||
| ds = ds->Project(column_project); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create a Batch operation on ds | |||
| int32_t batch_size = 1; | |||
| ds = ds->Batch(batch_size); | |||
| EXPECT_TRUE(ds != nullptr); | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| EXPECT_TRUE(iter != nullptr); | |||
| // Iterate the dataset and get each row | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| iter->GetNextRow(&row); | |||
| uint64_t i = 0; | |||
| while (row.size() != 0) { | |||
| i++; | |||
| auto image = row["image"]; | |||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||
| iter->GetNextRow(&row); | |||
| } | |||
| EXPECT_TRUE(i == 20); | |||
| // Manually terminate the pipeline | |||
| iter->Stop(); | |||
| } | |||
| @@ -23,8 +23,6 @@ | |||
| using namespace mindspore::dataset; | |||
| namespace py = pybind11; | |||
| class MindDataTestDatatype : public UT::Common { | |||
| public: | |||
| MindDataTestDatatype() = default; | |||