| @@ -17,6 +17,10 @@ else() | |||||
| set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2") | set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2") | ||||
| endif() | endif() | ||||
# Expose ENABLE_PYTHON to the preprocessor whenever the ENABLE_PYTHON option is switched on.
if(ENABLE_PYTHON)
    add_compile_definitions(ENABLE_PYTHON)
endif()
| set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp") | set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp") | ||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC") | ||||
| @@ -25,7 +25,7 @@ usage() | |||||
| echo "Usage:" | echo "Usage:" | ||||
| echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" | echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" | ||||
| echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | ||||
| echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]" | |||||
| echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" | |||||
| echo "" | echo "" | ||||
| echo "Options:" | echo "Options:" | ||||
| echo " -d Debug mode" | echo " -d Debug mode" | ||||
| @@ -56,6 +56,7 @@ usage() | |||||
| echo " -s Enable serving module, default off" | echo " -s Enable serving module, default off" | ||||
| echo " -B Enable debugger, default off" | echo " -B Enable debugger, default off" | ||||
| echo " -E Enable IBVERBS for parameter server, default off" | echo " -E Enable IBVERBS for parameter server, default off" | ||||
| echo " -l Compile with python dependency, default on" | |||||
| } | } | ||||
| # check value of input is 'on' or 'off' | # check value of input is 'on' or 'off' | ||||
| @@ -98,9 +99,10 @@ checkopts() | |||||
| ENABLE_SERVING="off" | ENABLE_SERVING="off" | ||||
| ENABLE_DEBUGGER="off" | ENABLE_DEBUGGER="off" | ||||
| ENABLE_IBVERBS="off" | ENABLE_IBVERBS="off" | ||||
| ENABLE_PYTHON="on" | |||||
| # Process the options | # Process the options | ||||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt | |||||
| while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt | |||||
| do | do | ||||
| OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') | OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') | ||||
| case "${opt}" in | case "${opt}" in | ||||
| @@ -151,6 +153,10 @@ checkopts() | |||||
| check_on_off $OPTARG p | check_on_off $OPTARG p | ||||
| ENABLE_PROFILE="$OPTARG" | ENABLE_PROFILE="$OPTARG" | ||||
| ;; | ;; | ||||
| l) | |||||
| check_on_off $OPTARG l | |||||
| ENABLE_PYTHON="$OPTARG" | |||||
| ;; | |||||
| i) | i) | ||||
| INC_BUILD="on" | INC_BUILD="on" | ||||
| ;; | ;; | ||||
| @@ -316,6 +322,7 @@ build_mindspore() | |||||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" | CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" | ||||
| fi | fi | ||||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" | CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" | ||||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" | |||||
| if [[ "X$ENABLE_MPI" = "Xon" ]]; then | if [[ "X$ENABLE_MPI" = "Xon" ]]; then | ||||
| CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON" | CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON" | ||||
| fi | fi | ||||
| @@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF) | |||||
| option(ENABLE_AKG "enable akg" OFF) | option(ENABLE_AKG "enable akg" OFF) | ||||
| option(ENABLE_DEBUGGER "enable debugger" OFF) | option(ENABLE_DEBUGGER "enable debugger" OFF) | ||||
| option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF) | option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF) | ||||
| option(ENABLE_PYTHON "Enable python" ON) | |||||
| if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") | if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") | ||||
| if (WIN32) | if (WIN32) | ||||
| @@ -39,6 +39,7 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/device/ascend/platform) | |||||
| include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h | include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h | ||||
| include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include) | include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include) | ||||
| include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/dataset/include) | |||||
| ###################################################################### | ###################################################################### | ||||
| ####################### Flags ######################################## | ####################### Flags ######################################## | ||||
| @@ -67,7 +68,10 @@ add_dependencies(engine-gnn core) | |||||
| add_dependencies(engine core) | add_dependencies(engine core) | ||||
| add_dependencies(text core) | add_dependencies(text core) | ||||
| add_dependencies(text-kernels core) | add_dependencies(text-kernels core) | ||||
| add_dependencies(APItoPython core) | |||||
| add_dependencies(cpp-API core) | |||||
| if (ENABLE_PYTHON) | |||||
| add_dependencies(APItoPython core) | |||||
| endif() | |||||
| if (ENABLE_TDTQUE) | if (ENABLE_TDTQUE) | ||||
| add_dependencies(engine-tdt core) | add_dependencies(engine-tdt core) | ||||
| endif () | endif () | ||||
| @@ -78,7 +82,7 @@ set(submodules | |||||
| $<TARGET_OBJECTS:kernels> | $<TARGET_OBJECTS:kernels> | ||||
| $<TARGET_OBJECTS:kernels-image> | $<TARGET_OBJECTS:kernels-image> | ||||
| $<TARGET_OBJECTS:kernels-data> | $<TARGET_OBJECTS:kernels-data> | ||||
| $<TARGET_OBJECTS:APItoPython> | |||||
| $<TARGET_OBJECTS:cpp-API> | |||||
| $<TARGET_OBJECTS:engine-datasetops-source> | $<TARGET_OBJECTS:engine-datasetops-source> | ||||
| $<TARGET_OBJECTS:engine-datasetops-source-sampler> | $<TARGET_OBJECTS:engine-datasetops-source-sampler> | ||||
| $<TARGET_OBJECTS:engine-gnn> | $<TARGET_OBJECTS:engine-gnn> | ||||
| @@ -90,6 +94,12 @@ set(submodules | |||||
| $<TARGET_OBJECTS:text-kernels> | $<TARGET_OBJECTS:text-kernels> | ||||
| ) | ) | ||||
# The Python binding objects (APItoPython) are only added to the submodule list when
# ENABLE_PYTHON is on.
if(ENABLE_PYTHON)
    list(APPEND submodules $<TARGET_OBJECTS:APItoPython>)
endif()
| if (ENABLE_TDTQUE) | if (ENABLE_TDTQUE) | ||||
| add_library(_c_dataengine SHARED ${submodules} $<TARGET_OBJECTS:engine-tdt>) | add_library(_c_dataengine SHARED ${submodules} $<TARGET_OBJECTS:engine-tdt>) | ||||
| else () | else () | ||||
| @@ -1,7 +1,16 @@ | |||||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | ||||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | ||||
| add_library(APItoPython OBJECT | |||||
| de_pipeline.cc | |||||
| python_bindings.cc | |||||
| if (ENABLE_PYTHON) | |||||
| add_library(APItoPython OBJECT | |||||
| de_pipeline.cc | |||||
| python_bindings.cc | |||||
| ) | |||||
| target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) | |||||
| endif() | |||||
| add_library(cpp-API OBJECT | |||||
| datasets.cc | |||||
| iterator.cc | |||||
| transforms.cc | |||||
| samplers.cc | |||||
| ) | ) | ||||
| target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) | |||||
| @@ -0,0 +1,446 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <fstream> | |||||
| #include "dataset/include/datasets.h" | |||||
| #include "dataset/include/transforms.h" | |||||
| #include "dataset/include/samplers.h" | |||||
| #include "dataset/engine/dataset_iterator.h" | |||||
| #include "dataset/engine/datasetops/source/image_folder_op.h" | |||||
| #include "dataset/engine/datasetops/source/mnist_op.h" | |||||
| #include "dataset/engine/datasetops/source/cifar_op.h" | |||||
| #include "dataset/engine/datasetops/batch_op.h" | |||||
| #include "dataset/engine/datasetops/map_op.h" | |||||
| #include "dataset/engine/datasetops/repeat_op.h" | |||||
| #include "dataset/engine/datasetops/shuffle_op.h" | |||||
| #include "dataset/engine/datasetops/project_op.h" | |||||
| #include "dataset/engine/datasetops/source/sampler/sampler.h" | |||||
| #include "dataset/engine/datasetops/source/sampler/random_sampler.h" | |||||
| #include "dataset/core/config_manager.h" | |||||
| #include "dataset/util/random.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| namespace api { | |||||
// Evaluates a Status-returning expression exactly once and makes the enclosing function return nullptr
// when that Status reports an error. Only usable in functions whose return type accepts nullptr.
// The local name avoids double underscores, which are reserved for the implementation.
#define RETURN_NULL_IF_ERROR(status_expr)      \
  do {                                         \
    Status null_if_error_rc = (status_expr);   \
    if (null_if_error_rc.IsError()) {          \
      return nullptr;                          \
    }                                          \
  } while (false)
| // Function to create the iterator, which will build and launch the execution tree. | |||||
| std::shared_ptr<Iterator> Dataset::CreateIterator() { | |||||
| std::shared_ptr<Iterator> iter; | |||||
| try { | |||||
| iter = std::make_shared<Iterator>(); | |||||
| Status rc = iter->BuildAndLaunchTree(shared_from_this()); | |||||
| if (rc.IsError()) { | |||||
| MS_LOG(ERROR) << "CreateIterator failed."; | |||||
| return nullptr; | |||||
| } | |||||
| return iter; | |||||
| } catch (const std::exception &err) { | |||||
| MS_LOG(ERROR) << "CreateIterator: Iterator exception caught: " << err.what(); | |||||
| return nullptr; | |||||
| } | |||||
| return iter; | |||||
| } | |||||
| // Constructor | |||||
| Dataset::Dataset() { | |||||
| // Fetch some default value from config manager | |||||
| std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); | |||||
| num_workers_ = cfg->num_parallel_workers(); | |||||
| rows_per_buffer_ = cfg->rows_per_buffer(); | |||||
| connector_que_size_ = cfg->op_connector_size(); | |||||
| } | |||||
| // Function to create a ImageFolderDataset. | |||||
| std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool decode, | |||||
| std::shared_ptr<SamplerObj> sampler, std::set<std::string> extensions, | |||||
| std::map<std::string, int32_t> class_indexing) { | |||||
| // This arg is exist in ImageFolderOp, but not externalized (in Python API). The default value is false. | |||||
| bool recursive = false; | |||||
| // Create logical representation of ImageFolderDataset. | |||||
| auto ds = std::make_shared<ImageFolderDataset>(dataset_dir, decode, sampler, recursive, extensions, class_indexing); | |||||
| // Call derived class validation method. | |||||
| return ds->ValidateParams() ? ds : nullptr; | |||||
| } | |||||
| // Function to create a MnistDataset. | |||||
| std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler) { | |||||
| auto ds = std::make_shared<MnistDataset>(dataset_dir, sampler); | |||||
| // Call derived class validation method. | |||||
| return ds->ValidateParams() ? ds : nullptr; | |||||
| } | |||||
| // Function to create a Cifar10Dataset. | |||||
| std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples, | |||||
| std::shared_ptr<SamplerObj> sampler) { | |||||
| auto ds = std::make_shared<Cifar10Dataset>(dataset_dir, num_samples, sampler); | |||||
| // Call derived class validation method. | |||||
| return ds->ValidateParams() ? ds : nullptr; | |||||
| } | |||||
| // Function to create a Batch dataset | |||||
| std::shared_ptr<BatchDataset> Dataset::Batch(int32_t batch_size, bool drop_remainder) { | |||||
| // Default values | |||||
| std::vector<std::string> cols_to_map = {}; | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map; | |||||
| bool pad = false; | |||||
| auto ds = std::make_shared<BatchDataset>(batch_size, drop_remainder, pad, cols_to_map, pad_map); | |||||
| if (!ds->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| ds->children.push_back(shared_from_this()); | |||||
| return ds; | |||||
| } | |||||
| // Function to create Repeat dataset. | |||||
| std::shared_ptr<Dataset> Dataset::Repeat(int32_t count) { | |||||
| // Workaround for repeat == 1, do not inject repeat. | |||||
| if (count == 1) { | |||||
| return shared_from_this(); | |||||
| } | |||||
| auto ds = std::make_shared<RepeatDataset>(count); | |||||
| if (!ds->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| ds->children.push_back(shared_from_this()); | |||||
| return ds; | |||||
| } | |||||
| // Function to create a Map dataset. | |||||
| std::shared_ptr<MapDataset> Dataset::Map(std::vector<std::shared_ptr<TensorOperation>> operations, | |||||
| std::vector<std::string> input_columns, | |||||
| std::vector<std::string> output_columns, | |||||
| const std::vector<std::string> &project_columns) { | |||||
| auto ds = std::make_shared<MapDataset>(operations, input_columns, output_columns, project_columns); | |||||
| if (!ds->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| ds->children.push_back(shared_from_this()); | |||||
| return ds; | |||||
| } | |||||
| // Function to create a ShuffleOp | |||||
| std::shared_ptr<ShuffleDataset> Dataset::Shuffle(int32_t shuffle_size) { | |||||
| // Pass in reshuffle_each_epoch with true | |||||
| auto ds = std::make_shared<ShuffleDataset>(shuffle_size, true); | |||||
| if (!ds->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| ds->children.push_back(shared_from_this()); | |||||
| return ds; | |||||
| } | |||||
| // Function to create a ProjectDataset. | |||||
| std::shared_ptr<ProjectDataset> Dataset::Project(const std::vector<std::string> &columns) { | |||||
| auto ds = std::make_shared<ProjectDataset>(columns); | |||||
| // Call derived class validation method. | |||||
| if (!ds->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| ds->children.push_back(shared_from_this()); | |||||
| return ds; | |||||
| } | |||||
| // Helper function to create default RandomSampler. | |||||
| std::shared_ptr<SamplerObj> CreateDefaultSampler() { | |||||
| int32_t num_samples = 0; // 0 means to sample all ids. | |||||
| bool replacement = false; | |||||
| return std::make_shared<RandomSamplerObj>(replacement, num_samples); | |||||
| } | |||||
| /* ####################################### Derived Dataset classes ################################# */ | |||||
| ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, | |||||
| bool recursive, std::set<std::string> extensions, | |||||
| std::map<std::string, int32_t> class_indexing) | |||||
| : dataset_dir_(dataset_dir), | |||||
| decode_(decode), | |||||
| sampler_(sampler), | |||||
| recursive_(recursive), | |||||
| class_indexing_(class_indexing), | |||||
| exts_(extensions) {} | |||||
| bool ImageFolderDataset::ValidateParams() { | |||||
| if (dataset_dir_.empty()) { | |||||
| MS_LOG(ERROR) << "No dataset path is specified."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ImageFolderDataset::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. | |||||
| if (sampler_ == nullptr) { | |||||
| sampler_ = CreateDefaultSampler(); | |||||
| } | |||||
| // Do internal Schema generation. | |||||
| // This arg is exist in ImageFolderOp, but not externalized (in Python API). | |||||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||||
| TensorShape scalar = TensorShape::CreateScalar(); | |||||
| RETURN_NULL_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||||
| RETURN_NULL_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar))); | |||||
| node_ops.push_back(std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, | |||||
| recursive_, decode_, exts_, class_indexing_, std::move(schema), | |||||
| std::move(sampler_->Build()))); | |||||
| return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops); | |||||
| } | |||||
| MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler) | |||||
| : dataset_dir_(dataset_dir), sampler_(sampler) {} | |||||
| bool MnistDataset::ValidateParams() { | |||||
| if (dataset_dir_.empty()) { | |||||
| MS_LOG(ERROR) << "No dataset path is specified."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> MnistDataset::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. | |||||
| if (sampler_ == nullptr) { | |||||
| sampler_ = CreateDefaultSampler(); | |||||
| } | |||||
| // Do internal Schema generation. | |||||
| auto schema = std::make_unique<DataSchema>(); | |||||
| RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||||
| TensorShape scalar = TensorShape::CreateScalar(); | |||||
| RETURN_NULL_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); | |||||
| node_ops.push_back(std::make_shared<MnistOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, | |||||
| std::move(schema), std::move(sampler_->Build()))); | |||||
| return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops); | |||||
| } | |||||
| BatchDataset::BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map, | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map) | |||||
| : batch_size_(batch_size), | |||||
| drop_remainder_(drop_remainder), | |||||
| pad_(pad), | |||||
| cols_to_map_(cols_to_map), | |||||
| pad_map_(pad_map) {} | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> BatchDataset::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| #ifdef ENABLE_PYTHON | |||||
| py::function noop; | |||||
| node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, | |||||
| cols_to_map_, noop, noop, pad_map_)); | |||||
| #else | |||||
| node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, | |||||
| cols_to_map_, pad_map_)); | |||||
| #endif | |||||
| return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops); | |||||
| } | |||||
| bool BatchDataset::ValidateParams() { | |||||
| if (batch_size_ <= 0) { | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| RepeatDataset::RepeatDataset(uint32_t count) : repeat_count_(count) {} | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> RepeatDataset::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| node_ops.push_back(std::make_shared<RepeatOp>(repeat_count_)); | |||||
| return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops); | |||||
| } | |||||
| bool RepeatDataset::ValidateParams() { | |||||
| if (repeat_count_ <= 0) { | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| MapDataset::MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns, | |||||
| std::vector<std::string> output_columns, const std::vector<std::string> &project_columns) | |||||
| : operations_(operations), | |||||
| input_columns_(input_columns), | |||||
| output_columns_(output_columns), | |||||
| project_columns_(project_columns) {} | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> MapDataset::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| // Currently default is true, and this is not exposed to user. | |||||
| bool perf_mode = true; | |||||
| std::vector<std::shared_ptr<TensorOp>> tensor_ops; | |||||
| // Build tensorOp from tensorOperation vector | |||||
| // This is to ensure each iterator hold its own copy of the tensorOp objects. | |||||
| (void)std::transform( | |||||
| operations_.begin(), operations_.end(), std::back_inserter(tensor_ops), | |||||
| [](std::shared_ptr<TensorOperation> operation) -> std::shared_ptr<TensorOp> { return operation->Build(); }); | |||||
| // This parameter will be removed with next rebase | |||||
| std::vector<std::string> col_orders; | |||||
| auto map_op = | |||||
| std::make_shared<MapOp>(input_columns_, output_columns_, tensor_ops, num_workers_, connector_que_size_, perf_mode); | |||||
| if (!project_columns_.empty()) { | |||||
| auto project_op = std::make_shared<ProjectOp>(project_columns_); | |||||
| node_ops.push_back(project_op); | |||||
| } | |||||
| node_ops.push_back(map_op); | |||||
| return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops); | |||||
| } | |||||
| bool MapDataset::ValidateParams() { | |||||
| if (operations_.empty()) { | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| // Constructor for ShuffleDataset | |||||
| ShuffleDataset::ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch) | |||||
| : shuffle_size_(shuffle_size), shuffle_seed_(GetSeed()), reset_every_epoch_(reset_every_epoch) {} | |||||
| // Function to build the ShuffleOp | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ShuffleDataset::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| node_ops.push_back(std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_, | |||||
| rows_per_buffer_)); | |||||
| return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops); | |||||
| } | |||||
| // Function to validate the parameters for ShuffleDataset | |||||
| bool ShuffleDataset::ValidateParams() { | |||||
| if (shuffle_size_ <= 1) { | |||||
| MS_LOG(ERROR) << "ShuffleDataset: Invalid input, shuffle_size: " << shuffle_size_; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| // Constructor for Cifar10Dataset | |||||
| Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler) | |||||
| : dataset_dir_(dataset_dir), num_samples_(num_samples), sampler_(sampler) {} | |||||
| bool Cifar10Dataset::ValidateParams() { | |||||
| if (dataset_dir_.empty()) { | |||||
| MS_LOG(ERROR) << "No dataset path is specified."; | |||||
| return false; | |||||
| } | |||||
| if (num_samples_ < 0) { | |||||
| MS_LOG(ERROR) << "Number of samples cannot be negative"; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| // Function to build CifarOp | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Cifar10Dataset::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| // If user does not specify Sampler, create a default sampler based on the shuffle variable. | |||||
| if (sampler_ == nullptr) { | |||||
| sampler_ = CreateDefaultSampler(); | |||||
| } | |||||
| // Do internal Schema generation. | |||||
| auto schema = std::make_unique<DataSchema>(); | |||||
| RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||||
| TensorShape scalar = TensorShape::CreateScalar(); | |||||
| RETURN_NULL_IF_ERROR( | |||||
| schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); | |||||
| node_ops.push_back(std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, num_workers_, rows_per_buffer_, | |||||
| dataset_dir_, connector_que_size_, std::move(schema), | |||||
| std::move(sampler_->Build()))); | |||||
| return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops); | |||||
| } | |||||
| // Function to build ProjectOp | |||||
| ProjectDataset::ProjectDataset(const std::vector<std::string> &columns) : columns_(columns) {} | |||||
| bool ProjectDataset::ValidateParams() { | |||||
| if (columns_.empty()) { | |||||
| MS_LOG(ERROR) << "No columns are specified."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ProjectDataset::Build() { | |||||
| // A vector containing shared pointer to the Dataset Ops that this object will create | |||||
| std::vector<std::shared_ptr<DatasetOp>> node_ops; | |||||
| node_ops.push_back(std::make_shared<ProjectOp>(columns_)); | |||||
| return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops); | |||||
| } | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,101 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "dataset/include/iterator.h" | |||||
| #include "dataset/core/client.h" | |||||
| #include "dataset/include/datasets.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| namespace api { | |||||
| // Get the next row from the data pipeline. | |||||
| void Iterator::GetNextRow(TensorMap *row) { | |||||
| Status rc = iterator_->GetNextAsMap(row); | |||||
| if (rc.IsError()) { | |||||
| MS_LOG(ERROR) << "GetNextRow: Failed to get next row."; | |||||
| row->clear(); | |||||
| } | |||||
| } | |||||
| // Shut down the data pipeline. | |||||
| void Iterator::Stop() { | |||||
| // Releasing the iterator_ unique_ptre. This should trigger the destructor of iterator_. | |||||
| iterator_.reset(); | |||||
| // Release ownership of tree_ shared pointer. This will decrement the ref count. | |||||
| tree_.reset(); | |||||
| } | |||||
| // Function to build and launch the execution tree. | |||||
| Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds) { | |||||
| // One time init | |||||
| Status rc; | |||||
| rc = GlobalInit(); | |||||
| RETURN_IF_NOT_OK(rc); | |||||
| // Instantiate the execution tree | |||||
| tree_ = std::make_shared<ExecutionTree>(); | |||||
| // Iterative BFS converting Dataset tree into runtime Execution tree. | |||||
| std::queue<std::pair<std::shared_ptr<Dataset>, std::shared_ptr<DatasetOp>>> q; | |||||
| if (ds != nullptr) { | |||||
| // Convert the current root node. | |||||
| auto root_op = ds->Build()->front(); | |||||
| RETURN_UNEXPECTED_IF_NULL(root_op); | |||||
| RETURN_IF_NOT_OK(tree_->AssociateNode(root_op)); | |||||
| q.push(std::make_pair(ds, root_op)); | |||||
| // Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes) | |||||
| while (!q.empty()) { | |||||
| auto node_pair = q.front(); | |||||
| q.pop(); | |||||
| // Iterate through all the direct children of the first element in our BFS queue | |||||
| for (auto child : node_pair.first->children) { | |||||
| auto child_ops = child->Build(); | |||||
| RETURN_UNEXPECTED_IF_NULL(child_ops); | |||||
| auto node_op = node_pair.second; | |||||
| // Iterate through all the DatasetOps returned by calling Build on the last Dataset object, associate them | |||||
| // with the execution tree and add the child and parent relationship between the nodes | |||||
| // Note that some Dataset objects might return more than one DatasetOps | |||||
| // e.g. MapDataset will return MapOp and ProjectOp if project_columns is set for MapDataset | |||||
| for (auto child_op : *child_ops) { | |||||
| RETURN_IF_NOT_OK(tree_->AssociateNode(child_op)); | |||||
| RETURN_IF_NOT_OK(node_op->AddChild(child_op)); | |||||
| node_op = child_op; | |||||
| } | |||||
| // Add the child and the last element of the returned DatasetOps (which is now the leaf node in our current | |||||
| // execution tree) to the BFS queue | |||||
| q.push(std::make_pair(child, child_ops->back())); | |||||
| } | |||||
| } | |||||
| RETURN_IF_NOT_OK(tree_->AssignRoot(root_op)); | |||||
| } | |||||
| // Launch the execution tree. | |||||
| RETURN_IF_NOT_OK(tree_->Prepare()); | |||||
| RETURN_IF_NOT_OK(tree_->Launch()); | |||||
| iterator_ = std::make_unique<DatasetIterator>(tree_); | |||||
| RETURN_UNEXPECTED_IF_NULL(iterator_); | |||||
| return rc; | |||||
| } | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| @@ -297,7 +297,7 @@ void bindTensor(py::module *m) { | |||||
| })) | })) | ||||
| .def_buffer([](Tensor &tensor) { | .def_buffer([](Tensor &tensor) { | ||||
| py::buffer_info info; | py::buffer_info info; | ||||
| THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); | |||||
| THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); | |||||
| return info; | return info; | ||||
| }) | }) | ||||
| .def("__str__", &Tensor::ToString) | .def("__str__", &Tensor::ToString) | ||||
| @@ -311,7 +311,7 @@ void bindTensor(py::module *m) { | |||||
| return res; | return res; | ||||
| } | } | ||||
| py::buffer_info info; | py::buffer_info info; | ||||
| THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); | |||||
| THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); | |||||
| return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t); | return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t); | ||||
| }); | }); | ||||
| @@ -0,0 +1,224 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "dataset/include/samplers.h" | |||||
| #include "dataset/engine/datasetops/source/sampler/sampler.h" | |||||
| #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" | |||||
| #include "dataset/engine/datasetops/source/sampler/random_sampler.h" | |||||
| #include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" | |||||
| #include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" | |||||
| #include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" | |||||
| #include "dataset/engine/datasetops/source/sampler/pk_sampler.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| namespace api { | |||||
| SamplerObj::SamplerObj() {} | |||||
| /// Function to create a Distributed Sampler. | |||||
| std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle, | |||||
| int64_t num_samples, uint32_t seed) { | |||||
| auto sampler = std::make_shared<DistributedSamplerObj>(num_shards, shard_id, shuffle, num_samples, seed); | |||||
| // Input validation | |||||
| if (!sampler->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| /// Function to create a PK Sampler. | |||||
| std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle, int64_t num_samples) { | |||||
| auto sampler = std::make_shared<PKSamplerObj>(num_val, shuffle, num_samples); | |||||
| // Input validation | |||||
| if (!sampler->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| /// Function to create a Random Sampler. | |||||
| std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement, int64_t num_samples) { | |||||
| auto sampler = std::make_shared<RandomSamplerObj>(replacement, num_samples); | |||||
| // Input validation | |||||
| if (!sampler->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| /// Function to create a Sequential Sampler. | |||||
| std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index, int64_t num_samples) { | |||||
| auto sampler = std::make_shared<SequentialSamplerObj>(start_index, num_samples); | |||||
| // Input validation | |||||
| if (!sampler->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| /// Function to create a Subset Random Sampler. | |||||
| std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices, int64_t num_samples) { | |||||
| auto sampler = std::make_shared<SubsetRandomSamplerObj>(indices, num_samples); | |||||
| // Input validation | |||||
| if (!sampler->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| /// Function to create a Weighted Random Sampler. | |||||
| std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights, int64_t num_samples, | |||||
| bool replacement) { | |||||
| auto sampler = std::make_shared<WeightedRandomSamplerObj>(weights, num_samples, replacement); | |||||
| // Input validation | |||||
| if (!sampler->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| /* ####################################### Derived Sampler classes ################################# */ | |||||
| // DistributedSampler | |||||
| DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, | |||||
| uint32_t seed) | |||||
| : num_shards_(num_shards), shard_id_(shard_id), shuffle_(shuffle), num_samples_(num_samples), seed_(seed) {} | |||||
| bool DistributedSamplerObj::ValidateParams() { | |||||
| if (num_shards_ <= 0) { | |||||
| MS_LOG(ERROR) << "DistributedSampler: invalid num_shards: " << num_shards_; | |||||
| return false; | |||||
| } | |||||
| if (shard_id_ < 0 || shard_id_ >= num_shards_) { | |||||
| MS_LOG(ERROR) << "DistributedSampler: invalid input, shard_id: " << shard_id_ << ", num_shards: " << num_shards_; | |||||
| return false; | |||||
| } | |||||
| if (num_samples_ < 0) { | |||||
| MS_LOG(ERROR) << "DistributedSampler: invalid num_samples: " << num_samples_; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<Sampler> DistributedSamplerObj::Build() { | |||||
| return std::make_shared<dataset::DistributedSampler>(num_samples_, num_shards_, shard_id_, shuffle_, seed_); | |||||
| } | |||||
| // PKSampler | |||||
| PKSamplerObj::PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples) | |||||
| : num_val_(num_val), shuffle_(shuffle), num_samples_(num_samples) {} | |||||
| bool PKSamplerObj::ValidateParams() { | |||||
| if (num_val_ <= 0) { | |||||
| MS_LOG(ERROR) << "PKSampler: invalid num_val: " << num_val_; | |||||
| return false; | |||||
| } | |||||
| if (num_samples_ < 0) { | |||||
| MS_LOG(ERROR) << "PKSampler: invalid num_samples: " << num_samples_; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<Sampler> PKSamplerObj::Build() { | |||||
| return std::make_shared<dataset::PKSampler>(num_samples_, num_val_, shuffle_); | |||||
| } | |||||
| // RandomSampler | |||||
| RandomSamplerObj::RandomSamplerObj(bool replacement, int64_t num_samples) | |||||
| : replacement_(replacement), num_samples_(num_samples) {} | |||||
| bool RandomSamplerObj::ValidateParams() { | |||||
| if (num_samples_ < 0) { | |||||
| MS_LOG(ERROR) << "RandomSampler: invalid num_samples: " << num_samples_; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<Sampler> RandomSamplerObj::Build() { | |||||
| bool reshuffle_each_epoch = true; | |||||
| auto sampler = std::make_shared<dataset::RandomSampler>(num_samples_, replacement_, reshuffle_each_epoch); | |||||
| return sampler; | |||||
| } | |||||
| // SequentialSampler | |||||
| SequentialSamplerObj::SequentialSamplerObj(int64_t start_index, int64_t num_samples) | |||||
| : start_index_(start_index), num_samples_(num_samples) {} | |||||
| bool SequentialSamplerObj::ValidateParams() { | |||||
| if (num_samples_ < 0) { | |||||
| MS_LOG(ERROR) << "SequentialSampler: invalid num_samples: " << num_samples_; | |||||
| return false; | |||||
| } | |||||
| if (start_index_ < 0) { | |||||
| MS_LOG(ERROR) << "SequentialSampler: invalid start_index: " << start_index_; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<Sampler> SequentialSamplerObj::Build() { | |||||
| auto sampler = std::make_shared<dataset::SequentialSampler>(num_samples_, start_index_); | |||||
| return sampler; | |||||
| } | |||||
| // SubsetRandomSampler | |||||
| SubsetRandomSamplerObj::SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples) | |||||
| : indices_(indices), num_samples_(num_samples) {} | |||||
| bool SubsetRandomSamplerObj::ValidateParams() { | |||||
| if (num_samples_ < 0) { | |||||
| MS_LOG(ERROR) << "SubsetRandomSampler: invalid num_samples: " << num_samples_; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<Sampler> SubsetRandomSamplerObj::Build() { | |||||
| auto sampler = std::make_shared<dataset::SubsetRandomSampler>(num_samples_, indices_); | |||||
| return sampler; | |||||
| } | |||||
| // WeightedRandomSampler | |||||
| WeightedRandomSamplerObj::WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples, | |||||
| bool replacement) | |||||
| : weights_(weights), num_samples_(num_samples), replacement_(replacement) {} | |||||
| bool WeightedRandomSamplerObj::ValidateParams() { | |||||
| if (num_samples_ < 0) { | |||||
| MS_LOG(ERROR) << "WeightedRandomSampler: invalid num_samples: " << num_samples_; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<Sampler> WeightedRandomSamplerObj::Build() { | |||||
| auto sampler = std::make_shared<dataset::WeightedRandomSampler>(num_samples_, weights_, replacement_); | |||||
| return sampler; | |||||
| } | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,491 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "dataset/include/transforms.h" | |||||
| #include "dataset/kernels/image/image_utils.h" | |||||
| #include "dataset/kernels/image/normalize_op.h" | |||||
| #include "dataset/kernels/image/decode_op.h" | |||||
| #include "dataset/kernels/image/resize_op.h" | |||||
| #include "dataset/kernels/image/random_crop_op.h" | |||||
| #include "dataset/kernels/image/center_crop_op.h" | |||||
| #include "dataset/kernels/image/uniform_aug_op.h" | |||||
| #include "dataset/kernels/image/random_horizontal_flip_op.h" | |||||
| #include "dataset/kernels/image/random_vertical_flip_op.h" | |||||
| #include "dataset/kernels/image/random_rotation_op.h" | |||||
| #include "dataset/kernels/image/cut_out_op.h" | |||||
| #include "dataset/kernels/image/random_color_adjust_op.h" | |||||
| #include "dataset/kernels/image/pad_op.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| namespace api { | |||||
| TensorOperation::TensorOperation() {} | |||||
| // Transform operations for computer vision. | |||||
| namespace vision { | |||||
| // Function to create NormalizeOperation. | |||||
| std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std) { | |||||
| auto op = std::make_shared<NormalizeOperation>(mean, std); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create DecodeOperation. | |||||
| std::shared_ptr<DecodeOperation> Decode(bool rgb) { | |||||
| auto op = std::make_shared<DecodeOperation>(rgb); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create ResizeOperation. | |||||
| std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size, InterpolationMode interpolation) { | |||||
| auto op = std::make_shared<ResizeOperation>(size, interpolation); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create RandomCropOperation. | |||||
| std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding, | |||||
| bool pad_if_needed, std::vector<uint8_t> fill_value) { | |||||
| auto op = std::make_shared<RandomCropOperation>(size, padding, pad_if_needed, fill_value); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create CenterCropOperation. | |||||
| std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size) { | |||||
| auto op = std::make_shared<CenterCropOperation>(size); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create UniformAugOperation. | |||||
| std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations, | |||||
| int32_t num_ops) { | |||||
| auto op = std::make_shared<UniformAugOperation>(operations, num_ops); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create RandomHorizontalFlipOperation. | |||||
| std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob) { | |||||
| auto op = std::make_shared<RandomHorizontalFlipOperation>(prob); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create RandomVerticalFlipOperation. | |||||
| std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob) { | |||||
| auto op = std::make_shared<RandomVerticalFlipOperation>(prob); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create RandomRotationOperation. | |||||
| std::shared_ptr<RandomRotationOperation> RandomRotation(std::vector<float> degrees, InterpolationMode resample, | |||||
| bool expand, std::vector<float> center, | |||||
| std::vector<uint8_t> fill_value) { | |||||
| auto op = std::make_shared<RandomRotationOperation>(degrees, resample, expand, center, fill_value); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create PadOperation. | |||||
| std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value, | |||||
| BorderType padding_mode) { | |||||
| auto op = std::make_shared<PadOperation>(padding, fill_value, padding_mode); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create CutOutOp. | |||||
| std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches) { | |||||
| auto op = std::make_shared<CutOutOperation>(length, num_patches); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| // Function to create RandomColorAdjustOperation. | |||||
| std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness, | |||||
| std::vector<float> contrast, | |||||
| std::vector<float> saturation, std::vector<float> hue) { | |||||
| auto op = std::make_shared<RandomColorAdjustOperation>(brightness, contrast, saturation, hue); | |||||
| // Input validation | |||||
| if (!op->ValidateParams()) { | |||||
| return nullptr; | |||||
| } | |||||
| return op; | |||||
| } | |||||
| /* ####################################### Derived TensorOperation classes ################################# */ | |||||
| // NormalizeOperation | |||||
| NormalizeOperation::NormalizeOperation(std::vector<float> mean, std::vector<float> std) : mean_(mean), std_(std) {} | |||||
| bool NormalizeOperation::ValidateParams() { | |||||
| if (mean_.size() != 3) { | |||||
| MS_LOG(ERROR) << "Normalize: mean vector has incorrect size: " << mean_.size(); | |||||
| return false; | |||||
| } | |||||
| if (std_.size() != 3) { | |||||
| MS_LOG(ERROR) << "Normalize: std vector has incorrect size: " << std_.size(); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<TensorOp> NormalizeOperation::Build() { | |||||
| return std::make_shared<NormalizeOp>(mean_[0], mean_[1], mean_[2], std_[0], std_[1], std_[2]); | |||||
| } | |||||
| // DecodeOperation | |||||
| DecodeOperation::DecodeOperation(bool rgb) : rgb_(rgb) {} | |||||
| bool DecodeOperation::ValidateParams() { return true; } | |||||
| std::shared_ptr<TensorOp> DecodeOperation::Build() { return std::make_shared<DecodeOp>(rgb_); } | |||||
| // ResizeOperation | |||||
| ResizeOperation::ResizeOperation(std::vector<int32_t> size, InterpolationMode interpolation) | |||||
| : size_(size), interpolation_(interpolation) {} | |||||
| bool ResizeOperation::ValidateParams() { | |||||
| if (size_.empty() || size_.size() > 2) { | |||||
| MS_LOG(ERROR) << "Resize: size vector has incorrect size: " << size_.size(); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<TensorOp> ResizeOperation::Build() { | |||||
| int32_t height = size_[0]; | |||||
| int32_t width = 0; | |||||
| // User specified the width value. | |||||
| if (size_.size() == 2) { | |||||
| width = size_[1]; | |||||
| } | |||||
| return std::make_shared<ResizeOp>(height, width, interpolation_); | |||||
| } | |||||
| // RandomCropOperation | |||||
| RandomCropOperation::RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding, bool pad_if_needed, | |||||
| std::vector<uint8_t> fill_value) | |||||
| : size_(size), padding_(padding), pad_if_needed_(pad_if_needed), fill_value_(fill_value) {} | |||||
| bool RandomCropOperation::ValidateParams() { | |||||
| if (size_.empty() || size_.size() > 2) { | |||||
| MS_LOG(ERROR) << "RandomCrop: size vector has incorrect size: " << size_.size(); | |||||
| return false; | |||||
| } | |||||
| if (padding_.empty() || padding_.size() != 4) { | |||||
| MS_LOG(ERROR) << "RandomCrop: padding vector has incorrect size: padding.size()"; | |||||
| return false; | |||||
| } | |||||
| if (fill_value_.empty() || fill_value_.size() != 3) { | |||||
| MS_LOG(ERROR) << "RandomCrop: fill_value vector has incorrect size: fill_value.size()"; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<TensorOp> RandomCropOperation::Build() { | |||||
| int32_t crop_height = size_[0]; | |||||
| int32_t crop_width = 0; | |||||
| int32_t pad_top = padding_[0]; | |||||
| int32_t pad_bottom = padding_[1]; | |||||
| int32_t pad_left = padding_[2]; | |||||
| int32_t pad_right = padding_[3]; | |||||
| uint8_t fill_r = fill_value_[0]; | |||||
| uint8_t fill_g = fill_value_[1]; | |||||
| uint8_t fill_b = fill_value_[2]; | |||||
| // User has specified the crop_width value. | |||||
| if (size_.size() == 2) { | |||||
| crop_width = size_[1]; | |||||
| } | |||||
| auto tensor_op = std::make_shared<RandomCropOp>(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, | |||||
| BorderType::kConstant, pad_if_needed_, fill_r, fill_g, fill_b); | |||||
| return tensor_op; | |||||
| } | |||||
| // CenterCropOperation | |||||
| CenterCropOperation::CenterCropOperation(std::vector<int32_t> size) : size_(size) {} | |||||
| bool CenterCropOperation::ValidateParams() { | |||||
| if (size_.empty() || size_.size() > 2) { | |||||
| MS_LOG(ERROR) << "CenterCrop: size vector has incorrect size."; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<TensorOp> CenterCropOperation::Build() { | |||||
| int32_t crop_height = size_[0]; | |||||
| int32_t crop_width = 0; | |||||
| // User has specified crop_width. | |||||
| if (size_.size() == 2) { | |||||
| crop_width = size_[1]; | |||||
| } | |||||
| std::shared_ptr<CenterCropOp> tensor_op = std::make_shared<CenterCropOp>(crop_height, crop_width); | |||||
| return tensor_op; | |||||
| } | |||||
| // UniformAugOperation | |||||
| UniformAugOperation::UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops) | |||||
| : operations_(operations), num_ops_(num_ops) {} | |||||
| bool UniformAugOperation::ValidateParams() { return true; } | |||||
| std::shared_ptr<TensorOp> UniformAugOperation::Build() { | |||||
| std::vector<std::shared_ptr<TensorOp>> tensor_ops; | |||||
| (void)std::transform(operations_.begin(), operations_.end(), std::back_inserter(tensor_ops), | |||||
| [](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); }); | |||||
| std::shared_ptr<UniformAugOp> tensor_op = std::make_shared<UniformAugOp>(tensor_ops, num_ops_); | |||||
| return tensor_op; | |||||
| } | |||||
| // RandomHorizontalFlipOperation | |||||
| RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability) : probability_(probability) {} | |||||
| bool RandomHorizontalFlipOperation::ValidateParams() { return true; } | |||||
| std::shared_ptr<TensorOp> RandomHorizontalFlipOperation::Build() { | |||||
| std::shared_ptr<RandomHorizontalFlipOp> tensor_op = std::make_shared<RandomHorizontalFlipOp>(probability_); | |||||
| return tensor_op; | |||||
| } | |||||
| // RandomVerticalFlipOperation | |||||
| RandomVerticalFlipOperation::RandomVerticalFlipOperation(float probability) : probability_(probability) {} | |||||
| bool RandomVerticalFlipOperation::ValidateParams() { return true; } | |||||
| std::shared_ptr<TensorOp> RandomVerticalFlipOperation::Build() { | |||||
| std::shared_ptr<RandomVerticalFlipOp> tensor_op = std::make_shared<RandomVerticalFlipOp>(probability_); | |||||
| return tensor_op; | |||||
| } | |||||
| // Function to create RandomRotationOperation. | |||||
| RandomRotationOperation::RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode, | |||||
| bool expand, std::vector<float> center, | |||||
| std::vector<uint8_t> fill_value) | |||||
| : degrees_(degrees), | |||||
| interpolation_mode_(interpolation_mode), | |||||
| expand_(expand), | |||||
| center_(center), | |||||
| fill_value_(fill_value) {} | |||||
| bool RandomRotationOperation::ValidateParams() { | |||||
| if (degrees_.empty() || degrees_.size() != 2) { | |||||
| MS_LOG(ERROR) << "RandomRotation: degrees vector has incorrect size: degrees.size()"; | |||||
| return false; | |||||
| } | |||||
| if (center_.empty() || center_.size() != 2) { | |||||
| MS_LOG(ERROR) << "RandomRotation: center vector has incorrect size: center.size()"; | |||||
| return false; | |||||
| } | |||||
| if (fill_value_.empty() || fill_value_.size() != 3) { | |||||
| MS_LOG(ERROR) << "RandomRotation: fill_value vector has incorrect size: fill_value.size()"; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<TensorOp> RandomRotationOperation::Build() { | |||||
| std::shared_ptr<RandomRotationOp> tensor_op = | |||||
| std::make_shared<RandomRotationOp>(degrees_[0], degrees_[1], center_[0], center_[1], interpolation_mode_, expand_, | |||||
| fill_value_[0], fill_value_[1], fill_value_[2]); | |||||
| return tensor_op; | |||||
| } | |||||
| // PadOperation | |||||
| PadOperation::PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value, BorderType padding_mode) | |||||
| : padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {} | |||||
| bool PadOperation::ValidateParams() { | |||||
| if (padding_.empty() || padding_.size() == 3 || padding_.size() > 4) { | |||||
| MS_LOG(ERROR) << "Pad: padding vector has incorrect size: padding.size()"; | |||||
| return false; | |||||
| } | |||||
| if (fill_value_.empty() || (fill_value_.size() != 1 && fill_value_.size() != 3)) { | |||||
| MS_LOG(ERROR) << "Pad: fill_value vector has incorrect size: fill_value.size()"; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<TensorOp> PadOperation::Build() { | |||||
| int32_t pad_top, pad_bottom, pad_left, pad_right; | |||||
| switch (padding_.size()) { | |||||
| case 1: | |||||
| pad_left = padding_[0]; | |||||
| pad_top = padding_[0]; | |||||
| pad_right = padding_[0]; | |||||
| pad_bottom = padding_[0]; | |||||
| break; | |||||
| case 2: | |||||
| pad_left = padding_[0]; | |||||
| pad_top = padding_[1]; | |||||
| pad_right = padding_[0]; | |||||
| pad_bottom = padding_[1]; | |||||
| break; | |||||
| default: | |||||
| pad_left = padding_[0]; | |||||
| pad_top = padding_[1]; | |||||
| pad_right = padding_[2]; | |||||
| pad_bottom = padding_[3]; | |||||
| } | |||||
| uint8_t fill_r, fill_g, fill_b; | |||||
| fill_r = fill_value_[0]; | |||||
| fill_g = fill_value_[0]; | |||||
| fill_b = fill_value_[0]; | |||||
| if (fill_value_.size() == 3) { | |||||
| fill_r = fill_value_[0]; | |||||
| fill_g = fill_value_[1]; | |||||
| fill_b = fill_value_[2]; | |||||
| } | |||||
| std::shared_ptr<PadOp> tensor_op = | |||||
| std::make_shared<PadOp>(pad_top, pad_bottom, pad_left, pad_right, padding_mode_, fill_r, fill_g, fill_b); | |||||
| return tensor_op; | |||||
| } | |||||
| // CutOutOperation | |||||
| CutOutOperation::CutOutOperation(int32_t length, int32_t num_patches) : length_(length), num_patches_(num_patches) {} | |||||
| bool CutOutOperation::ValidateParams() { | |||||
| if (length_ < 0) { | |||||
| MS_LOG(ERROR) << "CutOut: length cannot be negative"; | |||||
| return false; | |||||
| } | |||||
| if (num_patches_ < 0) { | |||||
| MS_LOG(ERROR) << "CutOut: number of patches cannot be negative"; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<TensorOp> CutOutOperation::Build() { | |||||
| std::shared_ptr<CutOutOp> tensor_op = std::make_shared<CutOutOp>(length_, length_, num_patches_, false, 0, 0, 0); | |||||
| return tensor_op; | |||||
| } | |||||
| // RandomColorAdjustOperation. | |||||
| RandomColorAdjustOperation::RandomColorAdjustOperation(std::vector<float> brightness, std::vector<float> contrast, | |||||
| std::vector<float> saturation, std::vector<float> hue) | |||||
| : brightness_(brightness), contrast_(contrast), saturation_(saturation), hue_(hue) {} | |||||
| bool RandomColorAdjustOperation::ValidateParams() { | |||||
| // Do some input validation. | |||||
| if (brightness_.empty() || brightness_.size() > 2) { | |||||
| MS_LOG(ERROR) << "RandomColorAdjust: brightness must be a vector of one or two values"; | |||||
| return false; | |||||
| } | |||||
| if (contrast_.empty() || contrast_.size() > 2) { | |||||
| MS_LOG(ERROR) << "RandomColorAdjust: contrast must be a vector of one or two values"; | |||||
| return false; | |||||
| } | |||||
| if (saturation_.empty() || saturation_.size() > 2) { | |||||
| MS_LOG(ERROR) << "RandomColorAdjust: saturation must be a vector of one or two values"; | |||||
| return false; | |||||
| } | |||||
| if (hue_.empty() || hue_.size() > 2) { | |||||
| MS_LOG(ERROR) << "RandomColorAdjust: hue must be a vector of one or two values"; | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| std::shared_ptr<TensorOp> RandomColorAdjustOperation::Build() { | |||||
| float brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub; | |||||
| brightness_lb = brightness_[0]; | |||||
| brightness_ub = brightness_[0]; | |||||
| if (brightness_.size() == 2) brightness_ub = brightness_[1]; | |||||
| contrast_lb = contrast_[0]; | |||||
| contrast_ub = contrast_[0]; | |||||
| if (contrast_.size() == 2) contrast_ub = contrast_[1]; | |||||
| saturation_lb = saturation_[0]; | |||||
| saturation_ub = saturation_[0]; | |||||
| if (saturation_.size() == 2) saturation_ub = saturation_[1]; | |||||
| hue_lb = hue_[0]; | |||||
| hue_ub = hue_[0]; | |||||
| if (hue_.size() == 2) hue_ub = hue_[1]; | |||||
| std::shared_ptr<RandomColorAdjustOp> tensor_op = std::make_shared<RandomColorAdjustOp>( | |||||
| brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub); | |||||
| return tensor_op; | |||||
| } | |||||
| } // namespace vision | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| @@ -1,10 +1,6 @@ | |||||
| ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) | |||||
| ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) | |||||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | ||||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | ||||
| add_library(core OBJECT | |||||
| ${EXAMPLE_SRCS} | |||||
| ${FEATURE_SRCS} | |||||
| set(DATASET_CORE_SRC_FILES | |||||
| client.cc | client.cc | ||||
| config_manager.cc | config_manager.cc | ||||
| cv_tensor.cc | cv_tensor.cc | ||||
| @@ -13,6 +9,13 @@ add_library(core OBJECT | |||||
| tensor.cc | tensor.cc | ||||
| tensor_row.cc | tensor_row.cc | ||||
| tensor_shape.cc | tensor_shape.cc | ||||
| ) | |||||
| ) | |||||
| ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) | |||||
| ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) | |||||
| add_library(core OBJECT ${DATASET_CORE_SRC_FILES} ${EXAMPLE_SRCS} ${FEATURE_SRCS}) | |||||
| add_dependencies(core mindspore::protobuf) | add_dependencies(core mindspore::protobuf) | ||||
| target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) | |||||
| if (ENABLE_PYTHON) | |||||
| target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) | |||||
| endif() | |||||
| @@ -25,21 +25,25 @@ | |||||
| #include "dataset/core/tensor_shape.h" | #include "dataset/core/tensor_shape.h" | ||||
| #include "dataset/engine/data_schema.h" | #include "dataset/engine/data_schema.h" | ||||
| #include "dataset/engine/dataset_iterator.h" | #include "dataset/engine/dataset_iterator.h" | ||||
| #include "dataset/engine/datasetops/source/mindrecord_op.h" | |||||
| #include "dataset/engine/datasetops/source/tf_reader_op.h" | |||||
| #ifdef ENABLE_PYTHON | |||||
| #include "dataset/engine/datasetops/barrier_op.h" | #include "dataset/engine/datasetops/barrier_op.h" | ||||
| #include "dataset/engine/datasetops/batch_op.h" | |||||
| #include "dataset/engine/datasetops/filter_op.h" | |||||
| #include "dataset/engine/datasetops/source/generator_op.h" | |||||
| #include "dataset/engine/datasetops/build_vocab_op.h" | #include "dataset/engine/datasetops/build_vocab_op.h" | ||||
| #endif | |||||
| #include "dataset/engine/datasetops/batch_op.h" | |||||
| #include "dataset/engine/datasetops/dataset_op.h" | #include "dataset/engine/datasetops/dataset_op.h" | ||||
| #include "dataset/engine/datasetops/device_queue_op.h" | #include "dataset/engine/datasetops/device_queue_op.h" | ||||
| #include "dataset/engine/datasetops/map_op.h" | #include "dataset/engine/datasetops/map_op.h" | ||||
| #include "dataset/engine/datasetops/project_op.h" | #include "dataset/engine/datasetops/project_op.h" | ||||
| #include "dataset/engine/datasetops/rename_op.h" | #include "dataset/engine/datasetops/rename_op.h" | ||||
| #include "dataset/engine/datasetops/filter_op.h" | |||||
| #include "dataset/engine/datasetops/repeat_op.h" | #include "dataset/engine/datasetops/repeat_op.h" | ||||
| #include "dataset/engine/datasetops/skip_op.h" | #include "dataset/engine/datasetops/skip_op.h" | ||||
| #include "dataset/engine/datasetops/shuffle_op.h" | #include "dataset/engine/datasetops/shuffle_op.h" | ||||
| #include "dataset/engine/datasetops/source/generator_op.h" | |||||
| #include "dataset/engine/datasetops/source/mindrecord_op.h" | |||||
| #include "dataset/engine/datasetops/source/tf_reader_op.h" | |||||
| #include "dataset/engine/datasetops/take_op.h" | #include "dataset/engine/datasetops/take_op.h" | ||||
| #include "dataset/engine/datasetops/zip_op.h" | #include "dataset/engine/datasetops/zip_op.h" | ||||
| #include "dataset/engine/datasetops/concat_op.h" | #include "dataset/engine/datasetops/concat_op.h" | ||||
| @@ -32,6 +32,12 @@ enum class DatasetType { kUnknown, kArrow, kTf }; | |||||
| // Possible flavours of Tensor implementations | // Possible flavours of Tensor implementations | ||||
| enum class TensorImpl { kNone, kFlexible, kCv, kNP }; | enum class TensorImpl { kNone, kFlexible, kCv, kNP }; | ||||
| // Possible values for Border types | |||||
| enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; | |||||
| // Possible interpolation modes | |||||
| enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; | |||||
| // convenience functions for 32bit int bitmask | // convenience functions for 32bit int bitmask | ||||
| inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; } | inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; } | ||||
| @@ -14,11 +14,12 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "dataset/core/data_type.h" | #include "dataset/core/data_type.h" | ||||
| #ifdef ENABLE_PYTHON | |||||
| #include "dataset/core/pybind_support.h" | |||||
| #endif | |||||
| #include "utils/log_adapter.h" | #include "utils/log_adapter.h" | ||||
| #include "dataset/core/pybind_support.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| @@ -29,12 +30,14 @@ uint8_t DataType::SizeInBytes() const { | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| py::dtype DataType::AsNumpyType() const { | py::dtype DataType::AsNumpyType() const { | ||||
| if (type_ < DataType::NUM_OF_TYPES) | if (type_ < DataType::NUM_OF_TYPES) | ||||
| return py::dtype(kTypeInfo[type_].pybindType_); | return py::dtype(kTypeInfo[type_].pybindType_); | ||||
| else | else | ||||
| return py::dtype("unknown"); | return py::dtype("unknown"); | ||||
| } | } | ||||
| #endif | |||||
| uint8_t DataType::AsCVType() const { | uint8_t DataType::AsCVType() const { | ||||
| uint8_t res = kCVInvalidType; | uint8_t res = kCVInvalidType; | ||||
| @@ -112,6 +115,7 @@ std::string DataType::ToString() const { | |||||
| return "unknown"; | return "unknown"; | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| DataType DataType::FromNpArray(const py::array &arr) { | DataType DataType::FromNpArray(const py::array &arr) { | ||||
| if (py::isinstance<py::array_t<bool>>(arr)) { | if (py::isinstance<py::array_t<bool>>(arr)) { | ||||
| return DataType(DataType::DE_BOOL); | return DataType(DataType::DE_BOOL); | ||||
| @@ -156,6 +160,7 @@ std::string DataType::GetPybindFormat() const { | |||||
| } | } | ||||
| return res; | return res; | ||||
| } | } | ||||
| #endif | |||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -19,14 +19,16 @@ | |||||
| #include <opencv2/core/hal/interface.h> | #include <opencv2/core/hal/interface.h> | ||||
| #include <string> | #include <string> | ||||
| #ifdef ENABLE_PYTHON | |||||
| #include "pybind11/numpy.h" | #include "pybind11/numpy.h" | ||||
| #include "pybind11/pybind11.h" | #include "pybind11/pybind11.h" | ||||
| #include "dataset/core/constants.h" | |||||
| #include "dataset/core/pybind_support.h" | #include "dataset/core/pybind_support.h" | ||||
| namespace py = pybind11; | namespace py = pybind11; | ||||
| #else | |||||
| #include "Eigen/Core" | |||||
| using float16 = Eigen::half; | |||||
| #endif | |||||
| #include "dataset/core/constants.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| @@ -59,6 +61,7 @@ class DataType { | |||||
| const uint8_t cvType_; // OpenCv matching type | const uint8_t cvType_; // OpenCv matching type | ||||
| }; | }; | ||||
| #ifdef ENABLE_PYTHON | |||||
| static inline const TypeInfo kTypeInfo[] = { | static inline const TypeInfo kTypeInfo[] = { | ||||
| // name, sizeInBytes, pybindType, formatDescriptor, openCV | // name, sizeInBytes, pybindType, formatDescriptor, openCV | ||||
| {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN | {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN | ||||
| @@ -76,19 +79,38 @@ class DataType { | |||||
| {"float64", 8, "double", py::format_descriptor<double>::format(), CV_64F}, // DE_FLOAT64 | {"float64", 8, "double", py::format_descriptor<double>::format(), CV_64F}, // DE_FLOAT64 | ||||
| {"string", 0, "bytes", "S", kCVInvalidType} // DE_STRING | {"string", 0, "bytes", "S", kCVInvalidType} // DE_STRING | ||||
| }; | }; | ||||
| #else | |||||
| static inline const TypeInfo kTypeInfo[] = { | |||||
| // name, sizeInBytes, pybindType, formatDescriptor, openCV | |||||
| {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN | |||||
| {"bool", 1, "bool", "", CV_8U}, // DE_BOOL | |||||
| {"int8", 1, "int8", "", CV_8S}, // DE_INT8 | |||||
| {"uint8", 1, "uint8", "", CV_8U}, // DE_UINT8 | |||||
| {"int16", 2, "int16", "", CV_16S}, // DE_INT16 | |||||
| {"uint16", 2, "uint16", "", CV_16U}, // DE_UINT16 | |||||
| {"int32", 4, "int32", "", CV_32S}, // DE_INT32 | |||||
| {"uint32", 4, "uint32", "", kCVInvalidType}, // DE_UINT32 | |||||
| {"int64", 8, "int64", "", kCVInvalidType}, // DE_INT64 | |||||
| {"uint64", 8, "uint64", "", kCVInvalidType}, // DE_UINT64 | |||||
| {"float16", 2, "float16", "", CV_16F}, // DE_FLOAT16 | |||||
| {"float32", 4, "float32", "", CV_32F}, // DE_FLOAT32 | |||||
| {"float64", 8, "double", "", CV_64F}, // DE_FLOAT64 | |||||
| {"string", 0, "bytes", "", kCVInvalidType} // DE_STRING | |||||
| }; | |||||
| #endif | |||||
| // No arg constructor to create an unknown shape | // No arg constructor to create an unknown shape | ||||
| DataType() : type_(DE_UNKNOWN) {} | DataType() : type_(DE_UNKNOWN) {} | ||||
| // Create a type from a given string | // Create a type from a given string | ||||
| // @param type_str | |||||
| /// \param type_str | |||||
| explicit DataType(const std::string &type_str); | explicit DataType(const std::string &type_str); | ||||
| // Default destructor | // Default destructor | ||||
| ~DataType() = default; | ~DataType() = default; | ||||
| // Create a type from a given enum | // Create a type from a given enum | ||||
| // @param d | |||||
| /// \param d | |||||
| constexpr explicit DataType(Type d) : type_(d) {} | constexpr explicit DataType(Type d) : type_(d) {} | ||||
| constexpr bool operator==(const DataType a) const { return type_ == a.type_; } | constexpr bool operator==(const DataType a) const { return type_ == a.type_; } | ||||
| @@ -100,49 +122,49 @@ class DataType { | |||||
| constexpr bool operator!=(const Type a) const { return type_ != a; } | constexpr bool operator!=(const Type a) const { return type_ != a; } | ||||
| // Disable this usage `if(d)` where d is of type DataType | // Disable this usage `if(d)` where d is of type DataType | ||||
| // @return | |||||
| /// \return | |||||
| operator bool() = delete; | operator bool() = delete; | ||||
| // To be used in Switch/case | // To be used in Switch/case | ||||
| // @return | |||||
| /// \return | |||||
| operator Type() const { return type_; } | operator Type() const { return type_; } | ||||
| // The number of bytes needed to store one value of this type | // The number of bytes needed to store one value of this type | ||||
| // @return | |||||
| /// \return | |||||
| uint8_t SizeInBytes() const; | uint8_t SizeInBytes() const; | ||||
| // Convert from DataType to OpenCV type | // Convert from DataType to OpenCV type | ||||
| // @return | |||||
| /// \return | |||||
| uint8_t AsCVType() const; | uint8_t AsCVType() const; | ||||
| // Convert from OpenCV type to DataType | // Convert from OpenCV type to DataType | ||||
| // @param cv_type | |||||
| // @return | |||||
| /// \param cv_type | |||||
| /// \return | |||||
| static DataType FromCVType(int cv_type); | static DataType FromCVType(int cv_type); | ||||
| // Returns a string representation of the type | // Returns a string representation of the type | ||||
| // @return | |||||
| /// \return | |||||
| std::string ToString() const; | std::string ToString() const; | ||||
| // returns true if the template type is the same as the Tensor type_ | // returns true if the template type is the same as the Tensor type_ | ||||
| // @tparam T | |||||
| // @return true or false | |||||
| /// \tparam T | |||||
| /// \return true or false | |||||
| template <typename T> | template <typename T> | ||||
| bool IsCompatible() const { | bool IsCompatible() const { | ||||
| return type_ == FromCType<T>(); | return type_ == FromCType<T>(); | ||||
| } | } | ||||
| // returns true if the template type is the same as the Tensor type_ | // returns true if the template type is the same as the Tensor type_ | ||||
| // @tparam T | |||||
| // @return true or false | |||||
| /// \tparam T | |||||
| /// \return true or false | |||||
| template <typename T> | template <typename T> | ||||
| bool IsLooselyCompatible() const; | bool IsLooselyCompatible() const; | ||||
| // << Stream output operator overload | // << Stream output operator overload | ||||
| // @notes This allows you to print the info using stream operators | |||||
| // @param out - reference to the output stream being overloaded | |||||
| // @param so - reference to the DataType to display | |||||
| // @return - the output stream must be returned | |||||
| /// \notes This allows you to print the info using stream operators | |||||
| /// \param out - reference to the output stream being overloaded | |||||
| /// \param so - reference to the DataType to display | |||||
| /// \return - the output stream must be returned | |||||
| friend std::ostream &operator<<(std::ostream &out, const DataType &so) { | friend std::ostream &operator<<(std::ostream &out, const DataType &so) { | ||||
| out << so.ToString(); | out << so.ToString(); | ||||
| return out; | return out; | ||||
| @@ -151,22 +173,24 @@ class DataType { | |||||
| template <typename T> | template <typename T> | ||||
| static DataType FromCType(); | static DataType FromCType(); | ||||
| #ifdef ENABLE_PYTHON | |||||
| // Convert from DataType to Pybind type | // Convert from DataType to Pybind type | ||||
| // @return | |||||
| /// \return | |||||
| py::dtype AsNumpyType() const; | py::dtype AsNumpyType() const; | ||||
| // Convert from NP type to DataType | // Convert from NP type to DataType | ||||
| // @param type | |||||
| // @return | |||||
| /// \param type | |||||
| /// \return | |||||
| static DataType FromNpType(const py::dtype &type); | static DataType FromNpType(const py::dtype &type); | ||||
| // Convert from NP array to DataType | // Convert from NP array to DataType | ||||
| // @param py array | |||||
| // @return | |||||
| /// \param py array | |||||
| /// \return | |||||
| static DataType FromNpArray(const py::array &arr); | static DataType FromNpArray(const py::array &arr); | ||||
| #endif | |||||
| // Get the buffer string format of the current type. Used in pybind buffer protocol. | // Get the buffer string format of the current type. Used in pybind buffer protocol. | ||||
| // @return | |||||
| /// \return | |||||
| std::string GetPybindFormat() const; | std::string GetPybindFormat() const; | ||||
| bool IsSignedInt() const { | bool IsSignedInt() const { | ||||
| @@ -28,10 +28,12 @@ | |||||
| #include "dataset/core/constants.h" | #include "dataset/core/constants.h" | ||||
| #include "dataset/core/cv_tensor.h" | #include "dataset/core/cv_tensor.h" | ||||
| #include "dataset/core/global_context.h" | #include "dataset/core/global_context.h" | ||||
| #ifdef ENABLE_PYTHON | |||||
| #include "dataset/core/pybind_support.h" | #include "dataset/core/pybind_support.h" | ||||
| namespace py = pybind11; | |||||
| #endif | |||||
| #include "dataset/core/tensor_shape.h" | #include "dataset/core/tensor_shape.h" | ||||
| namespace py = pybind11; | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| // Helper macros for printing tensor elements | // Helper macros for printing tensor elements | ||||
| @@ -155,6 +157,7 @@ Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape | |||||
| MS_ASSERT(num_bytes == 0); | MS_ASSERT(num_bytes == 0); | ||||
| if (shape.known()) Tensor::Reshape(shape); | if (shape.known()) Tensor::Reshape(shape); | ||||
| } | } | ||||
| Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape) | Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape) | ||||
| : Tensor(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), DataType(DataType::DE_STRING)) { | : Tensor(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), DataType(DataType::DE_STRING)) { | ||||
| // total bytes needed = offset array + strings | // total bytes needed = offset array + strings | ||||
| @@ -194,6 +197,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape | |||||
| MS_ASSERT(num_bytes == 0); | MS_ASSERT(num_bytes == 0); | ||||
| if (shape.known()) Tensor::Reshape(shape); | if (shape.known()) Tensor::Reshape(shape); | ||||
| } | } | ||||
| Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape, | Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape, | ||||
| DataType type, const unsigned char *data) { | DataType type, const unsigned char *data) { | ||||
| if (!shape.known()) { | if (!shape.known()) { | ||||
| @@ -223,6 +227,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl | |||||
| return Status::OK(); // returns base-class shared_ptr | return Status::OK(); // returns base-class shared_ptr | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) { | Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) { | ||||
| std::vector<dsize_t> shape; | std::vector<dsize_t> shape; | ||||
| for (dsize_t i = 0; i < arr.ndim(); i++) { | for (dsize_t i = 0; i < arr.ndim(); i++) { | ||||
| @@ -297,6 +302,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) { | |||||
| return Status::OK(); // returns base-class shared_ptr | return Status::OK(); // returns base-class shared_ptr | ||||
| } | } | ||||
| #endif | |||||
| Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings, | Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings, | ||||
| const TensorShape &shape) { | const TensorShape &shape) { | ||||
| @@ -698,21 +704,24 @@ std::vector<dsize_t> Tensor::Strides() { | |||||
| return strides; | return strides; | ||||
| } | } | ||||
| Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) { | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); | |||||
| #ifdef ENABLE_PYTHON | |||||
| Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) { | |||||
| RETURN_UNEXPECTED_IF_NULL(t); | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); | |||||
| std::string format_desc = t.type().GetPybindFormat(); | |||||
| std::string format_desc = t->type().GetPybindFormat(); | |||||
| if (format_desc.empty()) { | if (format_desc.empty()) { | ||||
| RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format"); | RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format"); | ||||
| } | } | ||||
| *out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */ | |||||
| t.type().SizeInBytes(), /* Size of one scalar */ | |||||
| format_desc, /* Python struct-style format descriptor */ | |||||
| t.Rank(), /* Number of dimensions */ | |||||
| t.shape().AsVector(), /* Buffer dimensions */ | |||||
| t.Strides()); | |||||
| *out = py::buffer_info(t->GetMutableBuffer(), /* Pointer to buffer */ | |||||
| t->type().SizeInBytes(), /* Size of one scalar */ | |||||
| format_desc, /* Python struct-style format descriptor */ | |||||
| t->Rank(), /* Number of dimensions */ | |||||
| t->shape().AsVector(), /* Buffer dimensions */ | |||||
| t->Strides()); | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #endif | |||||
| template <typename T> | template <typename T> | ||||
| Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const { | Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const { | ||||
| @@ -752,6 +761,8 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) | |||||
| o->swap(sv); | o->swap(sv); | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| // return data as numpy, should return status | // return data as numpy, should return status | ||||
| Status Tensor::GetDataAsNumpy(py::array *data) { | Status Tensor::GetDataAsNumpy(py::array *data) { | ||||
| RETURN_UNEXPECTED_IF_NULL(data_); | RETURN_UNEXPECTED_IF_NULL(data_); | ||||
| @@ -815,6 +826,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) { | |||||
| data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data)); | data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data)); | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #endif | |||||
| void Tensor::Squeeze() { shape_ = shape_.Squeeze(); } | void Tensor::Squeeze() { shape_ = shape_.Squeeze(); } | ||||
| @@ -26,20 +26,27 @@ | |||||
| #undef HAVE_STDDEF_H | #undef HAVE_STDDEF_H | ||||
| #undef HAVE_STDLIB_H | #undef HAVE_STDLIB_H | ||||
| #endif | #endif | ||||
| #ifdef ENABLE_PYTHON | |||||
| #include "pybind11/numpy.h" | #include "pybind11/numpy.h" | ||||
| #include "pybind11/pybind11.h" | #include "pybind11/pybind11.h" | ||||
| #include "pybind11/stl.h" | #include "pybind11/stl.h" | ||||
| #endif | |||||
| #include "dataset/core/constants.h" | #include "dataset/core/constants.h" | ||||
| #include "dataset/core/data_type.h" | #include "dataset/core/data_type.h" | ||||
| #include "dataset/core/tensor_shape.h" | #include "dataset/core/tensor_shape.h" | ||||
| #include "dataset/util/allocator.h" | |||||
| #include "dataset/util/status.h" | #include "dataset/util/status.h" | ||||
| #include "proto/example.pb.h" | #include "proto/example.pb.h" | ||||
| #ifdef ENABLE_PYTHON | |||||
| namespace py = pybind11; | namespace py = pybind11; | ||||
| #endif | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| class Tensor; | class Tensor; | ||||
| template <typename T> | |||||
| class Allocator; | |||||
| using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>; | using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>; | ||||
| using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>; // An allocator shared_ptr for Tensors | using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>; // An allocator shared_ptr for Tensors | ||||
| @@ -114,16 +121,17 @@ class Tensor { | |||||
| static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type, | static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type, | ||||
| const unsigned char *data = nullptr); | const unsigned char *data = nullptr); | ||||
| /// Create a copy of the input tensor | |||||
| /// \param out [out] output tensor to be generated | |||||
| /// \param in [in] original tensor to be copied | |||||
| /// \return Status | |||||
| // Create a copy of the input tensor | |||||
| // @param out [out] output tensor to be generated | |||||
| // @param in [in] original tensor to be copied | |||||
| // @return Status | |||||
| static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) { | static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) { | ||||
| const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); | const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); | ||||
| *out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes()); | *out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes()); | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| // A static factory method to create a Tensor from a given py::array. | // A static factory method to create a Tensor from a given py::array. | ||||
| // @param ptr output argument to hold the created Tensor | // @param ptr output argument to hold the created Tensor | ||||
| // @param arr py::array | // @param arr py::array | ||||
| @@ -132,6 +140,7 @@ class Tensor { | |||||
| // Helper function to create a tensor from Numpy of strings | // Helper function to create a tensor from Numpy of strings | ||||
| static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr); | static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr); | ||||
| #endif | |||||
| // A static factory method to create a Tensor from a given list of strings. | // A static factory method to create a Tensor from a given list of strings. | ||||
| // @param ptr output argument to hold the created Tensor | // @param ptr output argument to hold the created Tensor | ||||
| @@ -170,6 +179,7 @@ class Tensor { | |||||
| static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) { | static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) { | ||||
| return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar()); | return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar()); | ||||
| } | } | ||||
| // Create tensor from protobuf bytelist with uint8 or int8 types | // Create tensor from protobuf bytelist with uint8 or int8 types | ||||
| static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list, | static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list, | ||||
| const TensorShape &shape, const DataType &type, dsize_t pad_size); | const TensorShape &shape, const DataType &type, dsize_t pad_size); | ||||
| @@ -346,12 +356,12 @@ class Tensor { | |||||
| virtual void Squeeze(); | virtual void Squeeze(); | ||||
| /// Calculates the strides of the Tensor | |||||
| /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) | |||||
| /// The strides will be {4,2,1}. | |||||
| /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) | |||||
| /// The strides will be {16,8,4}. | |||||
| /// @return vector of integers | |||||
| // Calculates the strides of the Tensor | |||||
| // Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) | |||||
| // The strides will be {4,2,1}. | |||||
| // Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) | |||||
| // The strides will be {16,8,4}. | |||||
| // @return vector of integers | |||||
| std::vector<dsize_t> Strides(); | std::vector<dsize_t> Strides(); | ||||
| std::string ToString() { | std::string ToString() { | ||||
| @@ -376,6 +386,7 @@ class Tensor { | |||||
| // Slice string tensors | // Slice string tensors | ||||
| Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices); | Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices); | ||||
| #ifdef ENABLE_PYTHON | |||||
| // Constructs numpy array from input tensor | // Constructs numpy array from input tensor | ||||
| // @param data this data is the location of python data | // @param data this data is the location of python data | ||||
| // @return Status code | // @return Status code | ||||
| @@ -383,7 +394,8 @@ class Tensor { | |||||
| Status GetDataAsNumpyStrings(py::array *data); | Status GetDataAsNumpyStrings(py::array *data); | ||||
| static Status GetBufferInfo(Tensor &t, py::buffer_info *out); | |||||
| static Status GetBufferInfo(Tensor *t, py::buffer_info *out); | |||||
| #endif | |||||
| // Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor | // Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor | ||||
| Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); | Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); | ||||
| @@ -570,7 +582,7 @@ class Tensor { | |||||
| // Return a TensorIterator that points to the start of the Tensor. | // Return a TensorIterator that points to the start of the Tensor. | ||||
| // It's the user responsibility to use the correct type that matches the Tensor type | // It's the user responsibility to use the correct type that matches the Tensor type | ||||
| // @tparam T The type of values in the Tensor | |||||
| // @param T The type of values in the Tensor | |||||
| // @return TensorIterator | // @return TensorIterator | ||||
| template <typename T> | template <typename T> | ||||
| TensorIterator<T> begin() { | TensorIterator<T> begin() { | ||||
| @@ -18,7 +18,6 @@ | |||||
| #include "dataset/core/tensor_row.h" | #include "dataset/core/tensor_row.h" | ||||
| namespace py = pybind11; | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| @@ -77,6 +77,7 @@ TensorShape::TensorShape(const TensorShape &shape) | |||||
| known_ = shape.known_; // override with the input shape in case of unknown-rank tensor shape. | known_ = shape.known_; // override with the input shape in case of unknown-rank tensor shape. | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| TensorShape::TensorShape(py::list l) | TensorShape::TensorShape(py::list l) | ||||
| : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { | : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { | ||||
| std::vector<dsize_t> list_c; | std::vector<dsize_t> list_c; | ||||
| @@ -89,6 +90,7 @@ TensorShape::TensorShape(py::list l) | |||||
| } | } | ||||
| AddListToShape(list_c); | AddListToShape(list_c); | ||||
| } | } | ||||
| #endif | |||||
| TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type) | TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type) | ||||
| : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { | : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { | ||||
| @@ -197,6 +199,7 @@ TensorShape TensorShape::AppendDim(dsize_t dim) const { | |||||
| return TensorShape(vec); | return TensorShape(vec); | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| py::list TensorShape::AsPyList() { | py::list TensorShape::AsPyList() { | ||||
| py::list list; | py::list list; | ||||
| for (auto i : raw_shape_) { | for (auto i : raw_shape_) { | ||||
| @@ -204,6 +207,7 @@ py::list TensorShape::AsPyList() { | |||||
| } | } | ||||
| return list; | return list; | ||||
| } | } | ||||
| #endif | |||||
| TensorShape TensorShape::Squeeze() const { | TensorShape TensorShape::Squeeze() const { | ||||
| std::vector<dsize_t> new_shape; | std::vector<dsize_t> new_shape; | ||||
| @@ -24,13 +24,16 @@ | |||||
| #include <opencv2/core/mat.hpp> | #include <opencv2/core/mat.hpp> | ||||
| #ifdef ENABLE_PYTHON | |||||
| #include "pybind11/pybind11.h" | #include "pybind11/pybind11.h" | ||||
| namespace py = pybind11; | |||||
| #endif | |||||
| #include "dataset/core/constants.h" | #include "dataset/core/constants.h" | ||||
| #include "dataset/util/status.h" | |||||
| #include "dataset/core/global_context.h" | #include "dataset/core/global_context.h" | ||||
| #include "dataset/util/allocator.h" | #include "dataset/util/allocator.h" | ||||
| namespace py = pybind11; | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| // Class that represents a shape of a Tensor. A shape can be: | // Class that represents a shape of a Tensor. A shape can be: | ||||
| @@ -43,7 +46,8 @@ namespace dataset { | |||||
| // -# one or more dim is unknown --> not empty vector --> <d1, d2, d2, d3, ...> where di is unknown\n | // -# one or more dim is unknown --> not empty vector --> <d1, d2, d2, d3, ...> where di is unknown\n | ||||
| // Example: <3,?> (the 1st dim is unknown)\n | // Example: <3,?> (the 1st dim is unknown)\n | ||||
| // <2,?,?,?> (all dims but the 0th dim are unknown) | // <2,?,?,?> (all dims but the 0th dim are unknown) | ||||
| // TensorShape supports any dim > 0 and < 2^31-1 | |||||
| /// \brief TensorShape supports any dim > 0 and < 2^31-1 | |||||
| class TensorShape { | class TensorShape { | ||||
| public: | public: | ||||
| static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension | static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension | ||||
| @@ -51,57 +55,59 @@ class TensorShape { | |||||
| // Force the compiler to not create a no-arg constructor | // Force the compiler to not create a no-arg constructor | ||||
| TensorShape() = delete; | TensorShape() = delete; | ||||
| // Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). | |||||
| // If one of the dims is set to kDimUnknown, the shape will be flagged as unknown | |||||
| // @param list | |||||
| /// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). | |||||
| /// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown | |||||
| /// \param[in] list | |||||
| explicit TensorShape(const std::initializer_list<dsize_t> &list); | explicit TensorShape(const std::initializer_list<dsize_t> &list); | ||||
| // Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ). | |||||
| // If one of the dims is set to kDimUnknown, the shape will be flagged as unknown | |||||
| // @param list | |||||
| /// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ). | |||||
| /// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown | |||||
| /// \param[in] list | |||||
| explicit TensorShape(const std::vector<dsize_t> &list); | explicit TensorShape(const std::vector<dsize_t> &list); | ||||
| // Copy constructor | |||||
| // @param shape | |||||
| /// \brief Copy constructor | |||||
| /// \param[in] shape | |||||
| TensorShape(const TensorShape &shape); | TensorShape(const TensorShape &shape); | ||||
| // construct a TensorShape via a python list | |||||
| // @param py::list l - a list object from python | |||||
| #ifdef ENABLE_PYTHON | |||||
| /// \brief construct a TensorShape via a python list | |||||
| /// \param[in] py::list l - a list object from python | |||||
| explicit TensorShape(py::list l); | explicit TensorShape(py::list l); | ||||
| #endif | |||||
| ~TensorShape() = default; | ~TensorShape() = default; | ||||
| // Create a scalar Shape (i.e., empty shape with mKnown = true) | |||||
| // @return TensorShape | |||||
| /// \brief Create a scalar Shape (i.e., empty shape with mKnown = true) | |||||
| /// \return TensorShape | |||||
| static TensorShape CreateScalar() { return TensorShape({}); } | static TensorShape CreateScalar() { return TensorShape({}); } | ||||
| // Create a shape with an unknown rank. | |||||
| // @return TensorShape | |||||
| /// \brief Create a shape with an unknown rank. | |||||
| /// \return TensorShape | |||||
| static TensorShape CreateUnknownRankShape(); | static TensorShape CreateUnknownRankShape(); | ||||
| // Create a shape with a known rank . | |||||
| // @return TensorShape | |||||
| /// \brief Create an unknown shape with a known rank. | |||||
| /// \return TensorShape | |||||
| static TensorShape CreateUnknownShapeWithRank(dsize_t rank); | static TensorShape CreateUnknownShapeWithRank(dsize_t rank); | ||||
| // Insert a new dim into a copy of the current shape. | |||||
| // @param dim to be added | |||||
| // @param axis the index where dim should be added | |||||
| // @return New modified shape | |||||
| /// \brief Insert a new dim into a copy of the current shape. | |||||
| /// \param[in] dim to be added | |||||
| /// \param[in] axis the index where dim should be added | |||||
| /// \return New modified shape | |||||
| TensorShape InsertDim(dsize_t axis, dsize_t dim) const; | TensorShape InsertDim(dsize_t axis, dsize_t dim) const; | ||||
| // Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> | |||||
| // @param dim | |||||
| // @return | |||||
| /// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> | |||||
| /// \param[in] dim | |||||
| /// \return New shape with dim inserted at the front | |||||
| TensorShape PrependDim(dsize_t dim) const; | TensorShape PrependDim(dsize_t dim) const; | ||||
| // Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> | |||||
| // @param dim | |||||
| // @return | |||||
| /// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> | |||||
| /// \param[in] dim | |||||
| /// \return New shape with dim inserted at the end | |||||
| TensorShape AppendDim(dsize_t dim) const; | TensorShape AppendDim(dsize_t dim) const; | ||||
| // Create a shape based on OpenCV shape and type | |||||
| // @param cv_size | |||||
| // @param type int that represent the type in OpenCV, example CV_8U, CV_64S | |||||
| /// \brief Create a shape based on OpenCV shape and type | |||||
| /// \param[in] cv_size | |||||
| /// \param[in] type int that represent the type in OpenCV, example CV_8U, CV_64S | |||||
| TensorShape(cv::MatSize cv_size, uint32_t type); | TensorShape(cv::MatSize cv_size, uint32_t type); | ||||
| dsize_t Size() const { return raw_shape_.size(); } | dsize_t Size() const { return raw_shape_.size(); } | ||||
| @@ -123,47 +129,50 @@ class TensorShape { | |||||
| return raw_shape_[index]; | return raw_shape_[index]; | ||||
| } | } | ||||
| // Return the Shape as a vector | |||||
| // @return | |||||
| /// \brief Return the Shape as a vector | |||||
| /// \return | |||||
| std::vector<dsize_t> AsVector() const; | std::vector<dsize_t> AsVector() const; | ||||
| // Returns the class info as a string | |||||
| // @return | |||||
| /// \brief Returns the class info as a string | |||||
| /// \return | |||||
| std::string ToString() const { | std::string ToString() const { | ||||
| std::stringstream ss; | std::stringstream ss; | ||||
| ss << *this; | ss << *this; | ||||
| return ss.str(); | return ss.str(); | ||||
| } | } | ||||
| // Actual print function used by operator<< | |||||
| // @param out output string stream | |||||
| /// \brief Actual print function used by operator<< | |||||
| /// \param[in] out - the output stream to print to | |||||
| void Print(std::ostream &out) const; | void Print(std::ostream &out) const; | ||||
| // << Stream output operator overload | |||||
| // @notes This allows you to print the info using stream operators | |||||
| // @param out - reference to the output stream being overloaded | |||||
| // @param rO - reference to the TensorShape to display | |||||
| // @return - the output stream must be returned | |||||
| /// \brief << Stream output operator overload | |||||
| /// This allows you to print the info using stream operators | |||||
| /// \param[in] out - reference to the output stream being overloaded | |||||
| /// \param[in] so - reference to the TensorShape to display | |||||
| /// \return - the output stream must be returned | |||||
| friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) { | friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) { | ||||
| so.Print(out); | so.Print(out); | ||||
| return out; | return out; | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| py::list AsPyList(); | py::list AsPyList(); | ||||
| #endif | |||||
| // Checks if the given index is a valid index for this tensor. | |||||
| // For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. | |||||
| // @param index | |||||
| // @return bool | |||||
| /// \brief Checks if the given index is a valid index for this tensor. | |||||
| /// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. | |||||
| /// \param[in] index | |||||
| /// \return bool | |||||
| bool IsValidIndex(const std::vector<dsize_t> &index) const; | bool IsValidIndex(const std::vector<dsize_t> &index) const; | ||||
| TensorShape Squeeze() const; | TensorShape Squeeze() const; | ||||
| std::vector<dsize_t> Strides() const; | std::vector<dsize_t> Strides() const; | ||||
| // Returns the location of the item assuming row major memory layout. | |||||
| // @param index | |||||
| // @return | |||||
| /// \brief Returns the location of the item assuming row major memory layout. | |||||
| /// \param[in] index | |||||
| /// \param[out] flat_index | |||||
| /// \return | |||||
| Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const; | Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const; | ||||
| private: | private: | ||||
| @@ -174,11 +183,11 @@ class TensorShape { | |||||
| // Vector to keep the strides of the shape. The size is rank+1 | // Vector to keep the strides of the shape. The size is rank+1 | ||||
| std::vector<dsize_t, IntAlloc> strides_; | std::vector<dsize_t, IntAlloc> strides_; | ||||
| // Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape. | |||||
| // @tparam T list | |||||
| // @param list Iterable list | |||||
| // @return true if the shape is valid and no overflow would be generated when counting the number of elements. | |||||
| // False otherwise. | |||||
| /// \brief Internal utility function to iterate over a list, | |||||
| /// check if the dim is valid and then insert it into the shape. | |||||
| /// \param[in] list Iterable list | |||||
| /// \return true if the shape is valid and no overflow would be generated when counting the number of elements. | |||||
| /// False otherwise. | |||||
| template <typename T> | template <typename T> | ||||
| void AddListToShape(const T &list); | void AddListToShape(const T &list); | ||||
| }; | }; | ||||
| @@ -2,13 +2,12 @@ add_subdirectory(source) | |||||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | ||||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | ||||
| add_library(engine-datasetops OBJECT | |||||
| set(DATASET_ENGINE_DATASETOPS_SRC_FILES | |||||
| dataset_op.cc | dataset_op.cc | ||||
| parallel_op.cc | parallel_op.cc | ||||
| pipeline_op.cc | pipeline_op.cc | ||||
| barrier_op.cc | |||||
| batch_op.cc | batch_op.cc | ||||
| bucket_batch_by_length_op.cc | |||||
| device_queue_op.cc | device_queue_op.cc | ||||
| map_op.cc | map_op.cc | ||||
| project_op.cc | project_op.cc | ||||
| @@ -18,8 +17,18 @@ add_library(engine-datasetops OBJECT | |||||
| take_op.cc | take_op.cc | ||||
| shuffle_op.cc | shuffle_op.cc | ||||
| zip_op.cc | zip_op.cc | ||||
| concat_op.cc | |||||
| filter_op.cc | |||||
| build_vocab_op.cc | |||||
| concat_op.cc | |||||
| ) | ) | ||||
| if (ENABLE_PYTHON) | |||||
| set(DATASET_ENGINE_DATASETOPS_SRC_FILES | |||||
| ${DATASET_ENGINE_DATASETOPS_SRC_FILES} | |||||
| bucket_batch_by_length_op.cc | |||||
| barrier_op.cc | |||||
| filter_op.cc | |||||
| build_vocab_op.cc | |||||
| ) | |||||
| endif() | |||||
| add_library(engine-datasetops OBJECT ${DATASET_ENGINE_DATASETOPS_SRC_FILES}) | |||||
| @@ -19,7 +19,9 @@ | |||||
| #include <iomanip> | #include <iomanip> | ||||
| #include "common/utils.h" | #include "common/utils.h" | ||||
| #ifdef ENABLE_PYTHON | |||||
| #include "dataset/core/pybind_support.h" | #include "dataset/core/pybind_support.h" | ||||
| #endif | |||||
| #include "dataset/engine/data_buffer.h" | #include "dataset/engine/data_buffer.h" | ||||
| #include "dataset/engine/db_connector.h" | #include "dataset/engine/db_connector.h" | ||||
| #include "dataset/engine/opt/pass.h" | #include "dataset/engine/opt/pass.h" | ||||
| @@ -38,9 +40,14 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa | |||||
| Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) { | Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) { | ||||
| RETURN_IF_NOT_OK(SanityCheck()); | RETURN_IF_NOT_OK(SanityCheck()); | ||||
| #ifdef ENABLE_PYTHON | |||||
| *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, | *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, | ||||
| builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_, | builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_, | ||||
| builder_batch_map_func_, builder_pad_map_); | builder_batch_map_func_, builder_pad_map_); | ||||
| #else | |||||
| *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, | |||||
| builder_num_workers_, builder_cols_to_map_, builder_pad_map_); | |||||
| #endif | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -52,6 +59,7 @@ Status BatchOp::Builder::SanityCheck() { | |||||
| return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err)); | return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err)); | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | ||||
| const std::vector<std::string> &cols_to_map, py::function batch_size_func, py::function batch_map_func, | const std::vector<std::string> &cols_to_map, py::function batch_size_func, py::function batch_map_func, | ||||
| PadInfo pad_map) | PadInfo pad_map) | ||||
| @@ -65,6 +73,18 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, | |||||
| pad_info_(pad_map) { | pad_info_(pad_map) { | ||||
| worker_queues_.Init(num_workers, op_queue_size); | worker_queues_.Init(num_workers, op_queue_size); | ||||
| } | } | ||||
| #else | |||||
| BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | |||||
| const std::vector<std::string> &cols_to_map, PadInfo pad_map) | |||||
| : ParallelOp(num_workers, op_queue_size), | |||||
| start_batch_size_(batch_size), | |||||
| drop_(drop), | |||||
| pad_(pad), | |||||
| pyfunc_column_names_(cols_to_map), | |||||
| pad_info_(pad_map) { | |||||
| worker_queues_.Init(num_workers, op_queue_size); | |||||
| } | |||||
| #endif | |||||
| Status BatchOp::operator()() { | Status BatchOp::operator()() { | ||||
| Status rc = LaunchThreadsAndInitOp(); | Status rc = LaunchThreadsAndInitOp(); | ||||
| @@ -206,7 +226,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) { | |||||
| Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, | Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, | ||||
| std::unique_ptr<DataBuffer> *db) { | std::unique_ptr<DataBuffer> *db) { | ||||
| RETURN_UNEXPECTED_IF_NULL(table_pair.first); | RETURN_UNEXPECTED_IF_NULL(table_pair.first); | ||||
| if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc | |||||
| #ifdef ENABLE_PYTHON | |||||
| if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc | |||||
| #endif | |||||
| if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed | if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed | ||||
| (*db) = std::make_unique<DataBuffer>(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); | (*db) = std::make_unique<DataBuffer>(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); | ||||
| std::unique_ptr<TensorQTable> dest_table = std::make_unique<TensorQTable>(); | std::unique_ptr<TensorQTable> dest_table = std::make_unique<TensorQTable>(); | ||||
| @@ -229,6 +251,7 @@ Status BatchOp::EoeReceived(int32_t) { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) { | Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) { | ||||
| TensorBatchTable input_table; | TensorBatchTable input_table; | ||||
| input_table.reserve(pyfunc_column_names_.size()); | input_table.reserve(pyfunc_column_names_.size()); | ||||
| @@ -259,16 +282,22 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> | |||||
| } | } | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #endif | |||||
| Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { | Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { | ||||
| #ifdef ENABLE_PYTHON | |||||
| if (batch_size_func_ != nullptr) { | if (batch_size_func_ != nullptr) { | ||||
| RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); | RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); | ||||
| } else { | } else { | ||||
| (*batch_size) = start_batch_size_; | (*batch_size) = start_batch_size_; | ||||
| } | } | ||||
| #else | |||||
| (*batch_size) = start_batch_size_; | |||||
| #endif | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { | Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { | ||||
| { | { | ||||
| // Acquire Python GIL | // Acquire Python GIL | ||||
| @@ -336,6 +365,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou | |||||
| } | } | ||||
| return Status(StatusCode::kOK); | return Status(StatusCode::kOK); | ||||
| } | } | ||||
| #endif | |||||
| Status BatchOp::PadColumns(std::unique_ptr<TensorQTable> *table, const PadInfo &pad_info, | Status BatchOp::PadColumns(std::unique_ptr<TensorQTable> *table, const PadInfo &pad_info, | ||||
| const std::unordered_map<std::string, int32_t> &column_name_id_map) { | const std::unordered_map<std::string, int32_t> &column_name_id_map) { | ||||
| @@ -89,6 +89,7 @@ class BatchOp : public ParallelOp { | |||||
| return *this; | return *this; | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| // set columns to perform map on | // set columns to perform map on | ||||
| // @param const std::vector<std::string> & cols_to_map - name of columns to perform map on | // @param const std::vector<std::string> & cols_to_map - name of columns to perform map on | ||||
| // @return Builder & reference to builder class object | // @return Builder & reference to builder class object | ||||
| @@ -104,6 +105,7 @@ class BatchOp : public ParallelOp { | |||||
| builder_batch_size_func_ = batch_size_func; | builder_batch_size_func_ = batch_size_func; | ||||
| return *this; | return *this; | ||||
| } | } | ||||
| #endif | |||||
| // @param std::shared_ptr<BatchOp> *ptr pointer to shared_ptr, actual return arg | // @param std::shared_ptr<BatchOp> *ptr pointer to shared_ptr, actual return arg | ||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| @@ -121,8 +123,10 @@ class BatchOp : public ParallelOp { | |||||
| int32_t builder_op_connector_size_; | int32_t builder_op_connector_size_; | ||||
| std::vector<std::string> builder_cols_to_map_; | std::vector<std::string> builder_cols_to_map_; | ||||
| PadInfo builder_pad_map_; | PadInfo builder_pad_map_; | ||||
| #ifdef ENABLE_PYTHON | |||||
| py::function builder_batch_size_func_; | py::function builder_batch_size_func_; | ||||
| py::function builder_batch_map_func_; | py::function builder_batch_map_func_; | ||||
| #endif | |||||
| }; | }; | ||||
| enum batchCtrl : int8_t { kNoCtrl = 0, kEOE = 1, kEOF = 2, kQuit = 3 }; | enum batchCtrl : int8_t { kNoCtrl = 0, kEOE = 1, kEOF = 2, kQuit = 3 }; | ||||
| @@ -144,6 +148,7 @@ class BatchOp : public ParallelOp { | |||||
| const int64_t get_epoch_num() const { return epoch_num_; } | const int64_t get_epoch_num() const { return epoch_num_; } | ||||
| }; | }; | ||||
| #ifdef ENABLE_PYTHON | |||||
| // BatchOp constructor | // BatchOp constructor | ||||
| // @param int32_t batch_size | // @param int32_t batch_size | ||||
| // @param bool drop | // @param bool drop | ||||
| @@ -152,6 +157,10 @@ class BatchOp : public ParallelOp { | |||||
| // @param int32_t num_workers | // @param int32_t num_workers | ||||
| BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | ||||
| const std::vector<std::string> &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map); | const std::vector<std::string> &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map); | ||||
| #else | |||||
| BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, | |||||
| const std::vector<std::string> &, PadInfo pad_map); | |||||
| #endif | |||||
| // BatchOp destructor | // BatchOp destructor | ||||
| ~BatchOp() {} | ~BatchOp() {} | ||||
| @@ -219,10 +228,13 @@ class BatchOp : public ParallelOp { | |||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| Status MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, | Status MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, | ||||
| std::unique_ptr<DataBuffer> *db); | std::unique_ptr<DataBuffer> *db); | ||||
| #ifdef ENABLE_PYTHON | |||||
| // Function that calls pyfunc to perform map on batch | // Function that calls pyfunc to perform map on batch | ||||
| // @param (std::pair<std::unique_ptr<TensorQTable>, batch_stats> *table_pair - contains un-batched tensor | // @param (std::pair<std::unique_ptr<TensorQTable>, batch_stats> *table_pair - contains un-batched tensor | ||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| Status MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair); | Status MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair); | ||||
| #endif | |||||
| // @param const PadInfo &pad_info pad info to unpack | // @param const PadInfo &pad_info pad info to unpack | ||||
| // @param const std::unordered_map<std::string, int32_t>& column_name_id_map - column names to index mapping | // @param const std::unordered_map<std::string, int32_t>& column_name_id_map - column names to index mapping | ||||
| @@ -247,6 +259,7 @@ class BatchOp : public ParallelOp { | |||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| Status LaunchThreadsAndInitOp(); | Status LaunchThreadsAndInitOp(); | ||||
| #ifdef ENABLE_PYTHON | |||||
| // Invoke batch size function with current BatchInfo to generate batch size. | // Invoke batch size function with current BatchInfo to generate batch size. | ||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| Status InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info); | Status InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info); | ||||
| @@ -254,6 +267,7 @@ class BatchOp : public ParallelOp { | |||||
| // Invoke batch map function with current BatchInfo to generate tensors to batch. | // Invoke batch map function with current BatchInfo to generate tensors to batch. | ||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info); | Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info); | ||||
| #endif | |||||
| int32_t start_batch_size_; | int32_t start_batch_size_; | ||||
| bool drop_; // bool for whether to drop remainder or not | bool drop_; // bool for whether to drop remainder or not | ||||
| @@ -262,8 +276,10 @@ class BatchOp : public ParallelOp { | |||||
| PadInfo pad_info_; // column names to perform padding on | PadInfo pad_info_; // column names to perform padding on | ||||
| std::unique_ptr<ChildIterator> child_iterator_; // child iterator for fetching TensorRows 1 by 1 | std::unique_ptr<ChildIterator> child_iterator_; // child iterator for fetching TensorRows 1 by 1 | ||||
| QueueList<std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>> worker_queues_; // internal queue for syncing worker | QueueList<std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>> worker_queues_; // internal queue for syncing worker | ||||
| #ifdef ENABLE_PYTHON | |||||
| py::function batch_size_func_; // Function pointer of batch size function | py::function batch_size_func_; // Function pointer of batch size function | ||||
| py::function batch_map_func_; // Function pointer of per batch map function | py::function batch_map_func_; // Function pointer of per batch map function | ||||
| #endif | |||||
| }; | }; | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -1,19 +1,32 @@ | |||||
| add_subdirectory(sampler) | add_subdirectory(sampler) | ||||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | ||||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | ||||
| add_library(engine-datasetops-source OBJECT | |||||
| generator_op.cc | |||||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||||
| io_block.cc | io_block.cc | ||||
| mindrecord_op.cc | |||||
| tf_reader_op.cc | |||||
| image_folder_op.cc | image_folder_op.cc | ||||
| mnist_op.cc | mnist_op.cc | ||||
| voc_op.cc | |||||
| coco_op.cc | coco_op.cc | ||||
| manifest_op.cc | |||||
| cifar_op.cc | cifar_op.cc | ||||
| random_data_op.cc | random_data_op.cc | ||||
| celeba_op.cc | celeba_op.cc | ||||
| text_file_op.cc | text_file_op.cc | ||||
| clue_op.cc | clue_op.cc | ||||
| ) | |||||
| ) | |||||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||||
| ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} | |||||
| mindrecord_op.cc | |||||
| tf_reader_op.cc | |||||
| ) | |||||
| if (ENABLE_PYTHON) | |||||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||||
| ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} | |||||
| generator_op.cc | |||||
| voc_op.cc | |||||
| manifest_op.cc | |||||
| ) | |||||
| endif() | |||||
| add_library(engine-datasetops-source OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}) | |||||
| @@ -1,12 +1,21 @@ | |||||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | ||||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | ||||
| add_library(engine-datasetops-source-sampler OBJECT | |||||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES | |||||
| distributed_sampler.cc | distributed_sampler.cc | ||||
| pk_sampler.cc | pk_sampler.cc | ||||
| python_sampler.cc | |||||
| random_sampler.cc | random_sampler.cc | ||||
| sampler.cc | sampler.cc | ||||
| sequential_sampler.cc | sequential_sampler.cc | ||||
| subset_random_sampler.cc | subset_random_sampler.cc | ||||
| weighted_random_sampler.cc | weighted_random_sampler.cc | ||||
| ) | ) | ||||
| if (ENABLE_PYTHON) | |||||
| set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES | |||||
| ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} | |||||
| python_sampler.cc | |||||
| ) | |||||
| endif() | |||||
| add_library(engine-datasetops-source-sampler OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES}) | |||||
| @@ -89,6 +89,7 @@ void Sampler::Print(std::ostream &out, bool show_all) const { | |||||
| } | } | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| Status Sampler::GetAllIdsThenReset(py::array *data) { | Status Sampler::GetAllIdsThenReset(py::array *data) { | ||||
| std::unique_ptr<DataBuffer> db; | std::unique_ptr<DataBuffer> db; | ||||
| std::shared_ptr<Tensor> sample_ids; | std::shared_ptr<Tensor> sample_ids; | ||||
| @@ -120,6 +121,7 @@ Status Sampler::GetAllIdsThenReset(py::array *data) { | |||||
| RETURN_IF_NOT_OK(ResetSampler()); | RETURN_IF_NOT_OK(ResetSampler()); | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #endif | |||||
| Status Sampler::SetNumSamples(int64_t num_samples) { | Status Sampler::SetNumSamples(int64_t num_samples) { | ||||
| CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative"); | CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative"); | ||||
| @@ -74,8 +74,11 @@ class Sampler { | |||||
| // @return - The error code return | // @return - The error code return | ||||
| virtual Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) = 0; | virtual Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) = 0; | ||||
| // This function is only called by the Python layer. Not needed by Android. | |||||
| #ifdef ENABLE_PYTHON | |||||
| // return all ids in one epoch as a numpy array, then call reset | // return all ids in one epoch as a numpy array, then call reset | ||||
| Status GetAllIdsThenReset(py::array *data); | Status GetAllIdsThenReset(py::array *data); | ||||
| #endif | |||||
| // for next epoch of sampleIds | // for next epoch of sampleIds | ||||
| // @return - The error code return | // @return - The error code return | ||||
| @@ -155,5 +158,4 @@ class Sampler { | |||||
| }; | }; | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SAMPLER_H_ | #endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SAMPLER_H_ | ||||
| @@ -429,6 +429,7 @@ Status Graph::GetMetaInfo(MetaInfo *meta_info) { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| Status Graph::GraphInfo(py::dict *out) { | Status Graph::GraphInfo(py::dict *out) { | ||||
| MetaInfo meta_info; | MetaInfo meta_info; | ||||
| RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); | RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); | ||||
| @@ -440,6 +441,7 @@ Status Graph::GraphInfo(py::dict *out) { | |||||
| (*out)["edge_feature_type"] = py::cast(meta_info.edge_feature_type); | (*out)["edge_feature_type"] = py::cast(meta_info.edge_feature_type); | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #endif | |||||
| Status Graph::LoadNodeAndEdge() { | Status Graph::LoadNodeAndEdge() { | ||||
| GraphLoader gl(dataset_file_, num_workers_); | GraphLoader gl(dataset_file_, num_workers_); | ||||
| @@ -140,8 +140,10 @@ class Graph { | |||||
| // @return Status - The error code return | // @return Status - The error code return | ||||
| Status GetMetaInfo(MetaInfo *meta_info); | Status GetMetaInfo(MetaInfo *meta_info); | ||||
| #ifdef ENABLE_PYTHON | |||||
| // Return meta information to python layer | // Return meta information to python layer | ||||
| Status GraphInfo(py::dict *out); | Status GraphInfo(py::dict *out); | ||||
| #endif | |||||
| Status Init(); | Status Init(); | ||||
| @@ -21,13 +21,15 @@ | |||||
| #include "dataset/engine/datasetops/map_op.h" | #include "dataset/engine/datasetops/map_op.h" | ||||
| #include "dataset/engine/datasetops/project_op.h" | #include "dataset/engine/datasetops/project_op.h" | ||||
| #include "dataset/engine/datasetops/rename_op.h" | #include "dataset/engine/datasetops/rename_op.h" | ||||
| #include "dataset/engine/datasetops/filter_op.h" | |||||
| #include "dataset/engine/datasetops/repeat_op.h" | #include "dataset/engine/datasetops/repeat_op.h" | ||||
| #include "dataset/engine/datasetops/skip_op.h" | #include "dataset/engine/datasetops/skip_op.h" | ||||
| #include "dataset/engine/datasetops/shuffle_op.h" | #include "dataset/engine/datasetops/shuffle_op.h" | ||||
| #include "dataset/engine/datasetops/source/generator_op.h" | |||||
| #include "dataset/engine/datasetops/source/mindrecord_op.h" | #include "dataset/engine/datasetops/source/mindrecord_op.h" | ||||
| #include "dataset/engine/datasetops/source/tf_reader_op.h" | #include "dataset/engine/datasetops/source/tf_reader_op.h" | ||||
| #ifdef ENABLE_PYTHON | |||||
| #include "dataset/engine/datasetops/filter_op.h" | |||||
| #include "dataset/engine/datasetops/source/generator_op.h" | |||||
| #endif | |||||
| #include "dataset/engine/datasetops/source/image_folder_op.h" | #include "dataset/engine/datasetops/source/image_folder_op.h" | ||||
| #include "dataset/engine/datasetops/take_op.h" | #include "dataset/engine/datasetops/take_op.h" | ||||
| #include "dataset/engine/datasetops/zip_op.h" | #include "dataset/engine/datasetops/zip_op.h" | ||||
| @@ -111,35 +113,37 @@ Status NodePass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) { | |||||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | ||||
| } | } | ||||
| Status NodePass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) { | |||||
| Status NodePass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) { | |||||
| // Fallback to base class visitor by default | // Fallback to base class visitor by default | ||||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | ||||
| } | } | ||||
| Status NodePass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) { | |||||
| Status NodePass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) { | |||||
| // Fallback to base class visitor by default | // Fallback to base class visitor by default | ||||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | ||||
| } | } | ||||
| Status NodePass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) { | |||||
| Status NodePass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) { | |||||
| // Fallback to base class visitor by default | // Fallback to base class visitor by default | ||||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | ||||
| } | } | ||||
| Status NodePass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) { | |||||
| Status NodePass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) { | |||||
| // Fallback to base class visitor by default | // Fallback to base class visitor by default | ||||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | ||||
| } | } | ||||
| Status NodePass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) { | |||||
| #ifdef ENABLE_PYTHON | |||||
| Status NodePass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) { | |||||
| // Fallback to base class visitor by default | // Fallback to base class visitor by default | ||||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | ||||
| } | } | ||||
| Status NodePass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) { | |||||
| Status NodePass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) { | |||||
| // Fallback to base class visitor by default | // Fallback to base class visitor by default | ||||
| return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified); | ||||
| } | } | ||||
| #endif | |||||
| Status NodePass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) { | Status NodePass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) { | ||||
| // Fallback to base class visitor by default | // Fallback to base class visitor by default | ||||
| @@ -33,18 +33,20 @@ class ProjectOp; | |||||
| class RenameOp; | class RenameOp; | ||||
| class FilterOp; | |||||
| class SkipOp; | class SkipOp; | ||||
| class ShuffleOp; | class ShuffleOp; | ||||
| class GeneratorOp; | |||||
| class MindRecordOp; | class MindRecordOp; | ||||
| class TFReaderOp; | class TFReaderOp; | ||||
| #ifdef ENABLE_PYTHON | |||||
| class FilterOp; | |||||
| class GeneratorOp; | |||||
| #endif | |||||
| class TakeOp; | class TakeOp; | ||||
| class ZipOp; | class ZipOp; | ||||
| @@ -122,18 +124,20 @@ class NodePass : public Pass { | |||||
| virtual Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified); | virtual Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified); | ||||
| virtual Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified); | |||||
| virtual Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified); | virtual Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified); | ||||
| virtual Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified); | virtual Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified); | ||||
| virtual Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified); | |||||
| virtual Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified); | virtual Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified); | ||||
| virtual Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified); | virtual Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified); | ||||
| #ifdef ENABLE_PYTHON | |||||
| virtual Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified); | |||||
| virtual Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified); | |||||
| #endif | |||||
| virtual Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified); | virtual Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified); | ||||
| virtual Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified); | virtual Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified); | ||||
| @@ -50,12 +50,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status PrinterPass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) { | |||||
| *modified = false; | |||||
| std::cout << "Visiting FilterOp" << '\n'; | |||||
| return Status::OK(); | |||||
| } | |||||
| Status PrinterPass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) { | Status PrinterPass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) { | ||||
| *modified = false; | *modified = false; | ||||
| std::cout << "Visiting SkipOp" << '\n'; | std::cout << "Visiting SkipOp" << '\n'; | ||||
| @@ -67,11 +61,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status PrinterPass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) { | |||||
| *modified = false; | |||||
| std::cout << "Visiting GeneratorOp" << '\n'; | |||||
| return Status::OK(); | |||||
| } | |||||
| Status PrinterPass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) { | Status PrinterPass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) { | ||||
| *modified = false; | *modified = false; | ||||
| std::cout << "Visiting MindRecordOp" << '\n'; | std::cout << "Visiting MindRecordOp" << '\n'; | ||||
| @@ -84,6 +73,20 @@ Status PrinterPass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| #ifdef ENABLE_PYTHON | |||||
| Status PrinterPass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) { | |||||
| *modified = false; | |||||
| std::cout << "Visiting FilterOp" << '\n'; | |||||
| return Status::OK(); | |||||
| } | |||||
| Status PrinterPass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) { | |||||
| *modified = false; | |||||
| std::cout << "Visiting GeneratorOp" << '\n'; | |||||
| return Status::OK(); | |||||
| } | |||||
| #endif | |||||
| Status PrinterPass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) { | Status PrinterPass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) { | ||||
| *modified = false; | *modified = false; | ||||
| std::cout << "Visiting TakeOp" << '\n'; | std::cout << "Visiting TakeOp" << '\n'; | ||||
| @@ -35,18 +35,20 @@ class PrinterPass : public NodePass { | |||||
| Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) override; | Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) override; | ||||
| Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) override; | |||||
| Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) override; | Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) override; | ||||
| Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) override; | Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) override; | ||||
| Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) override; | |||||
| Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) override; | Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) override; | ||||
| Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) override; | Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) override; | ||||
| #ifdef ENABLE_PYTHON | |||||
| Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) override; | |||||
| Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) override; | |||||
| #endif | |||||
| Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) override; | Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) override; | ||||
| Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified) override; | Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified) override; | ||||
| @@ -0,0 +1 @@ | |||||
| ../../../core/constants.h | |||||
| @@ -0,0 +1 @@ | |||||
| ../../../core/data_type.h | |||||
| @@ -0,0 +1 @@ | |||||
| ../../../core/tensor_shape.h | |||||
| @@ -0,0 +1 @@ | |||||
| ../../../util/status.h | |||||
| @@ -0,0 +1,357 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef DATASET_INCLUDE_DATASETS_H_ | |||||
| #define DATASET_INCLUDE_DATASETS_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <set> | |||||
| #include <map> | |||||
| #include <utility> | |||||
| #include <string> | |||||
| #include "dataset/include/tensor.h" | |||||
| #include "dataset/include/iterator.h" | |||||
| #include "dataset/include/samplers.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Forward declare | |||||
| class DatasetOp; | |||||
| class DataSchema; | |||||
| class Tensor; | |||||
| class TensorShape; | |||||
| namespace api { | |||||
| class TensorOperation; | |||||
| class SamplerObj; | |||||
| class ImageFolderDataset; | |||||
| class MnistDataset; | |||||
| class BatchDataset; | |||||
| class RepeatDataset; | |||||
| class MapDataset; | |||||
| class ShuffleDataset; | |||||
| class Cifar10Dataset; | |||||
| class ProjectDataset; | |||||
| /// \brief Function to create an ImageFolderDataset | |||||
| /// \notes A source dataset that reads images from a tree of directories | |||||
| /// All images within one folder have the same label | |||||
| /// The generated dataset has two columns ['image', 'label'] | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||||
| /// \param[in] decode A flag to decode in ImageFolder | |||||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, | |||||
| /// A `RandomSampler` will be used to randomly iterate the entire dataset | |||||
| /// \param[in] extensions File extensions to be read | |||||
| /// \param[in] class_indexing a class name to label map | |||||
| /// \return Shared pointer to the current ImageFolderDataset | |||||
| std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool decode = false, | |||||
| std::shared_ptr<SamplerObj> sampler = nullptr, | |||||
| std::set<std::string> extensions = {}, | |||||
| std::map<std::string, int32_t> class_indexing = {}); | |||||
| /// \brief Function to create a MnistDataset | |||||
| /// \notes The generated dataset has two columns ['image', 'label'] | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, | |||||
| /// A `RandomSampler` will be used to randomly iterate the entire dataset | |||||
| /// \return Shared pointer to the current MnistDataset | |||||
| std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler = nullptr); | |||||
| /// \brief Function to create a Cifar10 Dataset | |||||
| /// \notes The generated dataset has two columns ['image', 'label'] | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||||
| /// \param[in] num_samples The number of images to be included in the dataset | |||||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` | |||||
| /// will be used to randomly iterate the entire dataset | |||||
| /// \return Shared pointer to the current Dataset | |||||
| std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples, | |||||
| std::shared_ptr<SamplerObj> sampler); | |||||
| /// \class Dataset datasets.h | |||||
| /// \brief A base class to represent a dataset in the data pipeline. | |||||
| class Dataset : public std::enable_shared_from_this<Dataset> { | |||||
| public: | |||||
| friend class Iterator; | |||||
| /// \brief Constructor | |||||
| Dataset(); | |||||
| /// \brief Destructor | |||||
| /// \note Declared virtual because Dataset is a polymorphic base class; destroying a derived | |||||
| /// dataset through a pointer to Dataset must run the derived destructor | |||||
| virtual ~Dataset() = default; | |||||
| /// \brief Pure virtual function to convert a Dataset class into a runtime dataset object | |||||
| /// \return shared pointer to the list of newly created DatasetOps | |||||
| virtual std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() = 0; | |||||
| /// \brief Pure virtual function for derived class to implement parameters validation | |||||
| /// \return bool True if all the params are valid | |||||
| virtual bool ValidateParams() = 0; | |||||
| /// \brief Setter function for runtime number of workers | |||||
| /// \param[in] num_workers The number of threads in this operator | |||||
| /// \return Shared pointer to the original object | |||||
| std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers) { | |||||
| num_workers_ = num_workers; | |||||
| return shared_from_this(); | |||||
| } | |||||
| /// \brief Function to create an Iterator over the Dataset pipeline | |||||
| /// \return Shared pointer to the Iterator | |||||
| std::shared_ptr<Iterator> CreateIterator(); | |||||
| /// \brief Function to create a BatchDataset | |||||
| /// \notes Combines batch_size number of consecutive rows into batches | |||||
| /// \param[in] batch_size The number of consecutive rows combined into each batch | |||||
| /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete | |||||
| /// batch. If true, and if there are less than batch_size rows | |||||
| /// available to make the last batch, then those rows will | |||||
| /// be dropped and not propagated to the next node | |||||
| /// \return Shared pointer to the current BatchDataset | |||||
| std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false); | |||||
| /// \brief Function to create a RepeatDataset | |||||
| /// \notes Repeats this dataset count times. Repeat indefinitely if count is -1 | |||||
| /// \param[in] count Number of times the dataset should be repeated | |||||
| /// \return Shared pointer to the current Dataset | |||||
| /// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset` | |||||
| /// due to a limitation in the current implementation | |||||
| std::shared_ptr<Dataset> Repeat(int32_t count = -1); | |||||
| /// \brief Function to create a MapDataset | |||||
| /// \notes Applies each operation in operations to this dataset | |||||
| /// \param[in] operations Vector of operations to be applied on the dataset. Operations are | |||||
| /// applied in the order they appear in this list | |||||
| /// \param[in] input_columns Vector of the names of the columns that will be passed to the first | |||||
| /// operation as input. The size of this list must match the number of | |||||
| /// input columns expected by the first operator. The default input_columns | |||||
| /// is the first column | |||||
| /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation | |||||
| /// This parameter is mandatory if len(input_columns) != len(output_columns) | |||||
| /// The size of this list must match the number of output columns of the | |||||
| /// last operation. The default output_columns will have the same | |||||
| /// name as the input columns, i.e., the columns will be replaced | |||||
| /// \param[in] project_columns A list of column names to project | |||||
| /// \return Shared pointer to the current MapDataset | |||||
| std::shared_ptr<MapDataset> Map(std::vector<std::shared_ptr<TensorOperation>> operations, | |||||
| std::vector<std::string> input_columns = {}, | |||||
| std::vector<std::string> output_columns = {}, | |||||
| const std::vector<std::string> &project_columns = {}); | |||||
| /// \brief Function to create a Shuffle Dataset | |||||
| /// \notes Randomly shuffles the rows of this dataset | |||||
| /// \param[in] shuffle_size The size of the buffer (must be larger than 1) for shuffling | |||||
| /// \return Shared pointer to the current ShuffleDataset | |||||
| std::shared_ptr<ShuffleDataset> Shuffle(int32_t shuffle_size); | |||||
| /// \brief Function to create a Project Dataset | |||||
| /// \notes Applies project to the dataset | |||||
| /// \param[in] columns The name of columns to project | |||||
| /// \return Shared pointer to the current Dataset | |||||
| std::shared_ptr<ProjectDataset> Project(const std::vector<std::string> &columns); | |||||
| protected: | |||||
| // Links to the neighbouring Dataset objects of the pipeline | |||||
| std::vector<std::shared_ptr<Dataset>> children; | |||||
| std::shared_ptr<Dataset> parent; | |||||
| // Runtime settings; num_workers_ is written by SetNumWorkers() | |||||
| int32_t num_workers_; | |||||
| int32_t rows_per_buffer_; | |||||
| int32_t connector_que_size_; | |||||
| }; | |||||
| /* ####################################### Derived Dataset classes ################################# */ | |||||
| /// \class ImageFolderDataset | |||||
| /// \brief A Dataset derived class to represent ImageFolder dataset | |||||
| class ImageFolderDataset : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||||
| /// \param[in] decode A flag to decode in ImageFolder | |||||
| /// \param[in] sampler Object used to choose samples from the dataset | |||||
| /// \param[in] recursive Not described in this header; presumably whether sub-directories are | |||||
| /// scanned recursively - TODO confirm against the implementation | |||||
| /// \param[in] extensions File extensions to be read | |||||
| /// \param[in] class_indexing a class name to label map | |||||
| ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive, | |||||
| std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing); | |||||
| /// \brief Destructor | |||||
| ~ImageFolderDataset() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return shared pointer to the list of newly created DatasetOps | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return bool true if all the params are valid | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| // NOTE: members are initialised in this declaration order, which differs from the order of the | |||||
| // constructor parameters (recursive_ precedes sampler_, class_indexing_ precedes exts_) | |||||
| std::string dataset_dir_; | |||||
| bool decode_; | |||||
| bool recursive_; | |||||
| std::shared_ptr<SamplerObj> sampler_; | |||||
| std::map<std::string, int32_t> class_indexing_; | |||||
| std::set<std::string> exts_; | |||||
| }; | |||||
| /// \class MnistDataset | |||||
| /// \brief A Dataset derived class to represent Mnist dataset | |||||
| class MnistDataset : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||||
| /// \param[in] sampler Object used to choose samples from the dataset | |||||
| MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler); | |||||
| /// \brief Destructor | |||||
| ~MnistDataset() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return shared pointer to the list of newly created DatasetOps | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return bool true if all the params are valid | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::string dataset_dir_; | |||||
| std::shared_ptr<SamplerObj> sampler_; | |||||
| }; | |||||
| /// \class BatchDataset | |||||
| /// \brief A Dataset derived class to represent Batch dataset | |||||
| class BatchDataset : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| /// \param[in] batch_size The number of consecutive rows combined into each batch | |||||
| /// \param[in] drop_remainder Whether or not to drop the last possibly incomplete batch | |||||
| /// \param[in] pad Not described in this header; presumably enables padding - TODO confirm | |||||
| /// \param[in] cols_to_map Not described in this header - TODO confirm against the implementation | |||||
| /// \param[in] pad_map Map from a string key to a (TensorShape, Tensor) pair; presumably the | |||||
| /// per-column pad shape and pad value - TODO confirm | |||||
| BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map, | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map); | |||||
| /// \brief Destructor | |||||
| ~BatchDataset() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return shared pointer to the list of newly created DatasetOps | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return bool true if all the params are valid | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| int32_t batch_size_; | |||||
| bool drop_remainder_; | |||||
| bool pad_; | |||||
| std::vector<std::string> cols_to_map_; | |||||
| std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_; | |||||
| }; | |||||
| /// \class RepeatDataset | |||||
| /// \brief A Dataset derived class to represent Repeat dataset | |||||
| class RepeatDataset : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| /// \param[in] count Number of times the dataset should be repeated | |||||
| /// \note NOTE(review): Dataset::Repeat() takes a signed int32_t count where -1 means "repeat | |||||
| /// indefinitely", while this constructor takes uint32_t; how -1 is mapped is not | |||||
| /// visible in this header - TODO confirm | |||||
| explicit RepeatDataset(uint32_t count); | |||||
| /// \brief Destructor | |||||
| ~RepeatDataset() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return shared pointer to the list of newly created DatasetOps | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return bool true if all the params are valid | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| uint32_t repeat_count_; | |||||
| }; | |||||
| /// \class ShuffleDataset | |||||
| /// \brief A Dataset derived class to represent Shuffle dataset | |||||
| class ShuffleDataset : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| /// \param[in] shuffle_size The size of the buffer used for shuffling | |||||
| /// \param[in] reset_every_epoch Not described in this header; presumably whether the shuffle is | |||||
| /// reset at each epoch - TODO confirm against the implementation | |||||
| ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch); | |||||
| /// \brief Destructor | |||||
| ~ShuffleDataset() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return shared pointer to the list of newly created DatasetOps | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return bool true if all the params are valid | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| int32_t shuffle_size_; | |||||
| // The seed is not a constructor parameter; where it is assigned is not visible in this header | |||||
| uint32_t shuffle_seed_; | |||||
| bool reset_every_epoch_; | |||||
| }; | |||||
| /// \class MapDataset | |||||
| /// \brief A Dataset derived class to represent Map dataset | |||||
| class MapDataset : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| /// \param[in] operations Vector of operations to be applied on the dataset | |||||
| /// \param[in] input_columns Names of the columns passed to the first operation as input | |||||
| /// \param[in] output_columns Names assigned to the columns outputted by the last operation | |||||
| /// \param[in] columns Column names; presumably stored as the columns to project - TODO confirm | |||||
| MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns = {}, | |||||
| std::vector<std::string> output_columns = {}, const std::vector<std::string> &columns = {}); | |||||
| /// \brief Destructor | |||||
| ~MapDataset() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return shared pointer to the list of newly created DatasetOps | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return bool true if all the params are valid | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::vector<std::shared_ptr<TensorOperation>> operations_; | |||||
| std::vector<std::string> input_columns_; | |||||
| std::vector<std::string> output_columns_; | |||||
| std::vector<std::string> project_columns_; | |||||
| }; | |||||
| /// \class Cifar10Dataset | |||||
| /// \brief A Dataset derived class to represent Cifar10 dataset | |||||
| class Cifar10Dataset : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||||
| /// \param[in] num_samples The number of images to be included in the dataset | |||||
| /// \param[in] sampler Object used to choose samples from the dataset | |||||
| Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler); | |||||
| /// \brief Destructor | |||||
| ~Cifar10Dataset() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return shared pointer to the list of newly created DatasetOps | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return bool true if all the params are valid | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::string dataset_dir_; | |||||
| int32_t num_samples_; | |||||
| std::shared_ptr<SamplerObj> sampler_; | |||||
| }; | |||||
| /// \class ProjectDataset | |||||
| /// \brief A Dataset derived class to represent Project dataset | |||||
| class ProjectDataset : public Dataset { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| /// \param[in] columns The name of columns to project | |||||
| explicit ProjectDataset(const std::vector<std::string> &columns); | |||||
| /// \brief Destructor | |||||
| ~ProjectDataset() = default; | |||||
| /// \brief a base class override function to create the required runtime dataset op objects for this class | |||||
| /// \return shared pointer to the list of newly created DatasetOps | |||||
| std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override; | |||||
| /// \brief Parameters validation | |||||
| /// \return bool true if all the params are valid | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::vector<std::string> columns_; | |||||
| }; | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // DATASET_INCLUDE_DATASETS_H_ | |||||
| @@ -0,0 +1,115 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef DATASET_INCLUDE_ITERATOR_H_ | |||||
| #define DATASET_INCLUDE_ITERATOR_H_ | |||||
| #include <unordered_map> | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "dataset/include/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Forward declare | |||||
| class ExecutionTree; | |||||
| class DatasetIterator; | |||||
| class DatasetOp; | |||||
| class Tensor; | |||||
| namespace api { | |||||
| class Dataset; | |||||
| using TensorMap = std::unordered_map<std::string, std::shared_ptr<Tensor>>; | |||||
| // Class for iterating over the rows produced by a dataset pipeline. | |||||
| class Iterator { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| Iterator() = default; | |||||
| /// \brief Destructor | |||||
| ~Iterator() = default; | |||||
| /// \brief Method for building and launching the pipeline. | |||||
| /// \param[in] ds - the Dataset whose pipeline is built and launched. | |||||
| /// \return - a Status error code, returns OK if no error encountered. | |||||
| Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds); | |||||
| /// \brief Function to get the next row from the data pipeline. | |||||
| /// \param[out] row - the output tensor row. | |||||
| void GetNextRow(TensorMap *row); | |||||
| /// \brief Function to shut down the data pipeline. | |||||
| void Stop(); | |||||
| /// \brief Helper that lets an Iterator be traversed with begin()/end(). | |||||
| /// A row that comes back empty from GetNextRow() marks the end: cur_row_ is then | |||||
| /// released and set to nullptr, which compares equal to the end() sentinel. | |||||
| /// NOTE(review): cur_row_ is an owning raw pointer and the copy operations are the implicit | |||||
| /// ones, so copying an instance would delete the same row twice - avoid copies. | |||||
| class _Iterator { | |||||
| public: | |||||
| // Every member is initialised here; ind_ starts at 0 so operator++ never reads an | |||||
| // indeterminate value. | |||||
| explicit _Iterator(Iterator *lt) : ind_{0}, lt_{lt}, cur_row_{nullptr} { | |||||
| if (lt_) { | |||||
| cur_row_ = new TensorMap(); | |||||
| lt_->GetNextRow(cur_row_); | |||||
| } | |||||
| } | |||||
| // Destructor | |||||
| ~_Iterator() { | |||||
| if (cur_row_) { | |||||
| delete cur_row_; | |||||
| } | |||||
| } | |||||
| _Iterator &operator++() { | |||||
| if (lt_) { | |||||
| ++ind_; | |||||
| lt_->GetNextRow(cur_row_); | |||||
| } | |||||
| // An empty row turns this object into the end sentinel | |||||
| if (cur_row_ && cur_row_->size() == 0) { | |||||
| delete cur_row_; | |||||
| cur_row_ = nullptr; | |||||
| } | |||||
| return *this; | |||||
| } // prefix ++ overload | |||||
| TensorMap &operator*() { return *cur_row_; } // dereference operator | |||||
| TensorMap *operator->() { return cur_row_; } | |||||
| // Two iterators differ when they hold different row pointers | |||||
| bool operator!=(const _Iterator &rhs) const { return cur_row_ != rhs.cur_row_; } | |||||
| private: | |||||
| int ind_; // the cur node our Iterator points to | |||||
| Iterator *lt_; | |||||
| TensorMap *cur_row_; | |||||
| }; | |||||
| _Iterator begin() { return _Iterator(this); } | |||||
| _Iterator end() { return _Iterator(nullptr); } | |||||
| private: | |||||
| // Runtime tree. | |||||
| // Use shared_ptr instead of unique_ptr because the DatasetIterator constructor takes in a shared_ptr type. | |||||
| std::shared_ptr<ExecutionTree> tree_; | |||||
| // Runtime iterator | |||||
| std::unique_ptr<DatasetIterator> iterator_; | |||||
| }; | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // DATASET_INCLUDE_ITERATOR_H_ | |||||
| @@ -0,0 +1,199 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef DATASET_API_SAMPLERS_H_ | |||||
| #define DATASET_API_SAMPLERS_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Internal Sampler class forward declaration | |||||
| class Sampler; | |||||
| namespace api { | |||||
| /// \brief Base class of the sampler objects declared in this header. | |||||
| class SamplerObj : public std::enable_shared_from_this<SamplerObj> { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| SamplerObj(); | |||||
| /// \brief Destructor | |||||
| /// \note Declared virtual because SamplerObj is a polymorphic base class; destroying a derived | |||||
| /// sampler through a pointer to SamplerObj must run the derived destructor | |||||
| virtual ~SamplerObj() = default; | |||||
| /// \brief Pure virtual function to create the internal Sampler for this object | |||||
| /// \return Shared pointer to the internal Sampler | |||||
| virtual std::shared_ptr<Sampler> Build() = 0; | |||||
| /// \brief Pure virtual function for derived classes to implement parameters validation | |||||
| /// \return bool presumably true when all the params are valid - TODO confirm | |||||
| virtual bool ValidateParams() = 0; | |||||
| }; | |||||
| class DistributedSamplerObj; | |||||
| class PKSamplerObj; | |||||
| class RandomSamplerObj; | |||||
| class SequentialSamplerObj; | |||||
| class SubsetRandomSamplerObj; | |||||
| class WeightedRandomSamplerObj; | |||||
| /// Function to create a Distributed Sampler. | |||||
| /// \notes A Sampler that accesses a shard of the dataset. | |||||
| /// \param[in] num_shards - Number of shards to divide the dataset into. | |||||
| /// \param[in] shard_id - Shard ID of the current shard within num_shards. | |||||
| /// \param[in] shuffle - If true, the indices are shuffled. | |||||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||||
| /// \param[in] seed - The seed in use when shuffle is true. | |||||
| /// \return Shared pointer to the current Sampler. | |||||
| std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, | |||||
| int64_t num_samples = 0, uint32_t seed = 1); | |||||
| /// Function to create a PK Sampler. | |||||
| /// \notes Samples K elements for each P class in the dataset. | |||||
| /// This will sample all classes. | |||||
| /// \param[in] num_val - Number of elements to sample for each class. | |||||
| /// \param[in] shuffle - If true, the class IDs are shuffled. | |||||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||||
| /// \return Shared pointer to the current Sampler. | |||||
| std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0); | |||||
| /// Function to create a Random Sampler. | |||||
| /// \notes Samples the elements randomly. | |||||
| /// \param[in] replacement - If True, put the sample ID back for the next draw. | |||||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||||
| /// \return Shared pointer to the current Sampler. | |||||
| std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement = false, int64_t num_samples = 0); | |||||
| /// Function to create a Sequential Sampler. | |||||
| /// \notes Samples the dataset elements sequentially, same as not having a sampler. | |||||
| /// \param[in] start_index - Index to start sampling at (default to start at first id). | |||||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||||
| /// \return Shared pointer to the current Sampler. | |||||
| std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0); | |||||
| /// Function to create a Subset Random Sampler. | |||||
| /// \notes Samples the elements randomly from a sequence of indices. | |||||
| /// \param[in] indices - A vector sequence of indices. | |||||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||||
| /// \return Shared pointer to the current Sampler. | |||||
| std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices, | |||||
| int64_t num_samples = 0); | |||||
| /// Function to create a Weighted Random Sampler. | |||||
| /// \notes Samples the elements from [0, len(weights) - 1] randomly with the given | |||||
| /// weights (probabilities). | |||||
| /// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1. | |||||
| /// \param[in] num_samples - The number of samples to draw (default to all elements). | |||||
| /// \param[in] replacement - If True, put the sample ID back for the next draw. | |||||
| /// \return Shared pointer to the current Sampler. | |||||
| std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights, | |||||
| int64_t num_samples = 0, bool replacement = true); | |||||
| /* ####################################### Derived Sampler classes ################################# */ | |||||
| /// \brief Parameter holder for a distributed sampler; Build() returns the corresponding Sampler. | |||||
| class DistributedSamplerObj : public SamplerObj { | |||||
| public: | |||||
| /// \brief Constructor taking the shard layout, shuffle flag, sample count and seed. | |||||
| DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed); | |||||
| ~DistributedSamplerObj() = default; | |||||
| /// \return Shared pointer to a Sampler. | |||||
| std::shared_ptr<Sampler> Build() override; | |||||
| /// \return bool (NOTE(review): presumably true when the stored parameters are valid - confirm in the .cc). | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| int64_t num_shards_; | |||||
| int64_t shard_id_; | |||||
| bool shuffle_; | |||||
| int64_t num_samples_; | |||||
| uint32_t seed_; | |||||
| }; | |||||
| /// \brief Parameter holder for a PK sampler; Build() returns the corresponding Sampler. | |||||
| class PKSamplerObj : public SamplerObj { | |||||
| public: | |||||
| /// \brief Constructor taking the per-class sample count, shuffle flag and total sample count. | |||||
| PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples); | |||||
| ~PKSamplerObj() = default; | |||||
| /// \return Shared pointer to a Sampler. | |||||
| std::shared_ptr<Sampler> Build() override; | |||||
| /// \return bool (NOTE(review): presumably true when the stored parameters are valid - confirm in the .cc). | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| int64_t num_val_; | |||||
| bool shuffle_; | |||||
| int64_t num_samples_; | |||||
| }; | |||||
| /// \brief Parameter holder for a random sampler; Build() returns the corresponding Sampler. | |||||
| class RandomSamplerObj : public SamplerObj { | |||||
| public: | |||||
| /// \brief Constructor taking the replacement flag and the number of samples. | |||||
| RandomSamplerObj(bool replacement, int64_t num_samples); | |||||
| ~RandomSamplerObj() = default; | |||||
| /// \return Shared pointer to a Sampler. | |||||
| std::shared_ptr<Sampler> Build() override; | |||||
| /// \return bool (NOTE(review): presumably true when the stored parameters are valid - confirm in the .cc). | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| bool replacement_; | |||||
| int64_t num_samples_; | |||||
| }; | |||||
| /// \brief Parameter holder for a sequential sampler; Build() returns the corresponding Sampler. | |||||
| class SequentialSamplerObj : public SamplerObj { | |||||
| public: | |||||
| /// \brief Constructor taking the start index and the number of samples. | |||||
| SequentialSamplerObj(int64_t start_index, int64_t num_samples); | |||||
| ~SequentialSamplerObj() = default; | |||||
| /// \return Shared pointer to a Sampler. | |||||
| std::shared_ptr<Sampler> Build() override; | |||||
| /// \return bool (NOTE(review): presumably true when the stored parameters are valid - confirm in the .cc). | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| int64_t start_index_; | |||||
| int64_t num_samples_; | |||||
| }; | |||||
| /// \brief Parameter holder for a subset random sampler; Build() returns the corresponding Sampler. | |||||
| class SubsetRandomSamplerObj : public SamplerObj { | |||||
| public: | |||||
| /// \brief Constructor. | |||||
| /// \param[in] indices - A vector sequence of indices; copied into this object. | |||||
| /// \param[in] num_samples - The number of samples to draw. | |||||
| SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples); | |||||
| ~SubsetRandomSamplerObj() = default; | |||||
| /// \return Shared pointer to a Sampler. | |||||
| std::shared_ptr<Sampler> Build() override; | |||||
| /// \return bool (NOTE(review): presumably true when the stored parameters are valid - confirm in the .cc). | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| // Stored by value: a reference member would dangle as soon as the caller's vector | |||||
| // (frequently a temporary such as a braced list) is destroyed, before Build() runs. | |||||
| const std::vector<int64_t> indices_; | |||||
| int64_t num_samples_; | |||||
| }; | |||||
| /// \brief Parameter holder for a weighted random sampler; Build() returns the corresponding Sampler. | |||||
| class WeightedRandomSamplerObj : public SamplerObj { | |||||
| public: | |||||
| /// \brief Constructor. | |||||
| /// \param[in] weights - A vector sequence of weights; copied into this object. | |||||
| /// \param[in] num_samples - The number of samples to draw. | |||||
| /// \param[in] replacement - If true, put the sample ID back for the next draw. | |||||
| explicit WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples = 0, | |||||
| bool replacement = true); | |||||
| ~WeightedRandomSamplerObj() = default; | |||||
| /// \return Shared pointer to a Sampler. | |||||
| std::shared_ptr<Sampler> Build() override; | |||||
| /// \return bool (NOTE(review): presumably true when the stored parameters are valid - confirm in the .cc). | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| // Stored by value: a reference member would dangle as soon as the caller's vector | |||||
| // (frequently a temporary such as a braced list) is destroyed, before Build() runs. | |||||
| const std::vector<double> weights_; | |||||
| int64_t num_samples_; | |||||
| bool replacement_; | |||||
| }; | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // DATASET_API_SAMPLERS_H_ | |||||
| @@ -0,0 +1 @@ | |||||
| ../util/status.h | |||||
| @@ -0,0 +1 @@ | |||||
| ../core/tensor.h | |||||
| @@ -0,0 +1,380 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef DATASET_API_TRANSFORMS_H_ | |||||
| #define DATASET_API_TRANSFORMS_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "dataset/core/constants.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| class TensorOp; | |||||
| namespace api { | |||||
| // Abstract base class to represent a tensor operation (transform) in the data pipeline. | |||||
| class TensorOperation : public std::enable_shared_from_this<TensorOperation> { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| TensorOperation(); | |||||
| /// \brief Destructor. Virtual so that derived operations are destroyed correctly through a base pointer. | |||||
| virtual ~TensorOperation() = default; | |||||
| /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object. | |||||
| /// \return shared pointer to the newly created TensorOp. | |||||
| virtual std::shared_ptr<TensorOp> Build() = 0; | |||||
| /// \brief Pure virtual function to check the parameters held by the operation. | |||||
| /// \return bool (NOTE(review): presumably true when the parameters are valid - confirm in the implementations). | |||||
| virtual bool ValidateParams() = 0; | |||||
| }; | |||||
| // Transform operations for performing computer vision. | |||||
| namespace vision { | |||||
| class NormalizeOperation; | |||||
| class DecodeOperation; | |||||
| class ResizeOperation; | |||||
| class RandomCropOperation; | |||||
| class CenterCropOperation; | |||||
| class UniformAugOperation; | |||||
| class RandomHorizontalFlipOperation; | |||||
| class RandomVerticalFlipOperation; | |||||
| class RandomRotationOperation; | |||||
| class PadOperation; | |||||
| class CutOutOperation; | |||||
| class RandomColorAdjustOperation; | |||||
| /// \brief Function to create a Normalize TensorOperation. | |||||
| /// \notes Normalize the input image with respect to mean and standard deviation. | |||||
| /// \param[in] mean - a vector of mean values for each channel, w.r.t channel order. | |||||
| /// \param[in] std - a vector of standard deviations for each channel, w.r.t. channel order. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std); | |||||
| /// \brief Function to create a Decode TensorOperation. | |||||
| /// \notes Decode the input image in RGB mode. | |||||
| /// \param[in] rgb - a boolean of whether to decode in RGB mode or not. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<DecodeOperation> Decode(bool rgb = true); | |||||
| /// \brief Function to create a Resize TensorOperation. | |||||
| /// \notes Resize the input image to the given size. | |||||
| /// \param[in] size - a vector representing the output size of the resized image. | |||||
| /// If size is a single value, the image will be resized to this value with | |||||
| /// the same image aspect ratio. If size has 2 values, it should be (height, width). | |||||
| /// \param[in] interpolation An enum for the mode of interpolation | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size, | |||||
| InterpolationMode interpolation = InterpolationMode::kLinear); | |||||
| /// \brief Function to create a RandomCrop TensorOperation. | |||||
| /// \notes Crop the input image at a random location. | |||||
| /// \param[in] size - a vector representing the output size of the cropped image. | |||||
| /// If size is a single value, a square crop of size (size, size) is returned. | |||||
| /// If size has 2 values, it should be (height, width). | |||||
| /// \param[in] padding - a vector with the value of pixels to pad the image. If 4 values are provided, | |||||
| /// it pads the left, top, right and bottom respectively. | |||||
| /// \param[in] pad_if_needed - a boolean whether to pad the image if either side is smaller than | |||||
| /// the given output size. | |||||
| /// \param[in] fill_value - a vector representing the pixel intensity of the borders, it is used to | |||||
| /// fill R, G, B channels respectively. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0}, | |||||
| bool pad_if_needed = false, | |||||
| std::vector<uint8_t> fill_value = {0, 0, 0}); | |||||
| /// \brief Function to create a CenterCrop TensorOperation. | |||||
| /// \notes Crops the input image at the center to the given size. | |||||
| /// \param[in] size - a vector representing the output size of the cropped image. | |||||
| /// If size is a single value, a square crop of size (size, size) is returned. | |||||
| /// If size has 2 values, it should be (height, width). | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size); | |||||
| /// \brief Function to create a UniformAugment TensorOperation. | |||||
| /// \notes Tensor operation to perform randomly selected augmentation. | |||||
| /// \param[in] operations - a vector of TensorOperation operations. | |||||
| /// \param[in] num_ops - integer representing the number of OPs to be selected and applied. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations, | |||||
| int32_t num_ops = 2); | |||||
| /// \brief Function to create a RandomHorizontalFlip TensorOperation. | |||||
| /// \notes Tensor operation to perform random horizontal flip. | |||||
| /// \param[in] prob - float representing the probability of flip. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob = 0.5); | |||||
| /// \brief Function to create a RandomVerticalFlip TensorOperation. | |||||
| /// \notes Tensor operation to perform random vertical flip. | |||||
| /// \param[in] prob - float representing the probability of flip. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob = 0.5); | |||||
| /// \brief Function to create a RandomRotation TensorOp | |||||
| /// \notes Rotates the image according to parameters | |||||
| /// \param[in] degrees A float vector size 2, representing the starting and ending degree | |||||
| /// \param[in] resample An enum for the mode of interpolation | |||||
| /// \param[in] expand A boolean representing whether the image is expanded after rotation | |||||
| /// \param[in] center A float vector size 2, representing the x and y center of rotation. | |||||
| /// \param[in] fill_value A uint8_t vector size 3, representing the rgb value of the fill color | |||||
| /// \return Shared pointer to the current TensorOp | |||||
| std::shared_ptr<RandomRotationOperation> RandomRotation( | |||||
| std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false, | |||||
| std::vector<float> center = {-1, -1}, std::vector<uint8_t> fill_value = {0, 0, 0}); | |||||
| /// \brief Function to create a Pad TensorOp | |||||
| /// \notes Pads the image according to padding parameters | |||||
| /// \param[in] padding A vector representing the number of pixels to pad the image | |||||
| /// If vector has one value, it pads all sides of the image with that value | |||||
| /// If vector has two values, it pads left and right with the first and | |||||
| /// top and bottom with the second value | |||||
| /// If vector has four values, it pads left, top, right, and bottom with | |||||
| /// those values respectively | |||||
| /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is | |||||
| /// BorderType.kConstant. If 3 values are provided, | |||||
| /// it is used to fill R, G, B channels respectively | |||||
| /// \param[in] padding_mode The method of padding (default=BorderType.kConstant) | |||||
| /// Can be any of | |||||
| /// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric] | |||||
| /// - BorderType.kConstant, means it fills the border with constant values | |||||
| /// - BorderType.kEdge, means it pads with the last value on the edge | |||||
| /// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge | |||||
| /// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge | |||||
| /// \return Shared pointer to the current TensorOp | |||||
| std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0}, | |||||
| BorderType padding_mode = BorderType::kConstant); | |||||
| /// \brief Function to create a CutOut TensorOp | |||||
| /// \notes Randomly cut (mask) out a given number of square patches from the input image | |||||
| /// \param[in] length Integer representing the side length of each square patch | |||||
| /// \param[in] num_patches Integer representing the number of patches to be cut out of an image | |||||
| /// \return Shared pointer to the current TensorOp | |||||
| std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches = 1); | |||||
| /// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image | |||||
| /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values | |||||
| /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} | |||||
| /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values | |||||
| /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} | |||||
| /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values | |||||
| /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} | |||||
| /// \param[in] hue Hue adjustment factor. Must be a vector of one or two values | |||||
| /// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5 | |||||
| /// Default value is {0, 0} | |||||
| /// \return Shared pointer to the current TensorOp | |||||
| std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0}, | |||||
| std::vector<float> contrast = {1.0, 1.0}, | |||||
| std::vector<float> saturation = {1.0, 1.0}, | |||||
| std::vector<float> hue = {0.0, 0.0}); | |||||
| /* ####################################### Derived TensorOperation classes ################################# */ | |||||
| /// \brief Parameter holder for the Normalize transform (mean and std vectors); Build() creates the runtime TensorOp. | |||||
| class NormalizeOperation : public TensorOperation { | |||||
| public: | |||||
| NormalizeOperation(std::vector<float> mean, std::vector<float> std); | |||||
| ~NormalizeOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::vector<float> mean_; | |||||
| std::vector<float> std_; | |||||
| }; | |||||
| /// \brief Parameter holder for the Decode transform (rgb flag); Build() creates the runtime TensorOp. | |||||
| class DecodeOperation : public TensorOperation { | |||||
| public: | |||||
| explicit DecodeOperation(bool rgb = true); | |||||
| ~DecodeOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| bool rgb_; | |||||
| }; | |||||
| /// \brief Parameter holder for the Resize transform (size and interpolation mode); Build() creates the runtime TensorOp. | |||||
| class ResizeOperation : public TensorOperation { | |||||
| public: | |||||
| explicit ResizeOperation(std::vector<int32_t> size, | |||||
| InterpolationMode interpolation_mode = InterpolationMode::kLinear); | |||||
| ~ResizeOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::vector<int32_t> size_; | |||||
| InterpolationMode interpolation_; | |||||
| }; | |||||
| /// \brief Parameter holder for the RandomCrop transform (size, padding, pad_if_needed, fill value); | |||||
| /// Build() creates the runtime TensorOp. | |||||
| class RandomCropOperation : public TensorOperation { | |||||
| public: | |||||
| RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0}, | |||||
| bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0}); | |||||
| ~RandomCropOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::vector<int32_t> size_; | |||||
| std::vector<int32_t> padding_; | |||||
| bool pad_if_needed_; | |||||
| std::vector<uint8_t> fill_value_; | |||||
| }; | |||||
| /// \brief Parameter holder for the CenterCrop transform (output size); Build() creates the runtime TensorOp. | |||||
| class CenterCropOperation : public TensorOperation { | |||||
| public: | |||||
| explicit CenterCropOperation(std::vector<int32_t> size); | |||||
| ~CenterCropOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::vector<int32_t> size_; | |||||
| }; | |||||
| /// \brief Parameter holder for the UniformAugment transform (candidate operations and num_ops); | |||||
| /// Build() creates the runtime TensorOp. | |||||
| class UniformAugOperation : public TensorOperation { | |||||
| public: | |||||
| explicit UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops = 2); | |||||
| ~UniformAugOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::vector<std::shared_ptr<TensorOperation>> operations_; | |||||
| int32_t num_ops_; | |||||
| }; | |||||
| /// \brief Parameter holder for the RandomHorizontalFlip transform (flip probability); Build() creates the runtime TensorOp. | |||||
| class RandomHorizontalFlipOperation : public TensorOperation { | |||||
| public: | |||||
| explicit RandomHorizontalFlipOperation(float probability = 0.5); | |||||
| ~RandomHorizontalFlipOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| float probability_; | |||||
| }; | |||||
| /// \brief Parameter holder for the RandomVerticalFlip transform (flip probability); Build() creates the runtime TensorOp. | |||||
| class RandomVerticalFlipOperation : public TensorOperation { | |||||
| public: | |||||
| explicit RandomVerticalFlipOperation(float probability = 0.5); | |||||
| ~RandomVerticalFlipOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| float probability_; | |||||
| }; | |||||
| /// \brief Parameter holder for the RandomRotation transform (degrees, interpolation mode, expand, center, | |||||
| /// fill value); Build() creates the runtime TensorOp. | |||||
| class RandomRotationOperation : public TensorOperation { | |||||
| public: | |||||
| RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode, bool expand, | |||||
| std::vector<float> center, std::vector<uint8_t> fill_value); | |||||
| ~RandomRotationOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| // NOTE(review): center_ is declared before expand_, which differs from the constructor's parameter order; | |||||
| // members are initialized in declaration order, so keep the initializer list in the .cc consistent with this. | |||||
| std::vector<float> degrees_; | |||||
| InterpolationMode interpolation_mode_; | |||||
| std::vector<float> center_; | |||||
| bool expand_; | |||||
| std::vector<uint8_t> fill_value_; | |||||
| }; | |||||
| /// \brief Parameter holder for the Pad transform (padding, fill value, padding mode); Build() creates the runtime TensorOp. | |||||
| class PadOperation : public TensorOperation { | |||||
| public: | |||||
| PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0}, | |||||
| BorderType padding_mode = BorderType::kConstant); | |||||
| ~PadOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::vector<int32_t> padding_; | |||||
| std::vector<uint8_t> fill_value_; | |||||
| BorderType padding_mode_; | |||||
| }; | |||||
| /// \brief Parameter holder for the CutOut transform (patch side length and patch count); Build() creates the runtime TensorOp. | |||||
| class CutOutOperation : public TensorOperation { | |||||
| public: | |||||
| explicit CutOutOperation(int32_t length, int32_t num_patches = 1); | |||||
| ~CutOutOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| int32_t length_; | |||||
| int32_t num_patches_; | |||||
| }; | |||||
| /// \brief Parameter holder for the RandomColorAdjust transform (brightness, contrast, saturation, hue ranges); | |||||
| /// Build() creates the runtime TensorOp. | |||||
| class RandomColorAdjustOperation : public TensorOperation { | |||||
| public: | |||||
| RandomColorAdjustOperation(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0}, | |||||
| std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0}); | |||||
| ~RandomColorAdjustOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| bool ValidateParams() override; | |||||
| private: | |||||
| std::vector<float> brightness_; | |||||
| std::vector<float> contrast_; | |||||
| std::vector<float> saturation_; | |||||
| std::vector<float> hue_; | |||||
| }; | |||||
| } // namespace vision | |||||
| } // namespace api | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // DATASET_API_TRANSFORMS_H_ | |||||
| @@ -0,0 +1 @@ | |||||
| ../../../utils/log_adapter.h | |||||
| @@ -0,0 +1 @@ | |||||
| ../../../utils/overload.h | |||||
| @@ -2,7 +2,13 @@ add_subdirectory(image) | |||||
| add_subdirectory(data) | add_subdirectory(data) | ||||
| file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") | ||||
| set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) | ||||
| add_library(kernels OBJECT | |||||
| py_func_op.cc | |||||
| tensor_op.cc) | |||||
| target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) | |||||
| # tensor_op.cc is always part of the kernels object library; py_func_op.cc and the | |||||
| # pybind11 include path are added only when ENABLE_PYTHON is set. | |||||
| add_library(kernels OBJECT tensor_op.cc) | |||||
| if (ENABLE_PYTHON) | |||||
| target_sources(kernels PRIVATE py_func_op.cc) | |||||
| target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) | |||||
| endif() | |||||
| @@ -23,7 +23,9 @@ | |||||
| #include "dataset/core/constants.h" | #include "dataset/core/constants.h" | ||||
| #include "dataset/core/data_type.h" | #include "dataset/core/data_type.h" | ||||
| #ifdef ENABLE_PYTHON | |||||
| #include "dataset/core/pybind_support.h" | #include "dataset/core/pybind_support.h" | ||||
| #endif | |||||
| #include "dataset/core/tensor.h" | #include "dataset/core/tensor.h" | ||||
| #include "dataset/core/tensor_shape.h" | #include "dataset/core/tensor_shape.h" | ||||
| #include "dataset/kernels/data/type_cast_op.h" | #include "dataset/kernels/data/type_cast_op.h" | ||||
| @@ -729,7 +729,6 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output | |||||
| int num_channels = input_cv->shape()[2]; | int num_channels = input_cv->shape()[2]; | ||||
| if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2); | if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2); | ||||
| *output = std::static_pointer_cast<Tensor>(output_cv); | *output = std::static_pointer_cast<Tensor>(output_cv); | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } catch (const cv::Exception &e) { | } catch (const cv::Exception &e) { | ||||
| RETURN_STATUS_UNEXPECTED("Unexpected error in pad"); | RETURN_STATUS_UNEXPECTED("Unexpected error in pad"); | ||||
| @@ -35,10 +35,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; | |||||
| enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; | |||||
| void JpegErrorExitCustom(j_common_ptr cinfo); | void JpegErrorExitCustom(j_common_ptr cinfo); | ||||
| struct JpegErrorManagerCustom { | struct JpegErrorManagerCustom { | ||||
| @@ -16,6 +16,7 @@ | |||||
| #include "dataset/kernels/image/pad_op.h" | #include "dataset/kernels/image/pad_op.h" | ||||
| #include "dataset/kernels/image/image_utils.h" | #include "dataset/kernels/image/image_utils.h" | ||||
| #include "dataset/core/constants.h" | |||||
| #include "dataset/util/status.h" | #include "dataset/util/status.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -21,7 +21,7 @@ | |||||
| #include "dataset/core/tensor.h" | #include "dataset/core/tensor.h" | ||||
| #include "dataset/kernels/tensor_op.h" | #include "dataset/kernels/tensor_op.h" | ||||
| #include "dataset/kernels/image/image_utils.h" | |||||
| #include "dataset/core/constants.h" | |||||
| #include "dataset/util/status.h" | #include "dataset/util/status.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -18,7 +18,6 @@ | |||||
| #include "dataset/kernels/image/image_utils.h" | #include "dataset/kernels/image/image_utils.h" | ||||
| #include "dataset/util/status.h" | #include "dataset/util/status.h" | ||||
| #include "dataset/core/cv_tensor.h" | #include "dataset/core/cv_tensor.h" | ||||
| #include "dataset/core/pybind_support.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| @@ -16,8 +16,6 @@ | |||||
| #ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ | #ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ | ||||
| #define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ | #define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ | ||||
| #include <pybind11/numpy.h> | |||||
| #include <pybind11/stl.h> | |||||
| #include <memory> | #include <memory> | ||||
| #include <random> | #include <random> | ||||
| #include <cstdlib> | #include <cstdlib> | ||||
| @@ -26,8 +24,6 @@ | |||||
| #include "dataset/kernels/tensor_op.h" | #include "dataset/kernels/tensor_op.h" | ||||
| #include "dataset/util/random.h" | #include "dataset/util/random.h" | ||||
| #include "dataset/util/status.h" | #include "dataset/util/status.h" | ||||
| #include "pybind11/pybind11.h" | |||||
| #include "pybind11/stl_bind.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| @@ -27,7 +27,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| namespace py = pybind11; | |||||
| class NgramOp : public TensorOp { | class NgramOp : public TensorOp { | ||||
| public: | public: | ||||
| @@ -32,7 +32,15 @@ if(ENABLE_MINDDATA) | |||||
| endif() | endif() | ||||
| # fetch ut test files | # fetch ut test files | ||||
| if(ENABLE_MINDDATA) | if(ENABLE_MINDDATA) | ||||
| file(GLOB_RECURSE UT_SRCS ./*.cc) | |||||
| file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./*.cc) | |||||
| # When ENABLE_PYTHON is off, remove the tests listed below from UT_SRCS. | |||||
| # The entries are written relative to this directory so that they match the RELATIVE glob results. | |||||
| if(NOT ENABLE_PYTHON) | |||||
| set(PYTHON_RELATED_SRCS | |||||
| dataset/filter_op_test.cc | |||||
| dataset/voc_op_test.cc | |||||
| dataset/manifest_op_test.cc | |||||
| ) | |||||
| list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS}) | |||||
| endif() | |||||
| else() | else() | ||||
| file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc) | file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc) | ||||
| foreach(OBJ ${TEMP_UT_SRCS}) | foreach(OBJ ${TEMP_UT_SRCS}) | ||||
| @@ -90,6 +90,7 @@ SET(DE_UT_SRCS | |||||
| concatenate_op_test.cc | concatenate_op_test.cc | ||||
| cyclic_array_test.cc | cyclic_array_test.cc | ||||
| perf_data_test.cc | perf_data_test.cc | ||||
| c_api_test.cc | |||||
| ) | ) | ||||
| add_executable(de_ut_tests ${DE_UT_SRCS}) | add_executable(de_ut_tests ${DE_UT_SRCS}) | ||||
| @@ -0,0 +1,771 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <fstream> | |||||
| #include <iostream> | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "utils/log_adapter.h" | |||||
| #include "common/utils.h" | |||||
| #include "common/common.h" | |||||
| #include "gtest/gtest.h" | |||||
| #include "securec.h" | |||||
| #include "dataset/include/datasets.h" | |||||
| #include "dataset/include/status.h" | |||||
| #include "dataset/include/transforms.h" | |||||
| #include "dataset/include/iterator.h" | |||||
| #include "dataset/core/constants.h" | |||||
| #include "dataset/include/samplers.h" | |||||
| using namespace mindspore::dataset::api; | |||||
| using mindspore::MsLogLevel::ERROR; | |||||
| using mindspore::ExceptionType::NoExceptionType; | |||||
| using mindspore::LogStream; | |||||
| using mindspore::dataset::Tensor; | |||||
| using mindspore::dataset::Status; | |||||
| using mindspore::dataset::BorderType; | |||||
| // Test fixture for the C++ dataset API pipeline tests; adds nothing on top of UT::DatasetOpTesting. | |||||
| class MindDataTestPipeline : public UT::DatasetOpTesting { | |||||
| protected: | |||||
| }; | |||||
| // Builds Mnist -> Repeat(2) -> Batch(2) over 10 sampled rows and expects 10 batches from the iterator. | |||||
| // Pointer checks use ASSERT_TRUE (fatal) because every pointer is dereferenced right afterwards; | |||||
| // a non-fatal EXPECT_TRUE would let the test continue into a null dereference. | |||||
| TEST_F(MindDataTestPipeline, TestBatchAndRepeat) { | |||||
| // Create a Mnist Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testMnistData/"; | |||||
| std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 10)); | |||||
| ASSERT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| ASSERT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 2; | |||||
| ds = ds->Batch(batch_size); | |||||
| ASSERT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| ASSERT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| // 10 samples * 2 repeats / batch size 2; EXPECT_EQ reports the actual count on failure. | |||||
| EXPECT_EQ(i, 10u); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| // Builds Mnist -> Repeat(2) -> Map(Resize, CenterCrop) -> Batch(1) over 20 sampled rows and expects 40 rows. | |||||
| // Pointer checks use ASSERT_TRUE (fatal) because the pointers are used right afterwards; | |||||
| // a non-fatal EXPECT_TRUE would let the test continue into a null dereference. | |||||
| TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { | |||||
| // Create a Mnist Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testMnistData/"; | |||||
| std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20)); | |||||
| ASSERT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| ASSERT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> resize_op = vision::Resize({30, 30}); | |||||
| ASSERT_TRUE(resize_op != nullptr); | |||||
| std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop({16, 16}); | |||||
| ASSERT_TRUE(center_crop_op != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({resize_op, center_crop_op}); | |||||
| ASSERT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 1; | |||||
| ds = ds->Batch(batch_size); | |||||
| ASSERT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| ASSERT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| // 20 samples * 2 repeats / batch size 1; EXPECT_EQ reports the actual count on failure. | |||||
| EXPECT_EQ(i, 40u); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestUniformAugWithOps) { | |||||
| // Create a Mnist Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testMnistData/"; | |||||
| std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 1; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> resize_op = vision::Resize({30, 30}); | |||||
| EXPECT_TRUE(resize_op != nullptr); | |||||
| std::shared_ptr<TensorOperation> random_crop_op = vision::RandomCrop({28, 28}); | |||||
| EXPECT_TRUE(random_crop_op != nullptr); | |||||
| std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop({16, 16}); | |||||
| EXPECT_TRUE(center_crop_op != nullptr); | |||||
| std::shared_ptr<TensorOperation> uniform_aug_op = vision::UniformAugment({random_crop_op, center_crop_op}, 2); | |||||
| EXPECT_TRUE(uniform_aug_op != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({resize_op, uniform_aug_op}); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 20); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestRandomFlip) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> random_vertical_flip_op = vision::RandomVerticalFlip(0.5); | |||||
| EXPECT_TRUE(random_vertical_flip_op != nullptr); | |||||
| std::shared_ptr<TensorOperation> random_horizontal_flip_op = vision::RandomHorizontalFlip(0.5); | |||||
| EXPECT_TRUE(random_horizontal_flip_op != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({random_vertical_flip_op, random_horizontal_flip_op}); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 1; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 20); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 2; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 10); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { | |||||
| std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1); | |||||
| EXPECT_NE(sampl, nullptr); | |||||
| sampl = PKSampler(3); | |||||
| EXPECT_NE(sampl, nullptr); | |||||
| sampl = RandomSampler(false, 12); | |||||
| EXPECT_NE(sampl, nullptr); | |||||
| sampl = SequentialSampler(0, 12); | |||||
| EXPECT_NE(sampl, nullptr); | |||||
| std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1}; | |||||
| sampl = WeightedRandomSampler(weights, 12); | |||||
| EXPECT_NE(sampl, nullptr); | |||||
| std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23}; | |||||
| sampl = SubsetRandomSampler(indices); | |||||
| EXPECT_NE(sampl, nullptr); | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl); | |||||
| EXPECT_NE(ds, nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_NE(ds, nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 2; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_NE(ds, nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_NE(iter, nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 12); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestPad) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> pad_op1 = vision::Pad({1, 2, 3, 4}, {0}, BorderType::kSymmetric); | |||||
| EXPECT_TRUE(pad_op1 != nullptr); | |||||
| std::shared_ptr<TensorOperation> pad_op2 = vision::Pad({1}, {1, 1, 1}, BorderType::kEdge); | |||||
| EXPECT_TRUE(pad_op2 != nullptr); | |||||
| std::shared_ptr<TensorOperation> pad_op3 = vision::Pad({1, 4}); | |||||
| EXPECT_TRUE(pad_op3 != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({pad_op1, pad_op2, pad_op3}); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 1; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 20); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestCutOut) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> cut_out1 = vision::CutOut(30, 5); | |||||
| EXPECT_TRUE(cut_out1!= nullptr); | |||||
| std::shared_ptr<TensorOperation> cut_out2 = vision::CutOut(30); | |||||
| EXPECT_TRUE(cut_out2 != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({cut_out1, cut_out2}); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 1; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 20); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestNormalize) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> normalize = vision::Normalize({121.0, 115.0, 100.0}, {70.0, 68.0, 71.0}); | |||||
| EXPECT_TRUE(normalize != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({normalize}); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 1; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 20); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestDecode) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> decode = vision::Decode(true); | |||||
| EXPECT_TRUE(decode != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({decode}); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 1; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_EQ(i, 20); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestShuffleDataset) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Shuffle operation on ds | |||||
| int32_t shuffle_size = 10; | |||||
| ds = ds->Shuffle(shuffle_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 2; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 10); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestCifar10Dataset) { | |||||
| // Create a Cifar10 Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; | |||||
| std::shared_ptr<Dataset> ds = Cifar10(folder_path, 0, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 2; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 10); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> random_color_adjust1 = vision::RandomColorAdjust({1.0}, {0.0}, {0.5}, {0.5}); | |||||
| EXPECT_TRUE(random_color_adjust1 != nullptr); | |||||
| std::shared_ptr<TensorOperation> random_color_adjust2 = vision::RandomColorAdjust({1.0, 1.0}, {0.0, 0.0}, {0.5, 0.5}, | |||||
| {0.5, 0.5}); | |||||
| EXPECT_TRUE(random_color_adjust2 != nullptr); | |||||
| std::shared_ptr<TensorOperation> random_color_adjust3 = vision::RandomColorAdjust({0.5, 1.0}, {0.0, 0.5}, {0.25, 0.5}, | |||||
| {0.25, 0.5}); | |||||
| EXPECT_TRUE(random_color_adjust3 != nullptr); | |||||
| std::shared_ptr<TensorOperation> random_color_adjust4 = vision::RandomColorAdjust(); | |||||
| EXPECT_TRUE(random_color_adjust4 != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({random_color_adjust1, random_color_adjust2, random_color_adjust3, random_color_adjust4}); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 1; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 20); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestRandomRotation) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> random_rotation_op = vision::RandomRotation({-180, 180}); | |||||
| EXPECT_TRUE(random_rotation_op != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({random_rotation_op}); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 1; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 20); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| TEST_F(MindDataTestPipeline, TestProjectMap) { | |||||
| // Create an ImageFolder Dataset | |||||
| std::string folder_path = datasets_root_path_ + "/testPK/data/"; | |||||
| std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 2; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create objects for the tensor ops | |||||
| std::shared_ptr<TensorOperation> random_vertical_flip_op = vision::RandomVerticalFlip(0.5); | |||||
| EXPECT_TRUE(random_vertical_flip_op != nullptr); | |||||
| // Create a Map operation on ds | |||||
| ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"}); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Project operation on ds | |||||
| std::vector<std::string> column_project = {"label"}; | |||||
| ds = ds->Project(column_project); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create a Batch operation on ds | |||||
| int32_t batch_size = 1; | |||||
| ds = ds->Batch(batch_size); | |||||
| EXPECT_TRUE(ds != nullptr); | |||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| EXPECT_TRUE(iter != nullptr); | |||||
| // Iterate the dataset and get each row | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
| iter->GetNextRow(&row); | |||||
| uint64_t i = 0; | |||||
| while (row.size() != 0) { | |||||
| i++; | |||||
| auto image = row["image"]; | |||||
| MS_LOG(INFO) << "Tensor image shape: " << image->shape(); | |||||
| iter->GetNextRow(&row); | |||||
| } | |||||
| EXPECT_TRUE(i == 20); | |||||
| // Manually terminate the pipeline | |||||
| iter->Stop(); | |||||
| } | |||||
| @@ -23,8 +23,6 @@ | |||||
| using namespace mindspore::dataset; | using namespace mindspore::dataset; | ||||
| namespace py = pybind11; | |||||
| class MindDataTestDatatype : public UT::Common { | class MindDataTestDatatype : public UT::Common { | ||||
| public: | public: | ||||
| MindDataTestDatatype() = default; | MindDataTestDatatype() = default; | ||||