| @@ -16,7 +16,33 @@ set(MIND_DATA_LIB_DIR ${RUNTIME_PKG_NAME}/minddata/lib) | |||||
| set(LIB_DIR_RUN_X86 ${RUNTIME_PKG_NAME}/lib) | set(LIB_DIR_RUN_X86 ${RUNTIME_PKG_NAME}/lib) | ||||
| if(BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") | |||||
# Packaging rules for the "full" MindData lite build: the lite API headers,
# the MindData runtime library and the libjpeg-turbo shared objects.
if(BUILD_MINDDATA STREQUAL "full")
    # Lite API headers (*.h); vision.h is excluded from the package.
    install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/liteapi/include/
            DESTINATION ${MIND_DATA_INC_DIR}
            COMPONENT ${RUNTIME_COMPONENT_NAME}
            FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE)
    # status.h is shipped next to the MindData headers under the name ms_status.h.
    install(FILES ${TOP_DIR}/include/api/status.h
            DESTINATION ${MIND_DATA_INC_DIR}
            RENAME ms_status.h
            COMPONENT ${RUNTIME_COMPONENT_NAME})
    # The MindData runtime library is installed the same way on every platform.
    install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so
            DESTINATION ${MIND_DATA_LIB_DIR}
            COMPONENT ${RUNTIME_COMPONENT_NAME})
    if(PLATFORM_ARM64 OR PLATFORM_ARM32)
        # Both ARM targets install every *.so found in the libjpeg-turbo library directory.
        file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
        install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
    else()
        # Other targets install the fully versioned libraries under shorter names.
        install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0
                DESTINATION ${TURBO_DIR}/lib
                RENAME libjpeg.so.62
                COMPONENT ${RUNTIME_COMPONENT_NAME})
        install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0
                DESTINATION ${TURBO_DIR}/lib
                RENAME libturbojpeg.so.0
                COMPONENT ${RUNTIME_COMPONENT_NAME})
    endif()
endif()
| if(BUILD_MINDDATA STREQUAL "wrapper") | |||||
| install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} | install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} | ||||
| COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE) | COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE) | ||||
| if(PLATFORM_ARM64) | if(PLATFORM_ARM64) | ||||
| @@ -79,6 +79,7 @@ | |||||
| // IR leaf nodes | // IR leaf nodes | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/album_node.h" | #include "minddata/dataset/engine/ir/datasetops/source/album_node.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" | |||||
| // IR leaf nodes disabled for android | // IR leaf nodes disabled for android | ||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| @@ -89,7 +90,6 @@ | |||||
| #include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" | #include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h" | #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" | #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" | |||||
| #include "minddata/dataset/engine/ir/datasetops/source/random_node.h" | #include "minddata/dataset/engine/ir/datasetops/source/random_node.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" | #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h" | #include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h" | ||||
| @@ -390,7 +390,7 @@ std::shared_ptr<MindDataDataset> MindData(const std::vector<std::string> &datase | |||||
| return ds; | return ds; | ||||
| } | } | ||||
| #endif | |||||
| // Function to create a MnistDataset. | // Function to create a MnistDataset. | ||||
| std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage, | std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage, | ||||
| const std::shared_ptr<SamplerObj> &sampler, | const std::shared_ptr<SamplerObj> &sampler, | ||||
| @@ -399,6 +399,8 @@ std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::s | |||||
| return ds; | return ds; | ||||
| } | } | ||||
| #ifndef ENABLE_ANDROID | |||||
| // Function to overload "+" operator to concat two datasets | // Function to overload "+" operator to concat two datasets | ||||
| std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1, | std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1, | ||||
| const std::shared_ptr<Dataset> &datasets2) { | const std::shared_ptr<Dataset> &datasets2) { | ||||
| @@ -906,12 +908,15 @@ MindDataDataset::MindDataDataset(const std::vector<std::string> &dataset_files, | |||||
| auto ds = std::make_shared<MindDataNode>(dataset_files, columns_list, sampler, padded_sample, num_padded); | auto ds = std::make_shared<MindDataNode>(dataset_files, columns_list, sampler, padded_sample, num_padded); | ||||
| ir_node_ = std::static_pointer_cast<DatasetNode>(ds); | ir_node_ = std::static_pointer_cast<DatasetNode>(ds); | ||||
| } | } | ||||
| #endif | |||||
| MnistDataset::MnistDataset(const std::string &dataset_dir, const std::string &usage, | MnistDataset::MnistDataset(const std::string &dataset_dir, const std::string &usage, | ||||
| const std::shared_ptr<SamplerObj> &sampler, const std::shared_ptr<DatasetCache> &cache) { | const std::shared_ptr<SamplerObj> &sampler, const std::shared_ptr<DatasetCache> &cache) { | ||||
| auto ds = std::make_shared<MnistNode>(dataset_dir, usage, sampler, cache); | auto ds = std::make_shared<MnistNode>(dataset_dir, usage, sampler, cache); | ||||
| ir_node_ = std::static_pointer_cast<DatasetNode>(ds); | ir_node_ = std::static_pointer_cast<DatasetNode>(ds); | ||||
| } | } | ||||
| #ifndef ENABLE_ANDROID | |||||
| TextFileDataset::TextFileDataset(const std::vector<std::string> &dataset_files, int64_t num_samples, | TextFileDataset::TextFileDataset(const std::vector<std::string> &dataset_files, int64_t num_samples, | ||||
| ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, | ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, | ||||
| const std::shared_ptr<DatasetCache> &cache) { | const std::shared_ptr<DatasetCache> &cache) { | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "minddata/dataset/include/samplers.h" | #include "minddata/dataset/include/samplers.h" | ||||
| #include "minddata/dataset/core/config_manager.h" | |||||
| #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" | #include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" | ||||
| @@ -32,8 +33,6 @@ | |||||
| #include "minddata/mindrecord/include/shard_sequential_sample.h" | #include "minddata/mindrecord/include/shard_sequential_sample.h" | ||||
| #include "minddata/mindrecord/include/shard_shuffle.h" | #include "minddata/mindrecord/include/shard_shuffle.h" | ||||
| #include "minddata/dataset/util/random.h" | #include "minddata/dataset/util/random.h" | ||||
| #else | |||||
| #include "minddata/dataset/core/config_manager.h" | |||||
| #endif | #endif | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -25,9 +25,9 @@ | |||||
| #include "minddata/dataset/engine/datasetops/device_queue_op.h" | #include "minddata/dataset/engine/datasetops/device_queue_op.h" | ||||
| #include "minddata/dataset/engine/opt/pre/getter_pass.h" | #include "minddata/dataset/engine/opt/pre/getter_pass.h" | ||||
| #include "minddata/dataset/engine/tree_adapter.h" | #include "minddata/dataset/engine/tree_adapter.h" | ||||
| #include "minddata/mindrecord/include/shard_index_generator.h" | |||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| #include "minddata/mindrecord/include/shard_index_generator.h" | |||||
| #include "minddata/mindrecord/include/shard_header.h" | #include "minddata/mindrecord/include/shard_header.h" | ||||
| #include "minddata/mindrecord/include/shard_writer.h" | #include "minddata/mindrecord/include/shard_writer.h" | ||||
| #endif | #endif | ||||
| @@ -324,10 +324,9 @@ Status SaveToDisk::FetchMetaFromTensorRow(const std::unordered_map<std::string, | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row, | |||||
| const std::unordered_map<std::string, int32_t> &column_name_id_map, | |||||
| nlohmann::json *row_raw_data, | |||||
| std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) { | |||||
| static Status ValidateInputParams(nlohmann::json *row_raw_data, | |||||
| std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data, | |||||
| const std::unordered_map<std::string, int32_t> &column_name_id_map) { | |||||
| if (row_raw_data == nullptr) { | if (row_raw_data == nullptr) { | ||||
| RETURN_STATUS_UNEXPECTED("Error: row raw data is NULL."); | RETURN_STATUS_UNEXPECTED("Error: row raw data is NULL."); | ||||
| } | } | ||||
| @@ -337,76 +336,104 @@ Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row, | |||||
| if (column_name_id_map.empty()) { | if (column_name_id_map.empty()) { | ||||
| RETURN_STATUS_UNEXPECTED("Error: column not found"); | RETURN_STATUS_UNEXPECTED("Error: column not found"); | ||||
| } | } | ||||
| return Status::OK(); | |||||
| } | |||||
| Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data, | |||||
| std::unique_ptr<std::vector<uint8_t>> *data_ptr) { | |||||
| auto column_type = tensor->type(); | |||||
| Status s; | Status s; | ||||
| if (column_type == DataType::DE_FLOAT32) { | |||||
| std::unique_ptr<float> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_FLOAT64) { | |||||
| std::unique_ptr<double> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } | |||||
| return Status::OK(); | |||||
| } | |||||
| Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data, | |||||
| std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) { | |||||
| auto column_type = tensor->type(); | |||||
| Status s; | |||||
| std::unique_ptr<std::vector<uint8_t>> data_ptr; | |||||
| if (column_type == DataType::DE_INT8) { | |||||
| std::unique_ptr<int32_t> data; | |||||
| std::unique_ptr<int8_t> dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_INT16) { | |||||
| std::unique_ptr<int32_t> data; | |||||
| std::unique_ptr<int16_t> dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_UINT16) { | |||||
| std::unique_ptr<int32_t> data; | |||||
| std::unique_ptr<uint16_t> dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_UINT8) { | |||||
| std::unique_ptr<uint8_t> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_INT32) { | |||||
| std::unique_ptr<int32_t> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_UINT32) { | |||||
| std::unique_ptr<int64_t> data; | |||||
| std::unique_ptr<uint32_t> dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_INT64) { | |||||
| std::unique_ptr<int64_t> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_FLOAT32 || column_type == DataType::DE_FLOAT64) { | |||||
| s = FetchFloatData(tensor, column_name, row_raw_data, &data_ptr); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| } else if (column_type == DataType::DE_STRING) { | |||||
| std::string_view sv; | |||||
| RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0})); // assume scalar string tensor | |||||
| std::string ss(sv); | |||||
| (*row_raw_data)[column_name] = std::move(ss); | |||||
| return Status::OK(); | |||||
| } else { | |||||
| RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data."); | |||||
| } | |||||
| if (data_ptr != nullptr) { | |||||
| (*row_bin_data)[column_name] = std::move(data_ptr); | |||||
| } | |||||
| return Status::OK(); | |||||
| } | |||||
| Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row, | |||||
| const std::unordered_map<std::string, int32_t> &column_name_id_map, | |||||
| nlohmann::json *row_raw_data, | |||||
| std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) { | |||||
| Status s; | |||||
| s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map); | |||||
| if (s.IsError()) { | |||||
| return s; | |||||
| } | |||||
| for (auto &col : column_name_id_map) { | for (auto &col : column_name_id_map) { | ||||
| auto idx = col.second; | auto idx = col.second; | ||||
| auto column_name = col.first; | auto column_name = col.first; | ||||
| auto &tensor = row[idx]; | auto &tensor = row[idx]; | ||||
| auto column_type = tensor->type(); | |||||
| std::unique_ptr<std::vector<uint8_t>> data_ptr; | |||||
| if (column_type == DataType::DE_INT8) { | |||||
| std::unique_ptr<int32_t> data; | |||||
| std::unique_ptr<int8_t> dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_INT16) { | |||||
| std::unique_ptr<int32_t> data; | |||||
| std::unique_ptr<int16_t> dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_UINT16) { | |||||
| std::unique_ptr<int32_t> data; | |||||
| std::unique_ptr<uint16_t> dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_UINT8) { | |||||
| std::unique_ptr<uint8_t> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_INT32) { | |||||
| std::unique_ptr<int32_t> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_UINT32) { | |||||
| std::unique_ptr<int64_t> data; | |||||
| std::unique_ptr<uint32_t> dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_INT64) { | |||||
| std::unique_ptr<int64_t> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_FLOAT32) { | |||||
| std::unique_ptr<float> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_FLOAT64) { | |||||
| std::unique_ptr<double> data, dummy; | |||||
| s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); | |||||
| RETURN_IF_NOT_OK(s); | |||||
| if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); | |||||
| } else if (column_type == DataType::DE_STRING) { | |||||
| std::string_view sv; | |||||
| RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0})); // assume scalar string tensor | |||||
| std::string ss(sv); | |||||
| (*row_raw_data)[column_name] = std::move(ss); | |||||
| continue; | |||||
| } else { | |||||
| RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data."); | |||||
| } | |||||
| s = FetchItemData(tensor, column_name, row_raw_data, row_bin_data); | |||||
| RETURN_IF_NOT_OK(s); | RETURN_IF_NOT_OK(s); | ||||
| if (data_ptr != nullptr) { | |||||
| (*row_bin_data)[column_name] = std::move(data_ptr); | |||||
| } | |||||
| } | } | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -130,6 +130,12 @@ class SaveToDisk : public TreeConsumer { | |||||
| nlohmann::json *row_raw_data, | nlohmann::json *row_raw_data, | ||||
| std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data); | std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data); | ||||
| Status FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data, | |||||
| std::unique_ptr<std::vector<uint8_t>> *data_ptr); | |||||
| Status FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data, | |||||
| std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data); | |||||
| std::string dataset_path_; | std::string dataset_path_; | ||||
| int32_t num_files_; | int32_t num_files_; | ||||
| std::string dataset_type_; | std::string dataset_type_; | ||||
| @@ -17,6 +17,7 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include <vector> | #include <vector> | ||||
| #include <utility> | #include <utility> | ||||
| #include <set> | |||||
| #include "minddata/dataset/engine/datasetops/map_op/cpu_map_job.h" | #include "minddata/dataset/engine/datasetops/map_op/cpu_map_job.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -86,8 +86,9 @@ | |||||
| #include "minddata/dataset/engine/datasetops/source/csv_op.h" | #include "minddata/dataset/engine/datasetops/source/csv_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/text_file_op.h" | #include "minddata/dataset/engine/datasetops/source/text_file_op.h" | ||||
| #endif | #endif | ||||
| #include "minddata/dataset/engine/datasetops/source/voc_op.h" | |||||
| #ifdef ENABLE_PYTHON | #ifdef ENABLE_PYTHON | ||||
| #include "minddata/dataset/engine/datasetops/source/voc_op.h" | |||||
| #include "minddata/dataset/engine/datasetops/filter_op.h" | #include "minddata/dataset/engine/datasetops/filter_op.h" | ||||
| #include "minddata/dataset/engine/datasetops/source/generator_op.h" | #include "minddata/dataset/engine/datasetops/source/generator_op.h" | ||||
| #endif | #endif | ||||
| @@ -80,9 +80,10 @@ Status ProfilingManager::Initialize() { | |||||
| std::shared_ptr<Sampling> connector_thr_sampling = std::make_shared<ConnectorThroughput>(tree_); | std::shared_ptr<Sampling> connector_thr_sampling = std::make_shared<ConnectorThroughput>(tree_); | ||||
| RETURN_IF_NOT_OK(RegisterSamplingNode(connector_thr_sampling)); | RETURN_IF_NOT_OK(RegisterSamplingNode(connector_thr_sampling)); | ||||
| #ifndef ENABLE_ANDROID | |||||
| std::shared_ptr<Sampling> cpu_sampling = std::make_shared<CpuSampling>(tree_); | std::shared_ptr<Sampling> cpu_sampling = std::make_shared<CpuSampling>(tree_); | ||||
| RETURN_IF_NOT_OK(RegisterSamplingNode(cpu_sampling)); | RETURN_IF_NOT_OK(RegisterSamplingNode(cpu_sampling)); | ||||
| #endif | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -18,14 +18,16 @@ | |||||
| #include "minddata/dataset/core/client.h" | #include "minddata/dataset/core/client.h" | ||||
| #include "minddata/dataset/engine/ir/datasetops/root_node.h" | #include "minddata/dataset/engine/ir/datasetops/root_node.h" | ||||
| #ifndef ENABLE_ANDROID | |||||
| #include "minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h" | #include "minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h" | ||||
| #include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" | |||||
| #include "minddata/dataset/engine/opt/post/repeat_pass.h" | |||||
| #endif | |||||
| #include "minddata/dataset/engine/opt/pass.h" | #include "minddata/dataset/engine/opt/pass.h" | ||||
| #include "minddata/dataset/engine/opt/post/auto_worker_pass.h" | #include "minddata/dataset/engine/opt/post/auto_worker_pass.h" | ||||
| #include "minddata/dataset/engine/opt/post/repeat_pass.h" | |||||
| #ifdef ENABLE_PYTHON | #ifdef ENABLE_PYTHON | ||||
| #include "minddata/dataset/engine/opt/post/generator_node_pass.h" | #include "minddata/dataset/engine/opt/post/generator_node_pass.h" | ||||
| #endif | #endif | ||||
| #include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" | |||||
| #include "minddata/dataset/engine/opt/pre/cache_validation_pass.h" | #include "minddata/dataset/engine/opt/pre/cache_validation_pass.h" | ||||
| #include "minddata/dataset/engine/opt/pre/deep_copy_pass.h" | #include "minddata/dataset/engine/opt/pre/deep_copy_pass.h" | ||||
| #include "minddata/dataset/engine/opt/pre/epoch_ctrl_pass.h" | #include "minddata/dataset/engine/opt/pre/epoch_ctrl_pass.h" | ||||
| @@ -55,7 +57,9 @@ Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) { | |||||
| actions.emplace_back(std::make_unique<NodeRemovalPass>()); | actions.emplace_back(std::make_unique<NodeRemovalPass>()); | ||||
| actions.emplace_back(std::make_unique<EpochCtrlPass>()); | actions.emplace_back(std::make_unique<EpochCtrlPass>()); | ||||
| if (usage_ == kDeGetter) actions.emplace_back(std::make_unique<GetterPass>()); | if (usage_ == kDeGetter) actions.emplace_back(std::make_unique<GetterPass>()); | ||||
| #ifndef ENABLE_ANDROID | |||||
| actions.emplace_back(std::make_unique<CacheTransformPass>()); | actions.emplace_back(std::make_unique<CacheTransformPass>()); | ||||
| #endif | |||||
| // Vector of flags for each action | // Vector of flags for each action | ||||
| std::vector<bool> modified(actions.size(), false); | std::vector<bool> modified(actions.size(), false); | ||||
| // Apply pre-pass actions | // Apply pre-pass actions | ||||
| @@ -72,7 +76,9 @@ Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) { | |||||
| // Vector of optimizations | // Vector of optimizations | ||||
| std::vector<std::unique_ptr<IRNodePass>> optimizations; | std::vector<std::unique_ptr<IRNodePass>> optimizations; | ||||
| MS_LOG(INFO) << "Running optimization pass loops"; | MS_LOG(INFO) << "Running optimization pass loops"; | ||||
| #ifndef ENABLE_ANDROID | |||||
| optimizations.emplace_back(std::make_unique<TensorOpFusionPass>()); | optimizations.emplace_back(std::make_unique<TensorOpFusionPass>()); | ||||
| #endif | |||||
| // Apply optimization pass actions | // Apply optimization pass actions | ||||
| for (auto i = 0; i < optimizations.size(); i++) { | for (auto i = 0; i < optimizations.size(); i++) { | ||||
| bool modified = false; | bool modified = false; | ||||
| @@ -95,8 +101,9 @@ Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) { | |||||
| #ifdef ENABLE_PYTHON | #ifdef ENABLE_PYTHON | ||||
| actions.emplace_back(std::make_unique<GeneratorNodePass>()); | actions.emplace_back(std::make_unique<GeneratorNodePass>()); | ||||
| #endif | #endif | ||||
| #ifndef ENABLE_ANDROID | |||||
| actions.emplace_back(std::make_unique<RepeatPass>()); | actions.emplace_back(std::make_unique<RepeatPass>()); | ||||
| #endif | |||||
| // We will gradually move RepeatPass from ExecutionTree::PrepareTreePostAction to here. | // We will gradually move RepeatPass from ExecutionTree::PrepareTreePostAction to here. | ||||
| // Vector of flags for each action | // Vector of flags for each action | ||||
| @@ -830,6 +830,7 @@ std::shared_ptr<MindDataDataset> MindData(const std::vector<std::string> &datase | |||||
| const std::vector<std::string> &columns_list = {}, | const std::vector<std::string> &columns_list = {}, | ||||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | ||||
| nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); | nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); | ||||
| #endif | |||||
| class MnistDataset : public Dataset { | class MnistDataset : public Dataset { | ||||
| public: | public: | ||||
| @@ -850,7 +851,7 @@ class MnistDataset : public Dataset { | |||||
| std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage = "all", | std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage = "all", | ||||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | ||||
| const std::shared_ptr<DatasetCache> &cache = nullptr); | const std::shared_ptr<DatasetCache> &cache = nullptr); | ||||
| #ifndef ENABLE_ANDROID | |||||
| /// \brief Function to create a ConcatDataset | /// \brief Function to create a ConcatDataset | ||||
| /// \notes Reload "+" operator to concat two datasets | /// \notes Reload "+" operator to concat two datasets | ||||
| /// \param[in] datasets1 Shared pointer to the first dataset to be concatenated | /// \param[in] datasets1 Shared pointer to the first dataset to be concatenated | ||||
| @@ -20,6 +20,7 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| #include <nlohmann/json.hpp> | |||||
| #include "minddata/dataset/include/status.h" | #include "minddata/dataset/include/status.h" | ||||
| #ifndef ENABLE_ANDROID | #ifndef ENABLE_ANDROID | ||||
| @@ -0,0 +1,190 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_ | |||||
| #include <cstdlib> | |||||
| #include <functional> | |||||
| #include <memory> | |||||
| #include <type_traits> | |||||
| #include <utility> | |||||
| #include "include/memory_pool.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // The following conforms to the requirements of | |||||
| // std::allocator. Do not rename/change any needed | |||||
| // requirements, e.g. function names, typedef etc. | |||||
| template <typename T> | |||||
| class Allocator { | |||||
| public: | |||||
| template <typename U> | |||||
| friend class Allocator; | |||||
| using value_type = T; | |||||
| using pointer = T *; | |||||
| using const_pointer = const T *; | |||||
| using reference = T &; | |||||
| using const_reference = const T &; | |||||
| using size_type = uint64_t; | |||||
| using difference_type = std::ptrdiff_t; | |||||
| template <typename U> | |||||
| struct rebind { | |||||
| using other = Allocator<U>; | |||||
| }; | |||||
| using propagate_on_container_copy_assignment = std::true_type; | |||||
| using propagate_on_container_move_assignment = std::true_type; | |||||
| using propagate_on_container_swap = std::true_type; | |||||
| explicit Allocator(const std::shared_ptr<MemoryPool> &b) : pool_(b) {} | |||||
| ~Allocator() = default; | |||||
| template <typename U> | |||||
| explicit Allocator(Allocator<U> const &rhs) : pool_(rhs.pool_) {} | |||||
| template <typename U> | |||||
| bool operator==(Allocator<U> const &rhs) const { | |||||
| return pool_ == rhs.pool_; | |||||
| } | |||||
| template <typename U> | |||||
| bool operator!=(Allocator<U> const &rhs) const { | |||||
| return pool_ != rhs.pool_; | |||||
| } | |||||
| pointer allocate(std::size_t n) { | |||||
| void *p = nullptr; | |||||
| Status rc = pool_->Allocate(n * sizeof(T), &p); | |||||
| if (rc.IsOk()) { | |||||
| return reinterpret_cast<pointer>(p); | |||||
| } else if (rc == StatusCode::kMDOutOfMemory) { | |||||
| throw std::bad_alloc(); | |||||
| } else { | |||||
| throw std::exception(); | |||||
| } | |||||
| } | |||||
| void deallocate(pointer p, std::size_t n = 0) noexcept { pool_->Deallocate(p); } | |||||
| size_type max_size() { return pool_->get_max_size(); } | |||||
| private: | |||||
| std::shared_ptr<MemoryPool> pool_; | |||||
| }; | |||||
| /// \brief Allocate an array of n objects of type T with the given allocator and wrap it in a unique_ptr whose | |||||
| ///     deleter destroys the objects and returns the storage to a copy of that allocator | |||||
| /// \tparam T Element type | |||||
| /// \tparam C Allocator type. Default to std::allocator | |||||
| /// \tparam Args Types of the extra arguments passed to the constructor of T | |||||
| /// \param[out] out Receives ownership of the new array. Must not be null | |||||
| /// \param[in] alloc Allocator used to obtain and later release the storage | |||||
| /// \param[in] n Number of elements. Must be greater than 0 | |||||
| /// \param[in] args Arguments forwarded to the constructor of every element | |||||
| /// \return Status::OK() on success, kMDOutOfMemory if the allocation fails, or an unexpected-error status | |||||
| ///     carrying the message of any other std::exception raised along the way | |||||
| /// \note Elements are constructed and destroyed only when T is not an arithmetic type. The same args are | |||||
| ///     forwarded for every element, so rvalue arguments may already be moved-from after the first element. | |||||
| template <typename T, typename C = std::allocator<T>, typename... Args> | |||||
| Status MakeUnique(std::unique_ptr<T[], std::function<void(T *)>> *out, C alloc, size_t n, Args &&... args) { | |||||
|   RETURN_UNEXPECTED_IF_NULL(out); | |||||
|   CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "size must be positive"); | |||||
|   T *data = nullptr;       // storage still owned by this function (null once ownership moved on) | |||||
|   size_t constructed = 0;  // number of elements whose constructor has completed | |||||
|   // Undo a partially finished allocation: destroy what was constructed (in reverse order) and free the block. | |||||
|   auto rollback = [&data, &constructed, &alloc, n]() { | |||||
|     if (data == nullptr) { | |||||
|       return; | |||||
|     } | |||||
|     if (!std::is_arithmetic<T>::value && std::is_destructible<T>::value) { | |||||
|       while (constructed > 0) { | |||||
|         --constructed; | |||||
|         std::allocator_traits<C>::destroy(alloc, &data[constructed]); | |||||
|       } | |||||
|     } | |||||
|     alloc.deallocate(data, n); | |||||
|     data = nullptr; | |||||
|   }; | |||||
|   try { | |||||
|     data = alloc.allocate(n); | |||||
|     // Some of our implementation of allocator (e.g. NumaAllocator) don't throw std::bad_alloc. | |||||
|     // So we have to catch for null ptr | |||||
|     if (data == nullptr) { | |||||
|       return Status(StatusCode::kMDOutOfMemory); | |||||
|     } | |||||
|     if (!std::is_arithmetic<T>::value) { | |||||
|       for (; constructed < n; ++constructed) { | |||||
|         std::allocator_traits<C>::construct(alloc, &(data[constructed]), std::forward<Args>(args)...); | |||||
|       } | |||||
|     } | |||||
|     auto deleter = [](T *p, C f_alloc, size_t f_n) { | |||||
|       if (!std::is_arithmetic<T>::value && std::is_destructible<T>::value) { | |||||
|         for (size_t i = 0; i < f_n; ++i) { | |||||
|           std::allocator_traits<C>::destroy(f_alloc, &p[i]); | |||||
|         } | |||||
|       } | |||||
|       f_alloc.deallocate(p, f_n); | |||||
|     }; | |||||
|     // Building the std::function may itself allocate and throw, so do it while `data` is still tracked here. | |||||
|     std::function<void(T *)> release = std::bind(deleter, std::placeholders::_1, alloc, n); | |||||
|     std::unique_ptr<T[], std::function<void(T *)>> owner(data, std::move(release)); | |||||
|     data = nullptr;  // `owner` is responsible for the block from here on | |||||
|     *out = std::move(owner); | |||||
|   } catch (const std::bad_alloc &e) { | |||||
|     rollback(); | |||||
|     return Status(StatusCode::kMDOutOfMemory); | |||||
|   } catch (const std::exception &e) { | |||||
|     rollback(); | |||||
|     RETURN_STATUS_UNEXPECTED(e.what()); | |||||
|   } | |||||
|   return Status::OK(); | |||||
| } | |||||
| /// \brief It is a wrapper of the above custom unique_ptr with some additional methods | |||||
| /// \tparam T The type of object to be allocated | |||||
| /// \tparam C Allocator. Default to std::allocator | |||||
| template <typename T, typename C = std::allocator<T>> | |||||
| class MemGuard { | |||||
|  public: | |||||
|   using allocator = C; | |||||
|   /// \brief Create an empty guard that uses a default-constructed allocator | |||||
|   MemGuard() : n_(0) {} | |||||
|   /// \brief Create an empty guard that uses a copy of the given allocator | |||||
|   explicit MemGuard(allocator a) : n_(0), alloc_(a) {} | |||||
|   // There is no copy constructor nor assignment operator because the memory is solely owned by this object. | |||||
|   MemGuard(const MemGuard &) = delete; | |||||
|   MemGuard &operator=(const MemGuard &) = delete; | |||||
|   // On the other hand, We can support move constructor | |||||
|   MemGuard(MemGuard &&lhs) noexcept : n_(lhs.n_), alloc_(std::move(lhs.alloc_)), ptr_(std::move(lhs.ptr_)) { | |||||
|     // The source no longer owns anything, so it must not keep reporting the old size. | |||||
|     lhs.n_ = 0; | |||||
|   } | |||||
|   /// \brief Move assignment. Releases the memory currently held, then takes over the memory of lhs | |||||
|   MemGuard &operator=(MemGuard &&lhs) noexcept { | |||||
|     if (this != &lhs) { | |||||
|       this->deallocate(); | |||||
|       n_ = lhs.n_; | |||||
|       alloc_ = std::move(lhs.alloc_); | |||||
|       ptr_ = std::move(lhs.ptr_); | |||||
|       lhs.n_ = 0; | |||||
|     } | |||||
|     return *this; | |||||
|   } | |||||
|   /// \brief Explicitly deallocate the memory if allocated. GetSizeInBytes() returns 0 afterwards | |||||
|   void deallocate() { | |||||
|     if (ptr_) { | |||||
|       ptr_.reset(); | |||||
|     } | |||||
|     n_ = 0; | |||||
|   } | |||||
|   /// \brief Allocate memory (with emplace feature). Previous one will be released. The request is passed to | |||||
|   ///     MakeUnique, which rejects a size of 0, so no memory is allocated in that case. | |||||
|   /// \param n Number of objects of type T to be allocated | |||||
|   /// \tparam Args Extra arguments pass to the constructor of T | |||||
|   /// \return Status returned by MakeUnique. On failure the guard stays empty | |||||
|   template <typename... Args> | |||||
|   Status allocate(size_t n, Args &&... args) noexcept { | |||||
|     deallocate(); | |||||
|     Status rc = MakeUnique(&ptr_, alloc_, n, std::forward<Args>(args)...); | |||||
|     // Record the element count only once the memory is really there, so that a failed allocation does not | |||||
|     // leave GetSizeInBytes() reporting memory that does not exist. | |||||
|     if (rc.IsOk()) { | |||||
|       n_ = n; | |||||
|     } | |||||
|     return rc; | |||||
|   } | |||||
|   ~MemGuard() noexcept { deallocate(); } | |||||
|   /// \brief Getter function | |||||
|   /// \return The pointer to the memory allocated | |||||
|   T *GetPointer() const { return ptr_.get(); } | |||||
|   /// \brief Getter function | |||||
|   /// \return The pointer to the memory allocated | |||||
|   T *GetMutablePointer() { return ptr_.get(); } | |||||
|   /// \brief Overload [] operator to access a particular element | |||||
|   /// \param x index to the element. Must be less than number of element allocated. | |||||
|   /// \return pointer to the x-th element | |||||
|   T *operator[](size_t x) { return GetMutablePointer() + x; } | |||||
|   /// \brief Overload [] operator to access a particular element | |||||
|   /// \param x index to the element. Must be less than number of element allocated. | |||||
|   /// \return pointer to the x-th element | |||||
|   T *operator[](size_t x) const { return GetPointer() + x; } | |||||
|   /// \brief Return how many bytes are allocated in total | |||||
|   /// \return Number of bytes allocated in total (0 when nothing is held) | |||||
|   size_t GetSizeInBytes() const { return n_ * sizeof(T); } | |||||
|  private: | |||||
|   size_t n_;         // number of elements currently held | |||||
|   allocator alloc_;  // allocator handed to MakeUnique | |||||
|   std::unique_ptr<T[], std::function<void(T *)>> ptr_; | |||||
| }; | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_ | |||||
| @@ -0,0 +1,109 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_ | |||||
| #include <cstdint> | |||||
| #include <limits> | |||||
| #include <random> | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Various type defines for convenience | |||||
| using uchar = unsigned char; | |||||
| using dsize_t = int64_t; | |||||
| // Target devices to perform map operation | |||||
| enum class MapTargetDevice { kCpu, kGpu, kDvpp }; | |||||
| // Possible dataset types for holding the data and client type | |||||
| enum class DatasetType { kUnknown, kArrow, kTf }; | |||||
| // Possible flavours of Tensor implementations | |||||
| enum class TensorImpl { kNone, kFlexible, kCv, kNP }; | |||||
| // Possible values for shuffle | |||||
| enum class ShuffleMode { kFalse = 0, kFiles = 1, kGlobal = 2 }; | |||||
| // Possible values for Border types | |||||
| enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; | |||||
| // Possible values for Image format types in a batch | |||||
| enum class ImageBatchFormat { kNHWC = 0, kNCHW = 1 }; | |||||
| // Possible values for Image format types | |||||
| enum class ImageFormat { HWC = 0, CHW = 1, HW = 2 }; | |||||
| // Possible interpolation modes | |||||
| enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; | |||||
| // Possible JiebaMode modes | |||||
| enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 }; | |||||
| // Possible values for SPieceTokenizerOutType | |||||
| enum class SPieceTokenizerOutType { kString = 0, kInt = 1 }; | |||||
| // Possible values for SPieceTokenizerLoadType | |||||
| enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 }; | |||||
| // Possible values for SentencePieceModel | |||||
| enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 }; | |||||
| // Possible values for NormalizeForm | |||||
| enum class NormalizeForm { | |||||
| kNone = 0, | |||||
| kNfc, | |||||
| kNfkc, | |||||
| kNfd, | |||||
| kNfkd, | |||||
| }; | |||||
| // Helpers for working with 32-bit bitmasks. | |||||
| // BitTest: true only when every bit of bitMask is also set in bits. | |||||
| inline bool BitTest(uint32_t bits, uint32_t bitMask) { | |||||
|   const uint32_t masked = bits & bitMask; | |||||
|   return masked == bitMask; | |||||
| } | |||||
| // BitSet: turn on, in *bits, every bit that is set in bitMask. | |||||
| inline void BitSet(uint32_t *bits, uint32_t bitMask) { *bits = *bits | bitMask; } | |||||
| // BitClear: turn off, in *bits, every bit that is set in bitMask. | |||||
| inline void BitClear(uint32_t *bits, uint32_t bitMask) { *bits = *bits & ~bitMask; } | |||||
| constexpr int32_t kDeMaxDim = std::numeric_limits<int32_t>::max(); // 2147483647 or 2^31 - 1 | |||||
| constexpr int32_t kDeMaxRank = std::numeric_limits<int32_t>::max(); | |||||
| constexpr int64_t kDeMaxFreq = std::numeric_limits<int64_t>::max(); // 9223372036854775807 or 2^63 - 1 | |||||
| constexpr int64_t kDeMaxTopk = std::numeric_limits<int64_t>::max(); | |||||
| constexpr uint32_t kCfgRowsPerBuffer = 1; | |||||
| constexpr uint32_t kCfgParallelWorkers = 4; | |||||
| constexpr uint32_t kCfgWorkerConnectorSize = 16; | |||||
| constexpr uint32_t kCfgOpConnectorSize = 16; | |||||
| constexpr int32_t kCfgDefaultRankId = -1; | |||||
| constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed; | |||||
| constexpr uint32_t kCfgMonitorSamplingInterval = 10; | |||||
| constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds | |||||
| constexpr int32_t kCfgDefaultCachePort = 50052; | |||||
| constexpr char kCfgDefaultCacheHost[] = "127.0.0.1"; | |||||
| constexpr int32_t kDftPrefetchSize = 20; | |||||
| constexpr int32_t kDftNumConnections = 12; | |||||
| constexpr int32_t kDftAutoNumWorkers = false; | |||||
| // Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h) | |||||
| constexpr uint8_t kCVInvalidType = 255; | |||||
| using connection_id_type = uint64_t; | |||||
| using session_id_type = uint32_t; | |||||
| using row_id_type = int64_t; | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_ | |||||
| @@ -0,0 +1,291 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_DATA_TYPE_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_DATA_TYPE_H_ | |||||
| #include <string> | |||||
| #include "include/constants.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Value class describing one of the basic element types known to DataEngine. | |||||
| class DataType { | |||||
|  public: | |||||
|   // All supported element types. NUM_OF_TYPES is the last enumerator and therefore equals the type count. | |||||
|   enum Type : uint8_t { | |||||
|     DE_UNKNOWN = 0, | |||||
|     DE_BOOL, | |||||
|     DE_INT8, | |||||
|     DE_UINT8, | |||||
|     DE_INT16, | |||||
|     DE_UINT16, | |||||
|     DE_INT32, | |||||
|     DE_UINT32, | |||||
|     DE_INT64, | |||||
|     DE_UINT64, | |||||
|     DE_FLOAT16, | |||||
|     DE_FLOAT32, | |||||
|     DE_FLOAT64, | |||||
|     DE_STRING, | |||||
|     NUM_OF_TYPES | |||||
|   }; | |||||
|   // Static description of one Type value. | |||||
|   struct TypeInfo { | |||||
|     const char *name_;                          // name to be represent the type while printing | |||||
|     const uint8_t sizeInBytes_;                 // number of bytes needed for this type | |||||
|     const char *pybindType_;                    // Python matching type, used in get_output_types | |||||
|     const std::string pybindFormatDescriptor_;  // pybind format used for numpy types | |||||
|     const uint8_t cvType_;                      // OpenCv matching type | |||||
|   }; | |||||
|   // android and no python | |||||
|   // One row per Type value, in enumerator order. Rows that give only four initializers leave cvType_ | |||||
|   // value-initialized (0). | |||||
|   static inline const TypeInfo kTypeInfo[] = { | |||||
|     // name, sizeInBytes, pybindType, pybindFormatDescriptor[, cvType] | |||||
|     {"unknown", 0, "object", "", kCVInvalidType},  // DE_UNKNOWN | |||||
|     {"bool", 1, "bool", ""},                       // DE_BOOL | |||||
|     {"int8", 1, "int8", ""},                       // DE_INT8 | |||||
|     {"uint8", 1, "uint8", ""},                     // DE_UINT8 | |||||
|     {"int16", 2, "int16", ""},                     // DE_INT16 | |||||
|     {"uint16", 2, "uint16", ""},                   // DE_UINT16 | |||||
|     {"int32", 4, "int32", ""},                     // DE_INT32 | |||||
|     {"uint32", 4, "uint32", "", kCVInvalidType},   // DE_UINT32 | |||||
|     {"int64", 8, "int64", "", kCVInvalidType},     // DE_INT64 | |||||
|     {"uint64", 8, "uint64", "", kCVInvalidType},   // DE_UINT64 | |||||
|     {"float16", 2, "float16", ""},                 // DE_FLOAT16 | |||||
|     {"float32", 4, "float32", ""},                 // DE_FLOAT32 | |||||
|     {"float64", 8, "double", ""},                  // DE_FLOAT64 | |||||
|     {"string", 0, "bytes", "", kCVInvalidType}     // DE_STRING | |||||
|   }; | |||||
|   // Default constructor: the resulting type is DE_UNKNOWN | |||||
|   DataType() : type_(DE_UNKNOWN) {} | |||||
|   // Build a type from its string form (defined out of line) | |||||
|   /// \param type_str | |||||
|   explicit DataType(const std::string &type_str); | |||||
|   // Default destructor | |||||
|   ~DataType() = default; | |||||
|   // Build a type directly from an enumerator | |||||
|   /// \param d | |||||
|   constexpr explicit DataType(Type d) : type_(d) {} | |||||
|   // Comparisons against another DataType or against a raw enumerator | |||||
|   constexpr bool operator==(const DataType a) const { return a.type_ == type_; } | |||||
|   constexpr bool operator==(const Type a) const { return a == type_; } | |||||
|   constexpr bool operator!=(const DataType a) const { return !(type_ == a.type_); } | |||||
|   constexpr bool operator!=(const Type a) const { return !(type_ == a); } | |||||
|   // Deleted so that `if(d)` does not compile for a DataType d | |||||
|   /// \return | |||||
|   operator bool() = delete; | |||||
|   // Conversion to the enumerator, so a DataType can be used in switch/case | |||||
|   /// \return | |||||
|   operator Type() const { return type_; } | |||||
|   // The number of bytes needed to store one value of this type (defined out of line) | |||||
|   /// \return | |||||
|   uint8_t SizeInBytes() const; | |||||
|   // String representation of the type (defined out of line) | |||||
|   /// \return | |||||
|   std::string ToString() const; | |||||
|   // True when FromCType<T>() yields exactly this type | |||||
|   /// \tparam T | |||||
|   /// \return true or false | |||||
|   template <typename T> | |||||
|   bool IsCompatible() const { | |||||
|     return type_ == FromCType<T>().value(); | |||||
|   } | |||||
|   // Relaxed variant of IsCompatible; only the explicit specializations provide a definition | |||||
|   /// \tparam T | |||||
|   /// \return true or false | |||||
|   template <typename T> | |||||
|   bool IsLooselyCompatible() const; | |||||
|   // << Stream output operator overload | |||||
|   /// \notes Writes ToString() of the given DataType to the stream | |||||
|   /// \param out - reference to the output stream being overloaded | |||||
|   /// \param so - reference to the DataType to display | |||||
|   /// \return - the output stream must be returned | |||||
|   friend std::ostream &operator<<(std::ostream &out, const DataType &so) { return out << so.ToString(); } | |||||
|   // Map a C++ type to its DataType; only the explicit specializations provide a definition | |||||
|   template <typename T> | |||||
|   static DataType FromCType(); | |||||
|   // Get the buffer string format of the current type. Used in pybind buffer protocol. | |||||
|   /// \return | |||||
|   std::string GetPybindFormat() const; | |||||
|   // True for DE_INT8, DE_INT16, DE_INT32 and DE_INT64 | |||||
|   bool IsSignedInt() const { | |||||
|     switch (type_) { | |||||
|       case DE_INT8: | |||||
|       case DE_INT16: | |||||
|       case DE_INT32: | |||||
|       case DE_INT64: | |||||
|         return true; | |||||
|       default: | |||||
|         return false; | |||||
|     } | |||||
|   } | |||||
|   // True for DE_UINT8, DE_UINT16, DE_UINT32 and DE_UINT64 | |||||
|   bool IsUnsignedInt() const { | |||||
|     switch (type_) { | |||||
|       case DE_UINT8: | |||||
|       case DE_UINT16: | |||||
|       case DE_UINT32: | |||||
|       case DE_UINT64: | |||||
|         return true; | |||||
|       default: | |||||
|         return false; | |||||
|     } | |||||
|   } | |||||
|   // True for any signed or unsigned integer type | |||||
|   bool IsInt() const { return IsUnsignedInt() || IsSignedInt(); } | |||||
|   // True for DE_FLOAT16, DE_FLOAT32 and DE_FLOAT64 | |||||
|   bool IsFloat() const { | |||||
|     switch (type_) { | |||||
|       case DE_FLOAT16: | |||||
|       case DE_FLOAT32: | |||||
|       case DE_FLOAT64: | |||||
|         return true; | |||||
|       default: | |||||
|         return false; | |||||
|     } | |||||
|   } | |||||
|   // True only for DE_BOOL | |||||
|   bool IsBool() const { return DE_BOOL == type_; } | |||||
|   // True for every type other than DE_STRING | |||||
|   bool IsNumeric() const { return !(type_ == DE_STRING); } | |||||
|   // Raw enumerator held by this object | |||||
|   Type value() const { return type_; } | |||||
|  private: | |||||
|   Type type_; | |||||
| }; | |||||
| // Explicit specializations of DataType::FromCType: each maps one C++ type to its DataType enumerator. | |||||
| template <> | |||||
| inline DataType DataType::FromCType<bool>() { | |||||
|   return DataType{DataType::DE_BOOL}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<double>() { | |||||
|   return DataType{DataType::DE_FLOAT64}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<float>() { | |||||
|   return DataType{DataType::DE_FLOAT32}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<int64_t>() { | |||||
|   return DataType{DataType::DE_INT64}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<uint64_t>() { | |||||
|   return DataType{DataType::DE_UINT64}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<int32_t>() { | |||||
|   return DataType{DataType::DE_INT32}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<uint32_t>() { | |||||
|   return DataType{DataType::DE_UINT32}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<int16_t>() { | |||||
|   return DataType{DataType::DE_INT16}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<uint16_t>() { | |||||
|   return DataType{DataType::DE_UINT16}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<int8_t>() { | |||||
|   return DataType{DataType::DE_INT8}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<uint8_t>() { | |||||
|   return DataType{DataType::DE_UINT8}; | |||||
| } | |||||
| // Both string flavours map to DE_STRING. | |||||
| // NOTE(review): std::string_view lives in <string_view>; this header only includes <string> directly, | |||||
| // so confirm it is reached transitively on every supported toolchain. | |||||
| template <> | |||||
| inline DataType DataType::FromCType<std::string_view>() { | |||||
|   return DataType{DataType::DE_STRING}; | |||||
| } | |||||
| template <> | |||||
| inline DataType DataType::FromCType<std::string>() { | |||||
|   return DataType{DataType::DE_STRING}; | |||||
| } | |||||
| // Explicit specializations of DataType::IsLooselyCompatible. For the integer and floating point C++ types the | |||||
| // check accepts the matching DataType as well as every narrower DataType of the same signedness/kind. | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<bool>() const { | |||||
|   return DataType::DE_BOOL == type_; | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<double>() const { | |||||
|   switch (type_) { | |||||
|     case DataType::DE_FLOAT64: | |||||
|     case DataType::DE_FLOAT32: | |||||
|       return true; | |||||
|     default: | |||||
|       return false; | |||||
|   } | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<float>() const { | |||||
|   return DataType::DE_FLOAT32 == type_; | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<int64_t>() const { | |||||
|   switch (type_) { | |||||
|     case DataType::DE_INT64: | |||||
|     case DataType::DE_INT32: | |||||
|     case DataType::DE_INT16: | |||||
|     case DataType::DE_INT8: | |||||
|       return true; | |||||
|     default: | |||||
|       return false; | |||||
|   } | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<uint64_t>() const { | |||||
|   switch (type_) { | |||||
|     case DataType::DE_UINT64: | |||||
|     case DataType::DE_UINT32: | |||||
|     case DataType::DE_UINT16: | |||||
|     case DataType::DE_UINT8: | |||||
|       return true; | |||||
|     default: | |||||
|       return false; | |||||
|   } | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<int32_t>() const { | |||||
|   switch (type_) { | |||||
|     case DataType::DE_INT32: | |||||
|     case DataType::DE_INT16: | |||||
|     case DataType::DE_INT8: | |||||
|       return true; | |||||
|     default: | |||||
|       return false; | |||||
|   } | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<uint32_t>() const { | |||||
|   switch (type_) { | |||||
|     case DataType::DE_UINT32: | |||||
|     case DataType::DE_UINT16: | |||||
|     case DataType::DE_UINT8: | |||||
|       return true; | |||||
|     default: | |||||
|       return false; | |||||
|   } | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<int16_t>() const { | |||||
|   switch (type_) { | |||||
|     case DataType::DE_INT16: | |||||
|     case DataType::DE_INT8: | |||||
|       return true; | |||||
|     default: | |||||
|       return false; | |||||
|   } | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<uint16_t>() const { | |||||
|   switch (type_) { | |||||
|     case DataType::DE_UINT16: | |||||
|     case DataType::DE_UINT8: | |||||
|       return true; | |||||
|     default: | |||||
|       return false; | |||||
|   } | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<int8_t>() const { | |||||
|   return DataType::DE_INT8 == type_; | |||||
| } | |||||
| template <> | |||||
| inline bool DataType::IsLooselyCompatible<uint8_t>() const { | |||||
|   return DataType::DE_UINT8 == type_; | |||||
| } | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_DATA_TYPE_H_ | |||||
| @@ -0,0 +1,254 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_ | |||||
| #include <sys/stat.h> | |||||
| #include <unistd.h> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <set> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <unordered_set> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "include/iterator.h" | |||||
| #include "include/samplers.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| class Tensor; | |||||
| class TensorShape; | |||||
| class TreeGetters; | |||||
| class DatasetCache; | |||||
| class DatasetNode; | |||||
| class Iterator; | |||||
| class TensorOperation; | |||||
| class SchemaObj; | |||||
| class SamplerObj; | |||||
| // Dataset classes (in alphabetical order) | |||||
| class BatchDataset; | |||||
| class MapDataset; | |||||
| class ProjectDataset; | |||||
| class ShuffleDataset; | |||||
| class DSCallback; | |||||
| /// \class Dataset datasets.h | |||||
| /// \brief A base class to represent a dataset in the data pipeline. | |||||
| class Dataset : public std::enable_shared_from_this<Dataset> { | |||||
|  public: | |||||
|   // need friend class so they can access the children_ field | |||||
|   friend class Iterator; | |||||
|   friend class TransferNode; | |||||
|   /// \brief Constructor | |||||
|   Dataset(); | |||||
|   /// \brief Destructor. Virtual because Dataset is a public base class (BatchDataset, MapDataset, ...), so | |||||
|   ///     destroying a derived dataset through a pointer to Dataset runs the derived destructor | |||||
|   virtual ~Dataset() = default; | |||||
|   /// \brief Gets the dataset size | |||||
|   /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting | |||||
|   ///     dataset size at the expense of accuracy. | |||||
|   /// \return dataset size. If failed, return -1 | |||||
|   int64_t GetDatasetSize(bool estimate = false); | |||||
|   // /// \brief Gets the output type | |||||
|   // /// \return a vector of DataType. If failed, return an empty vector | |||||
|   // std::vector<DataType> GetOutputTypes(); | |||||
|   /// \brief Gets the output shape | |||||
|   /// \return a vector of TensorShape. If failed, return an empty vector | |||||
|   std::vector<TensorShape> GetOutputShapes(); | |||||
|   /// \brief Gets the batch size | |||||
|   /// \return int64_t | |||||
|   int64_t GetBatchSize(); | |||||
|   /// \brief Gets the repeat count | |||||
|   /// \return int64_t | |||||
|   int64_t GetRepeatCount(); | |||||
|   /// \brief Gets the number of classes | |||||
|   /// \return number of classes. If failed, return -1 | |||||
|   int64_t GetNumClasses(); | |||||
|   /// \brief Gets the column names | |||||
|   /// \return Names of the columns. If failed, return an empty vector | |||||
|   std::vector<std::string> GetColumnNames(); | |||||
|   /// \brief Gets the class indexing | |||||
|   /// \return a vector of (class name, vector of int32_t) pairs. If failed, return an empty vector | |||||
|   std::vector<std::pair<std::string, std::vector<int32_t>>> GetClassIndexing(); | |||||
|   /// \brief Setter function for runtime number of workers | |||||
|   /// \param[in] num_workers The number of threads in this operator | |||||
|   /// \return Shared pointer to the original object | |||||
|   std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers); | |||||
|   /// \brief Function to create an Iterator over the Dataset pipeline | |||||
|   /// \param[in] columns List of columns to be used to specify the order of columns | |||||
|   /// \param[in] num_epochs Number of epochs to run through the pipeline, default -1 which means infinite epochs. | |||||
|   ///     An empty row is returned at the end of each epoch | |||||
|   /// \return Shared pointer to the Iterator | |||||
|   std::shared_ptr<Iterator> CreateIterator(std::vector<std::string> columns = {}, int32_t num_epochs = -1); | |||||
|   /// \brief Function to create a BatchDataset | |||||
|   /// \notes Combines batch_size number of consecutive rows into batches | |||||
|   /// \param[in] batch_size The number of rows each batch is created with | |||||
|   /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete | |||||
|   ///     batch. If true, and if there are less than batch_size rows | |||||
|   ///     available to make the last batch, then those rows will | |||||
|   ///     be dropped and not propagated to the next node | |||||
|   /// \return Shared pointer to the current BatchDataset | |||||
|   std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false); | |||||
|   /// \brief Function to create a MapDataset | |||||
|   /// \notes Applies each operation in operations to this dataset | |||||
|   /// \param[in] operations Vector of operations to be applied on the dataset. Operations are | |||||
|   ///     applied in the order they appear in this list | |||||
|   /// \param[in] input_columns Vector of the names of the columns that will be passed to the first | |||||
|   ///     operation as input. The size of this list must match the number of | |||||
|   ///     input columns expected by the first operator. The default input_columns | |||||
|   ///     is the first column | |||||
|   /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation | |||||
|   ///     This parameter is mandatory if len(input_columns) != len(output_columns) | |||||
|   ///     The size of this list must match the number of output columns of the | |||||
|   ///     last operation. The default output_columns will have the same | |||||
|   ///     name as the input columns, i.e., the columns will be replaced | |||||
|   /// \param[in] project_columns A list of column names to project | |||||
|   /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). | |||||
|   /// \param[in] callbacks Vector of DSCallback objects forwarded to the MapDataset constructor (default={}) | |||||
|   /// \return Shared pointer to the current MapDataset | |||||
|   std::shared_ptr<MapDataset> Map(std::vector<std::shared_ptr<TensorOperation>> operations, | |||||
|                                   const std::vector<std::string> &input_columns = {}, | |||||
|                                   const std::vector<std::string> &output_columns = {}, | |||||
|                                   const std::vector<std::string> &project_columns = {}, | |||||
|                                   const std::shared_ptr<DatasetCache> &cache = nullptr, | |||||
|                                   std::vector<std::shared_ptr<DSCallback>> callbacks = {}) { | |||||
|     return std::make_shared<MapDataset>(shared_from_this(), operations, input_columns, output_columns, project_columns, | |||||
|                                         cache, callbacks); | |||||
|   } | |||||
|   /// \brief Function to create a Project Dataset | |||||
|   /// \notes Applies project to the dataset | |||||
|   /// \param[in] columns The name of columns to project | |||||
|   /// \return Shared pointer to the current Dataset | |||||
|   std::shared_ptr<ProjectDataset> Project(const std::vector<std::string> &columns) { | |||||
|     return std::make_shared<ProjectDataset>(shared_from_this(), columns); | |||||
|   } | |||||
|   /// \brief Function to create a Shuffle Dataset | |||||
|   /// \notes Randomly shuffles the rows of this dataset | |||||
|   /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling | |||||
|   /// \return Shared pointer to the current ShuffleDataset | |||||
|   std::shared_ptr<ShuffleDataset> Shuffle(int32_t buffer_size) { | |||||
|     return std::make_shared<ShuffleDataset>(shared_from_this(), buffer_size); | |||||
|   } | |||||
|   /// \brief Getter function | |||||
|   /// \return Shared pointer to the DatasetNode held in ir_node_ | |||||
|   std::shared_ptr<DatasetNode> IRNode() { return ir_node_; } | |||||
|  protected: | |||||
|   std::shared_ptr<TreeGetters> tree_getters_; | |||||
|   std::shared_ptr<DatasetNode> ir_node_; | |||||
| }; | |||||
| /// \brief Dataset type returned by Dataset::Batch. The constructor is only declared here; it takes the input | |||||
| ///     dataset, the batch size and the drop_remainder flag (default false). | |||||
| class BatchDataset : public Dataset { | |||||
| public: | |||||
| BatchDataset(std::shared_ptr<Dataset> input, int32_t batch_size, bool drop_remainder = false); | |||||
| ~BatchDataset() = default; | |||||
| }; | |||||
| /// \brief Dataset type created by Dataset::Map. The constructor is only declared here; it receives the input | |||||
| ///     dataset followed by the same arguments that Dataset::Map accepts. | |||||
| class MapDataset : public Dataset { | |||||
| public: | |||||
| MapDataset(std::shared_ptr<Dataset> input, std::vector<std::shared_ptr<TensorOperation>> operations, | |||||
| const std::vector<std::string> &input_columns, const std::vector<std::string> &output_columns, | |||||
| const std::vector<std::string> &project_columns, const std::shared_ptr<DatasetCache> &cache, | |||||
| std::vector<std::shared_ptr<DSCallback>> callbacks); | |||||
| ~MapDataset() = default; | |||||
| }; | |||||
| /// \brief Dataset type created by Dataset::Project. The constructor is only declared here; it takes the input | |||||
| ///     dataset and the names of the columns to project. | |||||
| class ProjectDataset : public Dataset { | |||||
| public: | |||||
| ProjectDataset(std::shared_ptr<Dataset> input, const std::vector<std::string> &columns); | |||||
| ~ProjectDataset() = default; | |||||
| }; | |||||
| /// \brief Dataset type created by Dataset::Shuffle. The constructor is only declared here; it takes the input | |||||
| ///     dataset and the shuffle buffer size. | |||||
| class ShuffleDataset : public Dataset { | |||||
| public: | |||||
| ShuffleDataset(std::shared_ptr<Dataset> input, int32_t buffer_size); | |||||
| ~ShuffleDataset() = default; | |||||
| }; | |||||
| /// \brief Function to create a SchemaObj | |||||
| /// \param[in] schema_file Path of schema file | |||||
| /// \return Shared pointer to the current schema | |||||
| std::shared_ptr<SchemaObj> Schema(const std::string &schema_file = ""); | |||||
| /// \brief Dataset type returned by the Album() function. The constructor is only declared here and takes the | |||||
| ///     same parameters, with the same defaults, as Album(). | |||||
| class AlbumDataset : public Dataset { | |||||
| public: | |||||
| AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, | |||||
| const std::vector<std::string> &column_names = {}, bool decode = false, | |||||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | |||||
| const std::shared_ptr<DatasetCache> &cache = nullptr); | |||||
| ~AlbumDataset() = default; | |||||
| }; | |||||
| /// \brief Function to create an AlbumDataset | |||||
| /// \notes The generated dataset is specified through setting a schema | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||||
| /// \param[in] data_schema Path to dataset schema file | |||||
| /// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns. | |||||
| /// (default = {}) | |||||
| /// \param[in] decode the option to decode the images in dataset (default = false) | |||||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||||
| /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). | |||||
| /// \return Shared pointer to the current Dataset | |||||
| std::shared_ptr<AlbumDataset> Album(const std::string &dataset_dir, const std::string &data_schema, | |||||
| const std::vector<std::string> &column_names = {}, bool decode = false, | |||||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | |||||
| const std::shared_ptr<DatasetCache> &cache = nullptr); | |||||
| /// \brief Dataset type returned by the Mnist() function. The constructor is only declared here and takes the | |||||
| ///     same parameters, with the same defaults, as Mnist(). | |||||
| class MnistDataset : public Dataset { | |||||
| public: | |||||
| explicit MnistDataset(const std::string &dataset_dir, const std::string &usage = "all", | |||||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | |||||
| const std::shared_ptr<DatasetCache> &cache = nullptr); | |||||
| ~MnistDataset() = default; | |||||
| }; | |||||
| /// \brief Function to create a MnistDataset | |||||
| /// \notes The generated dataset has two columns ["image", "label"] | |||||
| /// \param[in] dataset_dir Path to the root directory that contains the dataset | |||||
| /// \param[in] usage of MNIST, can be "train", "test" or "all" (default = "all"). | |||||
| /// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, | |||||
| /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) | |||||
| /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). | |||||
| /// \return Shared pointer to the current MnistDataset | |||||
| std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage = "all", | |||||
| const std::shared_ptr<SamplerObj> &sampler = RandomSampler(), | |||||
| const std::shared_ptr<DatasetCache> &cache = nullptr); | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_ | |||||
| @@ -0,0 +1,58 @@ | |||||
| /** | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "include/api/types.h" | |||||
| #include "include/constants.h" | |||||
| #include "dataset/include/transforms.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // class to run tensor operations in eager mode | |||||
| class Execute { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| explicit Execute(std::shared_ptr<TensorOperation> op); | |||||
| explicit Execute(std::vector<std::shared_ptr<TensorOperation>> ops); | |||||
| /// \brief Destructor | |||||
| ~Execute() = default; | |||||
| /// \brief callable function to execute the TensorOperation in eager mode | |||||
| /// \param[in] input Tensor to be transformed | |||||
| /// \param[out] output Transformed tensor | |||||
| /// \return Status code | |||||
| Status operator()(const mindspore::MSTensor &input, mindspore::MSTensor *output); | |||||
| /// \brief callable function to execute the TensorOperation in eager mode | |||||
| /// \param[in] input_tensor_list List of Tensor to be transformed | |||||
| /// \param[out] out Result tensor after transform | |||||
| /// \return - Status | |||||
| Status operator()(const std::vector<mindspore::MSTensor> &input_tensor_list, std::vector<mindspore::MSTensor> *out); | |||||
| private: | |||||
| std::vector<std::shared_ptr<TensorOperation>> ops_; | |||||
| }; | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_ | |||||
| @@ -0,0 +1,120 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include "include/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
// Forward declarations of types that this header only refers to through pointers / smart pointers.
class Dataset;
class DatasetIterator;
class DatasetOp;
class ExecutionTree;
class IteratorConsumer;
class NativeRuntimeContext;
class Tensor;

// A row keyed by column name.
using TensorMap = std::unordered_map<std::string, std::shared_ptr<Tensor>>;
// A row as an ordered list of tensors (no column names).
using TensorVec = std::vector<std::shared_ptr<Tensor>>;
| // Abstract class for iterating over the dataset. | |||||
| class Iterator { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| Iterator(); | |||||
| /// \brief Destructor | |||||
| ~Iterator(); | |||||
| /// \brief Method for building and launching the pipeline. | |||||
| /// \param[in] ops - a vector of DatasetOp in the data pipeline. | |||||
| /// \return - a Status error code, returns OK if no error encountered. | |||||
| Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds); | |||||
| /// \brief Function to get the next row from the data pipeline. | |||||
| /// \note Type of return data is a map(with column name). | |||||
| /// \param[out] row - the output tensor row. | |||||
| /// \return Returns true if no error encountered else false. | |||||
| bool GetNextRow(TensorMap *row); | |||||
| /// \brief Function to get the next row from the data pipeline. | |||||
| /// \note Type of return data is a vector(without column name). | |||||
| /// \param[out] row - the output tensor row. | |||||
| /// \return Returns true if no error encountered else false. | |||||
| bool GetNextRow(TensorVec *row); | |||||
| /// \brief Function to shut down the data pipeline. | |||||
| void Stop(); | |||||
| class _Iterator { | |||||
| public: | |||||
| explicit _Iterator(Iterator *lt) : lt_{lt}, cur_row_{nullptr} { | |||||
| if (lt_) { | |||||
| cur_row_ = new TensorMap(); | |||||
| lt_->GetNextRow(cur_row_); | |||||
| } | |||||
| } | |||||
| // Destructor | |||||
| ~_Iterator() { | |||||
| if (cur_row_) { | |||||
| delete cur_row_; | |||||
| } | |||||
| } | |||||
| _Iterator &operator++() { | |||||
| if (lt_) { | |||||
| ++ind_; | |||||
| lt_->GetNextRow(cur_row_); | |||||
| } | |||||
| if (cur_row_ && cur_row_->size() == 0) { | |||||
| delete cur_row_; | |||||
| cur_row_ = nullptr; | |||||
| } | |||||
| return *this; | |||||
| } // prefix ++ overload | |||||
| TensorMap &operator*() { return *cur_row_; } // dereference operator | |||||
| TensorMap *operator->() { return cur_row_; } | |||||
| bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; } | |||||
| private: | |||||
| int ind_; // the cur node our Iterator points to | |||||
| Iterator *lt_; | |||||
| TensorMap *cur_row_; | |||||
| }; | |||||
| _Iterator begin() { return _Iterator(this); } | |||||
| _Iterator end() { return _Iterator(nullptr); } | |||||
| private: | |||||
| std::unique_ptr<NativeRuntimeContext> runtime_context_; | |||||
| IteratorConsumer *consumer_; | |||||
| }; | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ | |||||
| @@ -0,0 +1,59 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_ | |||||
| #include <cstddef> | |||||
| #include <cstdint> | |||||
| #include <memory> | |||||
| #include "include/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Abstract class of a memory pool | |||||
| class MemoryPool { | |||||
| public: | |||||
| // Allocate a block of size n | |||||
| virtual Status Allocate(size_t, void **) = 0; | |||||
| // Enlarge or shrink a block from oldSz to newSz | |||||
| virtual Status Reallocate(void **, size_t old_sz, size_t new_sz) = 0; | |||||
| // Free a pointer | |||||
| virtual void Deallocate(void *) = 0; | |||||
| // What is the maximum size I can allocate ? | |||||
| virtual uint64_t get_max_size() const = 0; | |||||
| virtual int PercentFree() const = 0; | |||||
| // Destructor | |||||
| virtual ~MemoryPool() {} | |||||
| }; | |||||
| Status DeMalloc(std::size_t s, void **p, bool); | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| void *operator new(std::size_t, mindspore::Status *, std::shared_ptr<mindspore::dataset::MemoryPool>); | |||||
| void *operator new[](std::size_t, mindspore::Status *, std::shared_ptr<mindspore::dataset::MemoryPool>); | |||||
| void operator delete(void *, std::shared_ptr<mindspore::dataset::MemoryPool>); | |||||
| void operator delete[](void *, std::shared_ptr<mindspore::dataset::MemoryPool>); | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_ | |||||
| @@ -0,0 +1,126 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_ | |||||
| #include <dirent.h> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "include/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| class Path { | |||||
| public: | |||||
| class DirIterator { | |||||
| public: | |||||
| static std::shared_ptr<DirIterator> OpenDirectory(Path *f); | |||||
| ~DirIterator(); | |||||
| bool hasNext(); | |||||
| Path next(); | |||||
| private: | |||||
| explicit DirIterator(Path *f); | |||||
| Path *dir_; | |||||
| DIR *dp_; | |||||
| struct dirent *entry_; | |||||
| }; | |||||
| explicit Path(const std::string &); | |||||
| explicit Path(const char *); | |||||
| ~Path() = default; | |||||
| Path(const Path &); | |||||
| Path &operator=(const Path &); | |||||
| Path(Path &&) noexcept; | |||||
| Path &operator=(Path &&) noexcept; | |||||
| std::string toString() const { return path_; } | |||||
| Path operator+(const Path &); | |||||
| Path operator+(const std::string &); | |||||
| Path operator+(const char *); | |||||
| Path &operator+=(const Path &rhs); | |||||
| Path &operator+=(const std::string &); | |||||
| Path &operator+=(const char *); | |||||
| Path operator/(const Path &); | |||||
| Path operator/(const std::string &); | |||||
| Path operator/(const char *); | |||||
| bool operator==(const Path &rhs) const { return (path_ == rhs.path_); } | |||||
| bool operator!=(const Path &rhs) const { return (path_ != rhs.path_); } | |||||
| bool operator<(const Path &rhs) const { return (path_ < rhs.path_); } | |||||
| bool operator>(const Path &rhs) const { return (path_ > rhs.path_); } | |||||
| bool operator<=(const Path &rhs) const { return (path_ <= rhs.path_); } | |||||
| bool operator>=(const Path &rhs) const { return (path_ >= rhs.path_); } | |||||
| bool Exists(); | |||||
| bool IsDirectory(); | |||||
| Status CreateDirectory(); | |||||
| Status CreateDirectories(); | |||||
| std::string Extension() const; | |||||
| std::string ParentPath(); | |||||
| Status Remove(); | |||||
| Status CreateFile(int *fd); | |||||
| Status OpenFile(int *fd, bool create = false); | |||||
| Status CloseFile(int fd) const; | |||||
| Status TruncateFile(int fd) const; | |||||
| std::string Basename(); | |||||
| friend std::ostream &operator<<(std::ostream &os, const Path &s); | |||||
| private: | |||||
| static char separator_; | |||||
| std::string path_; | |||||
| }; | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_ | |||||
| @@ -0,0 +1,301 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "include/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Internal Sampler class forward declaration | |||||
| class SamplerRT; | |||||
| class SamplerObj : public std::enable_shared_from_this<SamplerObj> { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| SamplerObj(); | |||||
| /// \brief Destructor | |||||
| ~SamplerObj() = default; | |||||
| /// \brief Pure virtual function for derived class to implement parameters validation | |||||
| /// \return The Status code of the function. It returns OK status if parameters are valid. | |||||
| virtual Status ValidateParams() = 0; | |||||
| /// \brief Pure virtual function to convert a SamplerObj class into a runtime sampler object | |||||
| /// \return Shared pointers to the newly created Sampler | |||||
| virtual std::shared_ptr<SamplerRT> Build() = 0; | |||||
| /// \brief Pure virtual function to copy a SamplerObj class | |||||
| /// \return Shared pointers to the newly copied SamplerObj | |||||
| virtual std::shared_ptr<SamplerObj> Copy() = 0; | |||||
| /// \brief Function for derived class to get the shard id of sampler | |||||
| /// \return The shard id of the derived sampler | |||||
| virtual int64_t ShardId() { return 0; } | |||||
| /// \brief Adds a child to the sampler | |||||
| /// \param[in] child The sampler to be added as child | |||||
| /// \return the Status code returned | |||||
| Status AddChild(std::shared_ptr<SamplerObj> child); | |||||
| protected: | |||||
| /// \brief A function that calls build on the children of this sampler | |||||
| /// \param[in] sampler The samplerRT object built from this sampler | |||||
| void BuildChildren(std::shared_ptr<SamplerRT> sampler); | |||||
| std::vector<std::shared_ptr<SamplerObj>> children_; | |||||
| }; | |||||
// Forward declarations of the concrete sampler classes returned by the factory functions below.
class DistributedSamplerObj;
class PKSamplerObj;
class PreBuiltSamplerObj;
class RandomSamplerObj;
class SequentialSamplerObj;
class SubsetRandomSamplerObj;
class WeightedRandomSamplerObj;

/// \brief Function to create a Distributed Sampler.
/// \note A Sampler that access a shard of the dataset.
/// \param[in] num_shards - Number of shards to divide the dataset into.
/// \param[in] shard_id - Shard ID of the current shard within num_shards.
/// \param[in] shuffle - If true, the indices are shuffled.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \param[in] seed - The seed in use when shuffle is true.
/// \param[in] offset - The starting position where access to elements in the dataset begins.
/// \param[in] even_dist - If true, each shard would return the same number of rows (default to true).
///     If false the total rows returned by all the shards would not have overlap.
/// \return Shared pointer to the current Sampler.
std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true,
                                                          int64_t num_samples = 0, uint32_t seed = 1,
                                                          int64_t offset = -1, bool even_dist = true);

/// \brief Function to create a PK Sampler.
/// \note Samples K elements for each P class in the dataset.
///     This will sample all classes.
/// \param[in] num_val - Number of elements to sample for each class.
/// \param[in] shuffle - If true, the class IDs are shuffled.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0);

/// \brief Function to create a Random Sampler.
/// \note Samples the elements randomly.
/// \param[in] replacement - If true, put the sample ID back for the next draw.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement = false, int64_t num_samples = 0);

/// \brief Function to create a Sequential Sampler.
/// \note Samples the dataset elements sequentially, same as not having a sampler.
/// \param[in] start_index - Index to start sampling at (default to start at first id).
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0);

/// \brief Function to create a Subset Random Sampler.
/// \note Samples the elements randomly from a sequence of indices.
/// \param[in] indices - A vector sequence of indices.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(std::vector<int64_t> indices, int64_t num_samples = 0);

/// \brief Function to create a Weighted Random Sampler.
/// \note Samples the elements from [0, len(weights) - 1] randomly with the given
///     weights (probabilities).
/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \param[in] replacement - If true, put the sample ID back for the next draw.
/// \return Shared pointer to the current Sampler.
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(std::vector<double> weights, int64_t num_samples = 0,
                                                                bool replacement = true);
| /* ####################################### Derived Sampler classes ################################# */ | |||||
| class DistributedSamplerObj : public SamplerObj { | |||||
| public: | |||||
| DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed, | |||||
| int64_t offset, bool even_dist); | |||||
| ~DistributedSamplerObj() = default; | |||||
| std::shared_ptr<SamplerRT> Build() override; | |||||
| std::shared_ptr<SamplerObj> Copy() override { | |||||
| auto sampler = std::make_shared<DistributedSamplerObj>(num_shards_, shard_id_, shuffle_, num_samples_, seed_, | |||||
| offset_, even_dist_); | |||||
| for (auto child : children_) { | |||||
| sampler->AddChild(child); | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| Status ValidateParams() override; | |||||
| /// \brief Function to get the shard id of sampler | |||||
| /// \return The shard id of sampler | |||||
| int64_t ShardId() override { return shard_id_; } | |||||
| private: | |||||
| int64_t num_shards_; | |||||
| int64_t shard_id_; | |||||
| bool shuffle_; | |||||
| int64_t num_samples_; | |||||
| uint32_t seed_; | |||||
| int64_t offset_; | |||||
| bool even_dist_; | |||||
| }; | |||||
| class PKSamplerObj : public SamplerObj { | |||||
| public: | |||||
| PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples); | |||||
| ~PKSamplerObj() = default; | |||||
| std::shared_ptr<SamplerRT> Build() override; | |||||
| std::shared_ptr<SamplerObj> Copy() override { | |||||
| auto sampler = std::make_shared<PKSamplerObj>(num_val_, shuffle_, num_samples_); | |||||
| for (auto child : children_) { | |||||
| sampler->AddChild(child); | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| int64_t num_val_; | |||||
| bool shuffle_; | |||||
| int64_t num_samples_; | |||||
| }; | |||||
| class PreBuiltSamplerObj : public SamplerObj { | |||||
| public: | |||||
| explicit PreBuiltSamplerObj(std::shared_ptr<SamplerRT> sampler); | |||||
| ~PreBuiltSamplerObj() = default; | |||||
| std::shared_ptr<SamplerRT> Build() override; | |||||
| std::shared_ptr<SamplerObj> Copy() override; | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| std::shared_ptr<SamplerRT> sp_; | |||||
| }; | |||||
| class RandomSamplerObj : public SamplerObj { | |||||
| public: | |||||
| RandomSamplerObj(bool replacement, int64_t num_samples); | |||||
| ~RandomSamplerObj() = default; | |||||
| std::shared_ptr<SamplerRT> Build() override; | |||||
| std::shared_ptr<SamplerObj> Copy() override { | |||||
| auto sampler = std::make_shared<RandomSamplerObj>(replacement_, num_samples_); | |||||
| for (auto child : children_) { | |||||
| sampler->AddChild(child); | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| bool replacement_; | |||||
| int64_t num_samples_; | |||||
| }; | |||||
| class SequentialSamplerObj : public SamplerObj { | |||||
| public: | |||||
| SequentialSamplerObj(int64_t start_index, int64_t num_samples); | |||||
| ~SequentialSamplerObj() = default; | |||||
| std::shared_ptr<SamplerRT> Build() override; | |||||
| std::shared_ptr<SamplerObj> Copy() override { | |||||
| auto sampler = std::make_shared<SequentialSamplerObj>(start_index_, num_samples_); | |||||
| for (auto child : children_) { | |||||
| sampler->AddChild(child); | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| int64_t start_index_; | |||||
| int64_t num_samples_; | |||||
| }; | |||||
| class SubsetRandomSamplerObj : public SamplerObj { | |||||
| public: | |||||
| SubsetRandomSamplerObj(std::vector<int64_t> indices, int64_t num_samples); | |||||
| ~SubsetRandomSamplerObj() = default; | |||||
| std::shared_ptr<SamplerRT> Build() override; | |||||
| std::shared_ptr<SamplerObj> Copy() override { | |||||
| auto sampler = std::make_shared<SubsetRandomSamplerObj>(indices_, num_samples_); | |||||
| for (auto child : children_) { | |||||
| sampler->AddChild(child); | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| const std::vector<int64_t> indices_; | |||||
| int64_t num_samples_; | |||||
| }; | |||||
| class WeightedRandomSamplerObj : public SamplerObj { | |||||
| public: | |||||
| explicit WeightedRandomSamplerObj(std::vector<double> weights, int64_t num_samples = 0, bool replacement = true); | |||||
| ~WeightedRandomSamplerObj() = default; | |||||
| std::shared_ptr<SamplerRT> Build() override; | |||||
| std::shared_ptr<SamplerObj> Copy() override { | |||||
| auto sampler = std::make_shared<WeightedRandomSamplerObj>(weights_, num_samples_, replacement_); | |||||
| for (auto child : children_) { | |||||
| sampler->AddChild(child); | |||||
| } | |||||
| return sampler; | |||||
| } | |||||
| Status ValidateParams() override; | |||||
| private: | |||||
| const std::vector<double> weights_; | |||||
| int64_t num_samples_; | |||||
| bool replacement_; | |||||
| }; | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_ | |||||
| @@ -0,0 +1,105 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_ | |||||
// DEPRECATED: compiler-specific "deprecated" attribute.
//   GCC / Clang -> __attribute__((deprecated))
//   MSVC        -> __declspec(deprecated)
//   otherwise   -> expands to nothing, and a compile-time message is emitted.
#if defined(__GNUC__) || defined(__clang__)
#  define DEPRECATED __attribute__((deprecated))
#elif defined(_MSC_VER)
#  define DEPRECATED __declspec(deprecated)
#else
#  pragma message("WARNING: You need to implement DEPRECATED for this compiler")
#  define DEPRECATED
#endif
| #include <iostream> | |||||
| #include <string> | |||||
| #include <utility> | |||||
| #include "include/ms_status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
// Evaluate _s once; if the resulting Status is an error, return it from the enclosing function.
#define RETURN_IF_NOT_OK(_s) \
  do {                       \
    Status __rc = (_s);      \
    if (__rc.IsError()) {    \
      return __rc;           \
    }                        \
  } while (false)

// Unconditionally return a kMDUnexpectedError Status carrying the current line, file and message _e.
#define RETURN_STATUS_UNEXPECTED(_e)                                       \
  do {                                                                     \
    return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, _e); \
  } while (false)

// Return a kMDUnexpectedError Status with message _e when _condition is false.
#define CHECK_FAIL_RETURN_UNEXPECTED(_condition, _e)                         \
  do {                                                                       \
    if (!(_condition)) {                                                     \
      return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, _e); \
    }                                                                        \
  } while (false)

// Return a kMDSyntaxError Status with message _e when _condition is false.
#define CHECK_FAIL_RETURN_SYNTAX_ERROR(_condition, _e)                   \
  do {                                                                   \
    if (!(_condition)) {                                                 \
      return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, _e); \
    }                                                                    \
  } while (false)

// Return a kMDUnexpectedError Status naming the pointer expression when _ptr is nullptr.
#define RETURN_UNEXPECTED_IF_NULL(_ptr)                                         \
  do {                                                                          \
    if ((_ptr) == nullptr) {                                                    \
      std::string err_msg = "The pointer[" + std::string(#_ptr) + "] is null."; \
      RETURN_STATUS_UNEXPECTED(err_msg);                                        \
    }                                                                           \
  } while (false)

// Return Status::OK() from the enclosing function when _condition is true.
#define RETURN_OK_IF_TRUE(_condition) \
  do {                                \
    if (_condition) {                 \
      return Status::OK();            \
    }                                 \
  } while (false)

// Unconditionally return a kMDSyntaxError Status carrying the current line, file and message _e.
#define RETURN_STATUS_SYNTAX_ERROR(_e)                                 \
  do {                                                                 \
    return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, _e); \
  } while (false)

// Evaluate _s once; if it is an error, log it and return _r (for functions that do not return Status).
// NOTE(review): MS_LOG is not defined in this header; presumably supplied by another include - confirm.
#define RETURN_SECOND_IF_ERROR(_s, _r) \
  do {                                 \
    Status __rc = (_s);                \
    if (__rc.IsError()) {              \
      MS_LOG(ERROR) << __rc;           \
      return _r;                       \
    }                                  \
  } while (false)
// Memory-usage helpers; only declared on non-Windows builds.
#if !(defined(_WIN32) || defined(_WIN64))
// Threshold constant for memory usage, as a fraction.
const float MAX_MEMORY_USAGE_THRESHOLD = 0.95f;

// NOTE(review): presumably returns current memory usage as a fraction comparable with
// MAX_MEMORY_USAGE_THRESHOLD - confirm against the definition.
float GetMemoryUsage();
#endif
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_ | |||||
| @@ -0,0 +1,632 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_ | |||||
| #include <deque> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| #undef HAVE_STDDEF_H | |||||
| #undef HAVE_STDLIB_H | |||||
| #endif | |||||
| #include "include/constants.h" | |||||
| #include "include/data_type.h" | |||||
| #include "include/tensor_helpers.h" | |||||
| #include "include/tensor_shape.h" | |||||
| #include "include/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
// Forward declarations: Tensor is defined below, Allocator is only referred to through smart pointers here.
class Tensor;

template <typename T>
class Allocator;

// Owning pointer to an allocator of unsigned char.
using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;

// An allocator shared_ptr for Tensors
using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>;

// type of offset values to store strings locations
using offset_t = uint32_t;

// Shared pointer to a Tensor.
using TensorPtr = std::shared_ptr<Tensor>;
| class Tensor { | |||||
| public: | |||||
| Tensor() = delete; | |||||
| Tensor(const Tensor &other) = delete; | |||||
| Tensor &operator=(const Tensor &other) = delete; | |||||
| /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead | |||||
| /// \note The shape and type information should be known and valid | |||||
| /// \note The constructor does not allocate data | |||||
| /// \param shape TensorShape | |||||
| /// \param type DataType | |||||
| Tensor(const TensorShape &shape, const DataType &type); | |||||
| /// Move constructor | |||||
| /// \param other Tensor to be moved | |||||
| Tensor(Tensor &&other) noexcept; | |||||
| /// Move assignment operator | |||||
| /// \param other Tensor to be moved | |||||
| Tensor &operator=(Tensor &&other) noexcept; | |||||
| /// Create a numeric tensor with type and shape. Items of the tensor would be uninitialized. | |||||
| /// \param[in] shape shape of the output tensor | |||||
| /// \param[in] type type of the output tensor | |||||
| /// \param[out] out Generated tensor | |||||
| /// \return Status code | |||||
| static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out); | |||||
| /// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type. | |||||
| /// Data will be copied into the new created tensor. | |||||
| /// \param[in] shape shape of the output tensor | |||||
| /// \param[in] type type of the output tensor | |||||
| /// \param[in] src pointer to the source data | |||||
| /// \param[out] out Generated tensor | |||||
| /// \return Status code | |||||
| static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out); | |||||
| /// Create a tensor from a pointer in memory and length. Data will be copied into the new created tensor. | |||||
| /// \param[in] shape shape of the output tensor | |||||
| /// \param[in] type type of the output tensor | |||||
| /// \param[in] src pointer to the source data | |||||
| /// \param[in] length length of the src data | |||||
| /// \param[out] out Generated tensor | |||||
| /// \return Status code | |||||
| static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, | |||||
| const dsize_t &length, TensorPtr *out); | |||||
| /// Create a copy of the input tensor. | |||||
| /// The shape, type, buffer and byte size of `in` are forwarded to CreateFromMemory, which copies the data. | |||||
| /// \param[in] in original tensor to be copied; must not be null | |||||
| /// \param[out] out output tensor to be generated | |||||
| /// \return Status code; an error is returned when `in` is null | |||||
| static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) { | |||||
| // Guard against dereferencing an empty shared_ptr in the call below. | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(in != nullptr, "Invalid input, the source tensor is null."); | |||||
| return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out); | |||||
| } | |||||
| /// Create a Tensor from a given list of values. | |||||
| /// \tparam T type of the values to be inserted. | |||||
| /// \param[in] items elements of the tensor | |||||
| /// \param[in] shape shape of the output tensor; its element count must equal items.size() | |||||
| /// \param[out] out output argument to hold the created Tensor | |||||
| /// \return Status Code | |||||
| template <typename T> | |||||
| static Status CreateFromVector(const std::vector<T> &items, const TensorShape &shape, TensorPtr *out) { | |||||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||||
| static_cast<dsize_t>(items.size()) == shape.NumOfElements(), | |||||
| "Number of elements in the vector does not match the number of elements of the shape required"); | |||||
| // cppcheck-suppress shadowFunction | |||||
| DataType type = DataType::FromCType<T>(); | |||||
| // data() is well defined on an empty vector, whereas &items[0] would be undefined behaviour. | |||||
| // CreateFromMemory is expected to cope with the empty case. | |||||
| auto items_ptr = reinterpret_cast<const uchar *>(items.data()); | |||||
| return CreateFromMemory(shape, type, items_ptr, out); | |||||
| } | |||||
| /// Build a rank-1 Tensor whose only dimension is the number of supplied items. | |||||
| /// \tparam T type of the values to be inserted. | |||||
| /// \param[in] items elements of the tensor | |||||
| /// \param[out] out output argument to hold the created Tensor | |||||
| /// \return Status Code | |||||
| template <typename T> | |||||
| static Status CreateFromVector(const std::vector<T> &items, TensorPtr *out) { | |||||
| const auto num_items = static_cast<dsize_t>(items.size()); | |||||
| const TensorShape flat_shape({num_items}); | |||||
| // Delegate to the shape-aware overload with the derived 1D shape. | |||||
| return CreateFromVector(items, flat_shape, out); | |||||
| } | |||||
| /// Create a boolean Tensor of the given shape from a list of boolean values. | |||||
| /// The values are first copied into a uint8_t vector and stored through the generic CreateFromVector; | |||||
| /// the type of the resulting tensor is then overwritten with DE_BOOL. | |||||
| /// \param[in] items elements of the tensor | |||||
| /// \param[in] shape shape of the output tensor | |||||
| /// \param[out] out output argument to hold the created Tensor | |||||
| /// \return Status Code | |||||
| static Status CreateFromVector(const std::vector<bool> &items, const TensorShape &shape, TensorPtr *out) { | |||||
| std::vector<uint8_t> as_bytes; | |||||
| as_bytes.assign(items.begin(), items.end()); | |||||
| RETURN_IF_NOT_OK(CreateFromVector(as_bytes, shape, out)); | |||||
| (*out)->type_ = DataType(DataType::DE_BOOL); | |||||
| return Status::OK(); | |||||
| } | |||||
| /// Create a numeric scalar Tensor holding a copy of the given value. | |||||
| /// The value is handed to CreateFromMemory as raw bytes together with a scalar shape. | |||||
| /// \tparam T type of value | |||||
| /// \param[in] item value | |||||
| /// \param[out] out Created tensor | |||||
| /// \return Status code | |||||
| template <typename T> | |||||
| static Status CreateScalar(const T &item, TensorPtr *out) { | |||||
| const uchar *raw_item = reinterpret_cast<const uchar *>(&item); | |||||
| const DataType item_type = DataType::FromCType<T>(); | |||||
| return CreateFromMemory(TensorShape::CreateScalar(), item_type, raw_item, out); | |||||
| } | |||||
| /// Create a tensor from a binary file on disk. | |||||
| /// \param[in] path file to be read | |||||
| /// \param[out] out Created Tensor | |||||
| /// \return Status code | |||||
| static Status CreateFromFile(const std::string &path, TensorPtr *out); | |||||
| /// Destruct the tensor and release the memory using the allocator | |||||
| virtual ~Tensor(); | |||||
| /// Equality operator. compares tensor shape, type and data | |||||
| /// \param[in] rhs Tensor to be compared with | |||||
| /// \return bool | |||||
| bool operator==(const Tensor &rhs) const; | |||||
| bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); } | |||||
| /// Get item located at `index`, caller needs to provide the type. | |||||
| /// \tparam T | |||||
| /// \param[in] index vector<dsize_t> | |||||
| /// \return return the item specified at index | |||||
| template <typename T> | |||||
| Status GetItemAt(T *o, const std::vector<dsize_t> &index) const; | |||||
| /// Get string located at `index`. | |||||
| /// \param[in] index vector<dsize_t> | |||||
| /// \return return std::string_view specified at index | |||||
| Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const; | |||||
| template <typename T> | |||||
| Status GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const; | |||||
| template <typename T> | |||||
| Status GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const; | |||||
| template <typename T> | |||||
| Status GetFloatAt(T *o, const std::vector<dsize_t> &index) const; | |||||
| /// Write `value` into the element addressed by `index`. | |||||
| /// \tparam T type of the stored value | |||||
| /// \param[in] index location of the item | |||||
| /// \param[in] value of type `T` | |||||
| /// \return Status code; a failure of GetItemPtr is passed back through RETURN_IF_NOT_OK | |||||
| template <typename T> | |||||
| Status SetItemAt(const std::vector<dsize_t> &index, const T &value) { | |||||
| T *item_slot = nullptr; | |||||
| RETURN_IF_NOT_OK(GetItemPtr<T>(&item_slot, index)); | |||||
| *item_slot = value; | |||||
| return Status::OK(); | |||||
| } | |||||
| Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value); | |||||
| /// fill tensor with Zeros. Does not support strings. | |||||
| Status Zero(); | |||||
| /// Fill all elements in the Tensor with the given value of type `T`. Does not support strings. | |||||
| /// \tparam T | |||||
| /// \param value[in] | |||||
| template <typename T> | |||||
| Status Fill(const T &value); | |||||
| /// Getter function for shape | |||||
| /// \return | |||||
| const TensorShape &shape() const { return shape_; } | |||||
| /// Check if tensor has data | |||||
| /// \return bool - true if tensor is not empty | |||||
| bool HasData() const { return data_ != nullptr; } | |||||
| /// Reshape the tensor. The given shape should have the same number of elements in the Tensor | |||||
| /// \param shape | |||||
| virtual Status Reshape(const TensorShape &shape); | |||||
| /// \return number of elements in this tensor | |||||
| dsize_t Size() const { return shape().NumOfElements(); } | |||||
| /// \return the number of bytes this tensor needs | |||||
| dsize_t SizeInBytes() const { | |||||
| // With an explicit end pointer the size is the distance between the two pointers. | |||||
| if (data_end_ != nullptr) { | |||||
| return data_end_ - data_; | |||||
| } | |||||
| // Otherwise derive it from the element size and the element count. | |||||
| return type_.SizeInBytes() * shape_.NumOfElements(); | |||||
| } | |||||
| /// \return the rank of the tensor | |||||
| dsize_t Rank() const { return shape().Rank(); } | |||||
| /// Get the starting memory address as a constant for the data of the tensor. | |||||
| /// The stored data_ pointer is returned as is; no allocation is performed here. | |||||
| /// \return const unsigned char* | |||||
| const unsigned char *GetBuffer() const { return data_; } | |||||
| /// Getter of the type | |||||
| /// \return | |||||
| // cppcheck-suppress shadowFunction | |||||
| DataType type() const { return type_; } | |||||
| /// Provide stream operator for displaying it | |||||
| /// \param output stream | |||||
| /// \param so the Tensor object to be printed | |||||
| /// \return output stream | |||||
| friend std::ostream &operator<<(std::ostream &out, const Tensor &so) { | |||||
| so.Print(out); | |||||
| return out; | |||||
| } | |||||
| /// Invalidate this Tensor by setting the type and shape to unknown and MData to null. | |||||
| /// Calling this method will make the Tensor and its data inaccessible, use it with caution. | |||||
| void Invalidate(); | |||||
| /// Copy input tensor into self at the location index. | |||||
| /// Index is a vector of axes which can be incomplete: | |||||
| /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell. | |||||
| /// \param index | |||||
| /// \param input | |||||
| /// \param partial_insert: boolean to determine if insertion along the full axis is enforced | |||||
| /// \return Status code | |||||
| Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input, | |||||
| const bool partial_insert = false); | |||||
| /// Find the address of the given index. Used in InsertTensor. | |||||
| /// Example: | |||||
| /// Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1 | |||||
| /// \param index incomplete index | |||||
| /// \param output: startAddrofIndex | |||||
| /// \param output: remaining | |||||
| /// \return Status code | |||||
| Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining); | |||||
| /// Expand the shape of the Tensor with one extra dimension. | |||||
| /// For example, if the shape is <512,512,3>: | |||||
| /// *- ExpandDim(0) gives: <1,512,512,3> | |||||
| /// *- ExpandDim(1) gives: <512,1,512,3> | |||||
| /// *- ExpandDim(3) gives: <512,512,3,1> | |||||
| /// \param axis location of the dim | |||||
| virtual Status ExpandDim(const dsize_t &axis); | |||||
| virtual void Squeeze(); | |||||
| /// Calculates the strides of the Tensor | |||||
| /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) | |||||
| /// The strides will be {4,2,1}. | |||||
| /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) | |||||
| /// The strides will be {16,8,4}. | |||||
| /// \return vector of integers | |||||
| std::vector<dsize_t> Strides() const; | |||||
| /// Render the tensor as a string. | |||||
| /// \return std::string holding whatever Print writes to the stream | |||||
| std::string ToString() const { | |||||
| std::stringstream ss; | |||||
| // Print is const, so this method can be const as well and be used on const tensors. | |||||
| this->Print(ss); | |||||
| return ss.str(); | |||||
| } | |||||
| /// Handle negative indices. | |||||
| /// A negative index has `length` added to it; a non-negative index is returned unchanged. | |||||
| /// \param[in] index index to be adjusted | |||||
| /// \param[in] length axis length used to modify index | |||||
| /// \return dsize_t modified index | |||||
| static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; } | |||||
| /// Handle negative indices for a vector of indices. | |||||
| /// Every entry of index_vector is passed through HandleNeg together with the entry of length at the same position. | |||||
| /// \param[in] index_vector vector of indices | |||||
| /// \param[in] length vector of axis lengths; must hold at least as many entries as index_vector | |||||
| /// \return std::vector<dsize_t> modified vector of indices | |||||
| static inline std::vector<dsize_t> HandleNegIndices(std::vector<dsize_t> index_vector, std::vector<dsize_t> length) { | |||||
| std::vector<dsize_t> indices(index_vector.size(), 0); | |||||
| // An unsigned loop index avoids the signed/unsigned comparison against size(). | |||||
| for (size_t i = 0; i < index_vector.size(); i++) { | |||||
| indices[i] = HandleNeg(index_vector[i], length[i]); | |||||
| } | |||||
| return indices; | |||||
| } | |||||
| /// Slice tensor bases on the given indices. Copy the sliced data into out tensor. | |||||
| /// Based on the type of tensor, SliceNumeric or SliceString will be called | |||||
| /// \param[out] out Tensor | |||||
| /// \param[in] slice_options vector of SliceOption objects | |||||
| /// \return Status error code | |||||
| // cppcheck-suppress passedByValue | |||||
| Status Slice(TensorPtr *out, const std::vector<mindspore::dataset::SliceOption> slice_options); | |||||
| /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor. | |||||
| /// The order of the elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6 | |||||
| /// The iterator wraps a raw T* pointer. Comparison operators and the binary +/- operators are const | |||||
| /// and never modify the iterator they are called on. | |||||
| /// \tparam T type of values in the Tensor Iterator | |||||
| template <typename T, bool = true> | |||||
| class TensorIterator { | |||||
| public: | |||||
| using iterator_category = std::random_access_iterator_tag; | |||||
| using value_type = T; | |||||
| using difference_type = ptrdiff_t; | |||||
| using pointer = T *; | |||||
| using reference = T &; | |||||
| /// Construct from a raw byte pointer, reinterpreted as a pointer to T. | |||||
| explicit TensorIterator(uchar *ptr = nullptr) : ptr_(reinterpret_cast<T *>(ptr)) {} | |||||
| TensorIterator(const TensorIterator<T> &raw_iterator) : ptr_(raw_iterator.ptr_) {} | |||||
| ~TensorIterator() = default; | |||||
| // cppcheck-suppress operatorEqVarError | |||||
| TensorIterator<T> &operator=(const TensorIterator<T> &rhs) { | |||||
| ptr_ = rhs.ptr_; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<T> &operator=(T *rhs) { | |||||
| ptr_ = rhs; | |||||
| return *this; | |||||
| } | |||||
| /// Two iterators are equal when they point at the same address. | |||||
| bool operator==(const TensorIterator<T> &rhs) const { return ptr_ == rhs.ptr_; } | |||||
| bool operator!=(const TensorIterator<T> &rhs) const { return !(*this == rhs); } | |||||
| /// True when the iterator holds a non-null pointer. | |||||
| operator bool() const { return ptr_ != nullptr; } | |||||
| T &operator*() { return *ptr_; } | |||||
| const T &operator*() const { return *ptr_; } | |||||
| T *operator->() { return ptr_; } | |||||
| TensorIterator<T> &operator+=(const ptrdiff_t &inc) { | |||||
| ptr_ += inc; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<T> &operator-=(const ptrdiff_t &inc) { | |||||
| ptr_ -= inc; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<T> &operator++() { | |||||
| ++ptr_; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<T> &operator--() { | |||||
| --ptr_; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<T> operator++(int) { | |||||
| auto temp(*this); | |||||
| ++ptr_; | |||||
| return temp; | |||||
| } | |||||
| TensorIterator<T> operator--(int) { | |||||
| auto temp(*this); | |||||
| --ptr_; | |||||
| return temp; | |||||
| } | |||||
| /// Return a new iterator advanced by `inc` elements; this iterator is left untouched. | |||||
| TensorIterator<T> operator+(const ptrdiff_t &inc) const { | |||||
| auto temp(*this); | |||||
| temp.ptr_ += inc; | |||||
| return temp; | |||||
| } | |||||
| /// Return a new iterator moved back by `inc` elements; this iterator is left untouched. | |||||
| TensorIterator<T> operator-(const ptrdiff_t &inc) const { | |||||
| auto temp(*this); | |||||
| temp.ptr_ -= inc; | |||||
| return temp; | |||||
| } | |||||
| protected: | |||||
| T *ptr_; | |||||
| }; | |||||
| // Specialization of TensorIterator for strings. It returns std::string_view for every item. | |||||
| // The iterator keeps the start of the tensor buffer and the index of the current string. | |||||
| // Comparison operators and the binary +/- operators are const and never modify the iterator they are called on. | |||||
| // \tparam DUMMY, used to be able to specialize the inner class | |||||
| template <bool DUMMY> | |||||
| class TensorIterator<std::string_view, DUMMY> { | |||||
| public: | |||||
| using iterator_category = std::random_access_iterator_tag; | |||||
| using value_type = std::string_view; | |||||
| using difference_type = ptrdiff_t; | |||||
| using pointer = std::string_view *; | |||||
| using reference = std::string_view &; | |||||
| // Initializers are listed in member declaration order (index_ before data_). | |||||
| explicit TensorIterator(uchar *data = nullptr, dsize_t index = 0) | |||||
| : index_(index), data_(reinterpret_cast<const char *>(data)) {} | |||||
| TensorIterator(const TensorIterator<std::string_view, DUMMY> &raw_iterator) | |||||
| : index_(raw_iterator.index_), data_(raw_iterator.data_) {} | |||||
| ~TensorIterator() = default; | |||||
| // Two iterators are equal when they refer to the same buffer and the same index. | |||||
| bool operator==(const TensorIterator<std::string_view> &rhs) const { | |||||
| return data_ == rhs.data_ && index_ == rhs.index_; | |||||
| } | |||||
| bool operator!=(const TensorIterator<std::string_view> &rhs) const { return !(*this == rhs); } | |||||
| // True when the iterator holds a non-null buffer pointer. | |||||
| operator bool() const { return data_ != nullptr; } | |||||
| // Reads the offset stored at position index_ at the front of the buffer and returns a view that | |||||
| // starts at data_ + offset; the view's length is determined by the terminating null character. | |||||
| std::string_view operator*() const { | |||||
| auto offset_ = reinterpret_cast<const offset_t *>(data_); | |||||
| offset_t start = offset_[index_]; | |||||
| return std::string_view{data_ + start}; | |||||
| } | |||||
| TensorIterator<std::string_view> &operator+=(const dsize_t &inc) { | |||||
| index_ += inc; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<std::string_view> &operator-=(const dsize_t &inc) { | |||||
| index_ -= inc; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<std::string_view> &operator++() { | |||||
| ++index_; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<std::string_view> &operator--() { | |||||
| --index_; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<std::string_view> operator++(int) { | |||||
| auto temp(*this); | |||||
| ++index_; | |||||
| return temp; | |||||
| } | |||||
| TensorIterator<std::string_view> operator--(int) { | |||||
| auto temp(*this); | |||||
| --index_; | |||||
| return temp; | |||||
| } | |||||
| // Return a new iterator advanced by `inc` items; this iterator is left untouched. | |||||
| TensorIterator<std::string_view> operator+(const dsize_t &inc) const { | |||||
| auto temp(*this); | |||||
| temp.index_ += inc; | |||||
| return temp; | |||||
| } | |||||
| // Return a new iterator moved back by `inc` items; this iterator is left untouched. | |||||
| TensorIterator<std::string_view> operator-(const dsize_t &inc) const { | |||||
| auto temp(*this); | |||||
| temp.index_ -= inc; | |||||
| return temp; | |||||
| } | |||||
| protected: | |||||
| dsize_t index_; | |||||
| const char *data_; | |||||
| }; | |||||
| /// Return a TensorIterator that points to the start of the Tensor. | |||||
| /// It's the user's responsibility to use the correct type that matches the Tensor type; | |||||
| /// the iterator is built directly from data_ and no check against type_ is made here. | |||||
| /// \tparam T The type of values in the Tensor | |||||
| /// \return TensorIterator | |||||
| template <typename T> | |||||
| TensorIterator<T> begin() { | |||||
| return TensorIterator<T>(data_); | |||||
| } | |||||
| /// Return a linear iterator that points to the place after the last element of the Tensor. | |||||
| /// The iterator is built directly from data_end_. | |||||
| /// \tparam T The type of values in the Tensor | |||||
| /// \return TensorIterator | |||||
| template <typename T> | |||||
| TensorIterator<T> end() { | |||||
| return TensorIterator<T>(data_end_); | |||||
| } | |||||
| /// Copies the last dimension at `index` from Tensor `src` to this Tensor. | |||||
| /// \param[in] src Tensor | |||||
| /// \param[in] index vector to the start of the dimension. The last dim should be 0 | |||||
| /// \return Status | |||||
| Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index); | |||||
| protected: | |||||
| /// Allocate memory for the tensor using the data_allocator | |||||
| /// \param[in] length number of bytes to be allocated | |||||
| /// \return Error Status | |||||
| Status AllocateBuffer(const dsize_t &length); | |||||
| /// Get the starting memory address for the data of the tensor. | |||||
| /// The stored data_ pointer is returned as is; no allocation is performed here. | |||||
| /// \return unsigned char* | |||||
| unsigned char *GetMutableBuffer() { return data_; } | |||||
| /// A function that prints Tensor recursively, first called by print | |||||
| /// \param[in] out | |||||
| /// \param[in] cur_dim | |||||
| /// \param[in] cur_index | |||||
| void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const; | |||||
| /// A function that prints info about the tensor | |||||
| /// \param[out] out output stream | |||||
| void Print(std::ostream &out) const; | |||||
| /// A function that print the value as specified by its index | |||||
| /// \param[in] index vector representing the index | |||||
| /// \param[out] out | |||||
| void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const; | |||||
| /// Get pointer to item located at `index`, caller needs to provide the type. | |||||
| /// \tparam T | |||||
| /// \param[in] index vector<dsize_t> | |||||
| /// \return return a pointer to the item specified at index of type `T` | |||||
| template <typename T> | |||||
| Status GetItemPtr(T **, const std::vector<dsize_t> &index) const; | |||||
| /// Get pointer to string located at `index` and the length of string | |||||
| /// \param[in] index vector<dsize_t> | |||||
| /// \return return a pointer to the string specified at index and the length of the string | |||||
| Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const; | |||||
| /// Given a flat index of an item string, return the start and length of the item | |||||
| /// \param[in] index flat index of the item | |||||
| /// \param[out] string_start address of the start of the string | |||||
| /// \param[out] length of the string | |||||
| Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const; | |||||
| /// Skip the offsets and return the start of the buffer where the real strings are stored. | |||||
| /// The returned address is data_ advanced by (NumOfElements() + 1) offsets of kOffsetSize bytes each. | |||||
| /// Caller needs to check if the tensor's type is a string, otherwise an undefined address would be returned. | |||||
| /// \return address of the first string of the tensor. | |||||
| uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; } | |||||
| /// all access to shape_ should be via shape | |||||
| TensorShape shape_; | |||||
| /// data type of tensor | |||||
| DataType type_; | |||||
| /// pointer to the start of the physical data | |||||
| unsigned char *data_; | |||||
| /// An allocator for data_ | |||||
| CharAllocPtr data_allocator_; | |||||
| /// pointer to the end of the physical data | |||||
| unsigned char *data_end_ = nullptr; | |||||
| private: | |||||
| /// Slice numeric tensors. | |||||
| Status SliceNumeric(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape); | |||||
| /// Slice string tensors | |||||
| Status SliceString(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape); | |||||
| /// Copy raw data of a array based on shape and strides to the destination pointer | |||||
| /// \param dst [out] Pointer to the destination array where the content is to be copied | |||||
| /// \param[in] src Pointer to the source of strided array to be copied | |||||
| /// \param[in] shape shape of the source array | |||||
| /// \param[in] strides strides of the source array | |||||
| /// \param[in] type_size number of bytes needed to store one array element's type | |||||
| /// \return Status Code | |||||
| static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape, | |||||
| std::vector<dsize_t> strides, uint8_t type_size); | |||||
| /// const of the size of the offset variable | |||||
| static constexpr uint8_t kOffsetSize = sizeof(offset_t); | |||||
| }; | |||||
| /// Specialization of end() for string tensors. | |||||
| /// The end iterator is formed from the start of the buffer (data_) and an index equal to the | |||||
| /// number of elements, rather than from data_end_. | |||||
| /// \return TensorIterator positioned one past the last string | |||||
| template <> | |||||
| inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() { | |||||
| return TensorIterator<std::string_view>(data_, shape_.NumOfElements()); | |||||
| } | |||||
| /// Create a string scalar Tensor from the given value. | |||||
| /// The string is wrapped in a one-element vector and passed to CreateFromVector with a scalar shape. | |||||
| /// \param[in] item value | |||||
| /// \param[out] out Created tensor | |||||
| /// \return Status code | |||||
| template <> | |||||
| inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) { | |||||
| const std::vector<std::string> single_item{item}; | |||||
| return CreateFromVector<std::string>(single_item, TensorShape::CreateScalar(), out); | |||||
| } | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_ | |||||
| @@ -0,0 +1,83 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_ | |||||
| #include <memory> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "include/constants.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| /// Holds the start, stop and step values of a slice. | |||||
| /// A slice whose step is zero is reported as invalid by valid(). | |||||
| class Slice { | |||||
| public: | |||||
| /// Default slice: all three fields are zero, so valid() returns false. | |||||
| Slice() : Slice(0, 0, 0) {} | |||||
| /// Fully specified slice; every other value constructor delegates to this one. | |||||
| Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {} | |||||
| /// Slice from start to stop with a step of 1. | |||||
| Slice(dsize_t start, dsize_t stop) : Slice(start, stop, 1) {} | |||||
| /// Slice from 0 to stop with a step of 1. | |||||
| explicit Slice(dsize_t stop) : Slice(0, stop, 1) {} | |||||
| Slice(Slice const &slice) = default; | |||||
| ~Slice() = default; | |||||
| /// \return true when the step is non-zero | |||||
| bool valid() const { return !(step_ == 0); } | |||||
| dsize_t start_; | |||||
| dsize_t stop_; | |||||
| dsize_t step_; | |||||
| }; | |||||
| /// Describes how one dimension is sliced: everything (all_), an explicit list of indices (indices_), | |||||
| /// or a Slice object (slice_). Each constructor sets exactly one of them; the others keep their defaults. | |||||
| class SliceOption { | |||||
| public: | |||||
| /// \param[in] all value stored in all_ | |||||
| explicit SliceOption(bool all) : all_(all) {} | |||||
| /// \param[in] indices indices to select; taken by value and moved into the member to avoid a second copy | |||||
| explicit SliceOption(std::vector<dsize_t> indices) : indices_(std::move(indices)) {} | |||||
| /// \param[in] slice Slice object to store | |||||
| explicit SliceOption(Slice slice) : slice_(slice) {} | |||||
| SliceOption(SliceOption const &slice) = default; | |||||
| ~SliceOption() = default; | |||||
| // only one of the following will be valid | |||||
| // given indices to slice the Tensor. | |||||
| std::vector<dsize_t> indices_ = {}; | |||||
| // Slice object. All start, stop and step are 0 if invalid. | |||||
| Slice slice_; | |||||
| bool all_ = false; | |||||
| }; | |||||
| /// Recursive helper function to generate indices based on vector of SliceOptions. It recursively iterates through each | |||||
| /// range represented by slice_list to generate a list of indices to be sliced. | |||||
| /// Example: For a 4 x 2 tensor, and with slice_list = {SliceOption({0})} (the first row), matrix will become | |||||
| /// {{0}}. For slice_list = {SliceOption(all), SliceOption({0})} (the first column), matrix will become | |||||
| /// {{0, 0}, {1, 0}, {2, 0}, {3, 0}}. | |||||
| /// For slice_list = {SliceOption({0, 2})}, matrix will become {{0}, {2}}. The size of each nested array is always | |||||
| /// equal to (slice_list).size(). | |||||
| /// \param[in] depth used to keep track of recursion level | |||||
| /// \param[in] numbers vector used to represent current index | |||||
| /// \param[in] slice_list vector of SliceOption objects | |||||
| /// \param[out] matrix 2D vector to be populated with desired indices | |||||
| void IndexGeneratorHelper(int8_t depth, std::vector<dsize_t> *numbers, const std::vector<SliceOption> &slice_list, | |||||
| std::vector<std::vector<dsize_t>> *matrix); | |||||
| /// Generate indices based on vector of SliceOptions | |||||
| /// Calls the recursive helper function IndexGeneratorHelper | |||||
| /// \param[in] slice_list vector of SliceOption objects. Note: If the user passes | |||||
| /// {SliceOption(true), SliceOption(true)}, it will return a M x 2 vector, instead of reducing it to | |||||
| /// {SliceOption(true)} first to only generate a M x 1 vector. | |||||
| /// \return std::vector<std::vector<dsize_t>> 2D vector of generated indices, M x (slice_list).size() | |||||
| std::vector<std::vector<dsize_t>> IndexGenerator(const std::vector<SliceOption> &slice_list); | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_ | |||||
| @@ -0,0 +1,176 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_ | |||||
| #include <cstdint> | |||||
| #include <ostream> | |||||
| #include <sstream> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "include/constants.h" | |||||
| #include "include/status.h" | |||||
| #include "include/allocator.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| using IntAlloc = Allocator<dsize_t>; | |||||
| // Class that represents a shape of a Tensor. A shape can be: | |||||
| // -# Known shape (mKnown = true) | |||||
| // -# Scalar --> empty vector --> <> | |||||
| // -# n-Dim --> not empty vector --> <d1, d2, d2, d3, ...> where di is >= 0\n | |||||
| // Example: <1,2>, <1>, <1,13,10,11,1> | |||||
| // -# Unknown shape (mKnown = false) | |||||
| // -# Rank is unknown --> empty vector --> <> | |||||
| // -# one or more dim is unknown --> not empty vector --> <d1, d2, d2, d3, ...> where di is unknown\n | |||||
| // Example: <3,?> (the 1st dim is unknown)\n | |||||
| // <2,?,?,?> (all dims but the 0th dim are unknown) | |||||
| /// \brief TensorShape supports any dim > 0 and < 2^31-1 | |||||
| class TensorShape { | |||||
| public: | |||||
| static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension | |||||
| // Force the compiler to not create a no-arg constructor | |||||
| TensorShape() = delete; | |||||
| /// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). | |||||
| /// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown | |||||
| /// \param[in] list | |||||
| explicit TensorShape(const std::initializer_list<dsize_t> &list); | |||||
| /// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ). | |||||
| /// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown | |||||
| /// \param[in] list | |||||
| explicit TensorShape(const std::vector<dsize_t> &list); | |||||
| /// \brief Copy constructor | |||||
| /// \param[in] shape | |||||
| TensorShape(const TensorShape &shape); | |||||
| ~TensorShape() = default; | |||||
| /// \brief Create a scalar Shape (i.e., empty shape with mKnown = true) | |||||
| /// \return TensorShape | |||||
| static TensorShape CreateScalar() { return TensorShape({}); } | |||||
| /// \brief Create a shape with an unknown rank. | |||||
| /// \return TensorShape | |||||
| static TensorShape CreateUnknownRankShape(); | |||||
| /// \brief Create a shape with a known rank . | |||||
| /// \return TensorShape | |||||
| static TensorShape CreateUnknownShapeWithRank(dsize_t rank); | |||||
| /// \brief Insert a new dim into a copy of the current shape. | |||||
| /// \param[in] dim to be added | |||||
| /// \param[in] axis the index where dim should be added | |||||
| /// \return New modified shape | |||||
| TensorShape InsertDim(dsize_t axis, dsize_t dim) const; | |||||
| /// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> | |||||
| /// \param[in] dim | |||||
| /// \return | |||||
| TensorShape PrependDim(dsize_t dim) const; | |||||
| /// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> | |||||
| /// \param[in] dim | |||||
| /// \return | |||||
| TensorShape AppendDim(dsize_t dim) const; | |||||
| dsize_t Size() const { return raw_shape_.size(); } | |||||
| dsize_t Rank() const { return raw_shape_.size(); } | |||||
| bool known() const { return known_; } | |||||
| bool empty() const { return raw_shape_.empty(); } | |||||
| dsize_t NumOfElements() const; | |||||
| bool operator==(const TensorShape &rhs) const { return known_ == rhs.known_ && raw_shape_ == rhs.raw_shape_; } | |||||
| bool operator!=(const TensorShape &rhs) const { return !(rhs == *this); } | |||||
| dsize_t operator[](const dsize_t index) const { | |||||
| if (index < 0) return raw_shape_[raw_shape_.size() + index]; | |||||
| return raw_shape_[index]; | |||||
| } | |||||
| /// \brief Return the Shape as a vector | |||||
| /// \return | |||||
| std::vector<dsize_t> AsVector() const; | |||||
| /// \brief Returns the class info as a string | |||||
| /// \return | |||||
| std::string ToString() const { | |||||
| std::stringstream ss; | |||||
| ss << *this; | |||||
| return ss.str(); | |||||
| } | |||||
| /// \brief Actual print function used by operator<< | |||||
| /// \param out output string stream | |||||
| void Print(std::ostream &out) const; | |||||
| /// \brief << Stream output operator overload | |||||
| /// This allows you to print the info using stream operators | |||||
| /// \param[in] out - reference to the output stream being overloaded | |||||
| /// \param[in] rO - reference to the TensorShape to display | |||||
| /// \return - the output stream must be returned | |||||
| friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) { | |||||
| so.Print(out); | |||||
| return out; | |||||
| } | |||||
| /// \brief Checks if the given index is a valid index for this tensor. | |||||
| /// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. | |||||
| /// \param[in] index | |||||
| /// \return bool | |||||
| bool IsValidIndex(const std::vector<dsize_t> &index) const; | |||||
| TensorShape Squeeze() const; | |||||
| std::vector<dsize_t> Strides() const; | |||||
| /// \brief Returns the location of the item assuming row major memory layout. | |||||
| /// \param[in] index | |||||
| /// \param[out] flat_index | |||||
| /// \return | |||||
| Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const; | |||||
| private: | |||||
| // True if known and valid shape, false otherwise | |||||
| bool known_; | |||||
| // Vector to keep the dims of the shape. | |||||
| std::vector<dsize_t, IntAlloc> raw_shape_; | |||||
| // Vector to keep the strides of the shape. The size is rank+1 | |||||
| std::vector<dsize_t, IntAlloc> strides_; | |||||
| /// \brief Internal utility function to iterate over a list, | |||||
| /// check if the dim is valid and then insert it into the shape. | |||||
| /// \param[in] list Iterable list | |||||
| /// \return true if the shape is valid and no overflow would be generated when counting the number of elements. | |||||
| /// False otherwise. | |||||
| template <typename T> | |||||
| void AddListToShape(const T &list); | |||||
| }; | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_ | |||||
| @@ -0,0 +1,252 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TRANSFORMS_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TRANSFORMS_H_ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "include/constants.h" | |||||
| #include "include/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| class TensorOp; | |||||
| // Char arrays storing name of corresponding classes (in alphabetical order) | |||||
| constexpr char kComposeOperation[] = "Compose"; | |||||
| constexpr char kDuplicateOperation[] = "Duplicate"; | |||||
| constexpr char kOneHotOperation[] = "OneHot"; | |||||
| constexpr char kPreBuiltOperation[] = "PreBuilt"; | |||||
| constexpr char kRandomApplyOperation[] = "RandomApply"; | |||||
| constexpr char kRandomChoiceOperation[] = "RandomChoice"; | |||||
| constexpr char kRandomSelectSubpolicyOperation[] = "RandomSelectSubpolicy"; | |||||
| constexpr char kTypeCastOperation[] = "TypeCast"; | |||||
| constexpr char kUniqueOperation[] = "Unique"; | |||||
| // Abstract class to represent a dataset in the data pipeline. | |||||
| class TensorOperation : public std::enable_shared_from_this<TensorOperation> { | |||||
| public: | |||||
| /// \brief Constructor | |||||
| TensorOperation() : random_op_(false) {} | |||||
| /// \brief Constructor | |||||
| explicit TensorOperation(bool random) : random_op_(random) {} | |||||
| /// \brief Destructor | |||||
| ~TensorOperation() = default; | |||||
| /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object. | |||||
| /// \return shared pointer to the newly created TensorOp. | |||||
| virtual std::shared_ptr<TensorOp> Build() = 0; | |||||
| virtual Status ValidateParams() = 0; | |||||
| virtual std::string Name() const = 0; | |||||
| /// \brief Check whether the operation is deterministic. | |||||
| /// \return true if this op is a random op (returns non-deterministic result e.g. RandomCrop) | |||||
| bool IsRandomOp() const { return random_op_; } | |||||
| protected: | |||||
| bool random_op_; | |||||
| }; | |||||
| // Helper function to validate fill value | |||||
| Status ValidateVectorFillvalue(const std::string &transform_name, const std::vector<uint8_t> &fill_value); | |||||
| // Helper function to validate probability | |||||
| Status ValidateProbability(const std::string &transform_name, const float &probability); | |||||
| // Helper function to validate padding | |||||
| Status ValidateVectorPadding(const std::string &transform_name, const std::vector<int32_t> &padding); | |||||
| // Helper function to validate size | |||||
| Status ValidateVectorPositive(const std::string &transform_name, const std::vector<int32_t> &size); | |||||
| // Helper function to validate transforms | |||||
| Status ValidateVectorTransforms(const std::string &transform_name, | |||||
| const std::vector<std::shared_ptr<TensorOperation>> &transforms); | |||||
| // Helper function to compare float value | |||||
| bool CmpFloat(const float &a, const float &b, float epsilon = 0.0000000001f); | |||||
| // Transform operations for performing data transformation. | |||||
| namespace transforms { | |||||
| // Transform Op classes (in alphabetical order) | |||||
| class ComposeOperation; | |||||
| class DuplicateOperation; | |||||
| class OneHotOperation; | |||||
| class PreBuiltOperation; | |||||
| class RandomApplyOperation; | |||||
| class RandomChoiceOperation; | |||||
| class TypeCastOperation; | |||||
| /// \brief Function to create a Compose TensorOperation. | |||||
| /// \notes Compose a list of transforms into a single transform. | |||||
| /// \param[in] transforms A vector of transformations to be applied. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<ComposeOperation> Compose(const std::vector<std::shared_ptr<TensorOperation>> &transforms); | |||||
| /// \brief Function to create a Duplicate TensorOperation. | |||||
| /// \notes Duplicate the input tensor to a new output tensor. | |||||
| /// The input tensor is carried over to the output list. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<DuplicateOperation> Duplicate(); | |||||
| /// \brief Function to create a OneHot TensorOperation. | |||||
| /// \notes Convert the labels into OneHot format. | |||||
| /// \param[in] num_classes number of classes. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<OneHotOperation> OneHot(int32_t num_classes); | |||||
| /// \brief Function to create a RandomApply TensorOperation. | |||||
| /// \notes Randomly perform a series of transforms with a given probability. | |||||
| /// \param[in] transforms A vector of transformations to be applied. | |||||
| /// \param[in] prob The probability to apply the transformation list (default=0.5) | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<RandomApplyOperation> RandomApply(const std::vector<std::shared_ptr<TensorOperation>> &transforms, | |||||
| double prob = 0.5); | |||||
| /// \brief Function to create a RandomChoice TensorOperation. | |||||
| /// \notes Randomly selects one transform from a list of transforms to perform operation. | |||||
| /// \param[in] transforms A vector of transformations to be chosen from to apply. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<RandomChoiceOperation> RandomChoice(const std::vector<std::shared_ptr<TensorOperation>> &transforms); | |||||
| /// \brief Function to create a TypeCast TensorOperation. | |||||
| /// \notes Tensor operation to cast to a given MindSpore data type. | |||||
| /// \param[in] data_type mindspore.dtype to be cast to. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<TypeCastOperation> TypeCast(std::string data_type); | |||||
| /* ####################################### Derived TensorOperation classes ################################# */ | |||||
| class ComposeOperation : public TensorOperation { | |||||
| public: | |||||
| explicit ComposeOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms); | |||||
| ~ComposeOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kComposeOperation; } | |||||
| private: | |||||
| std::vector<std::shared_ptr<TensorOperation>> transforms_; | |||||
| }; | |||||
| class DuplicateOperation : public TensorOperation { | |||||
| public: | |||||
| DuplicateOperation() = default; | |||||
| ~DuplicateOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kDuplicateOperation; } | |||||
| }; | |||||
| class OneHotOperation : public TensorOperation { | |||||
| public: | |||||
| explicit OneHotOperation(int32_t num_classes_); | |||||
| ~OneHotOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kOneHotOperation; } | |||||
| private: | |||||
| float num_classes_; | |||||
| }; | |||||
| class PreBuiltOperation : public TensorOperation { | |||||
| public: | |||||
| explicit PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op); | |||||
| ~PreBuiltOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kPreBuiltOperation; } | |||||
| private: | |||||
| std::shared_ptr<TensorOp> op_; | |||||
| }; | |||||
| class RandomApplyOperation : public TensorOperation { | |||||
| public: | |||||
| explicit RandomApplyOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms, double prob); | |||||
| ~RandomApplyOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kRandomApplyOperation; } | |||||
| private: | |||||
| std::vector<std::shared_ptr<TensorOperation>> transforms_; | |||||
| double prob_; | |||||
| }; | |||||
| class RandomChoiceOperation : public TensorOperation { | |||||
| public: | |||||
| explicit RandomChoiceOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms); | |||||
| ~RandomChoiceOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kRandomChoiceOperation; } | |||||
| private: | |||||
| std::vector<std::shared_ptr<TensorOperation>> transforms_; | |||||
| }; | |||||
| class TypeCastOperation : public TensorOperation { | |||||
| public: | |||||
| explicit TypeCastOperation(std::string data_type); | |||||
| ~TypeCastOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kTypeCastOperation; } | |||||
| private: | |||||
| std::string data_type_; | |||||
| }; | |||||
| } // namespace transforms | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TRANSFORMS_H_ | |||||
| @@ -0,0 +1,198 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_LITE_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_LITE_H_ | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "include/transforms.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Transform operations for performing computer vision. | |||||
| namespace vision { | |||||
| // Char arrays storing name of corresponding classes (in alphabetical order) | |||||
| constexpr char kCenterCropOperation[] = "CenterCrop"; | |||||
| constexpr char kCropOperation[] = "Crop"; | |||||
| constexpr char kDecodeOperation[] = "Decode"; | |||||
| constexpr char kNormalizeOperation[] = "Normalize"; | |||||
| constexpr char kResizeOperation[] = "Resize"; | |||||
| constexpr char kRotateOperation[] = "Rotate"; | |||||
| // Transform Op classes (in alphabetical order) | |||||
| class CenterCropOperation; | |||||
| class CropOperation; | |||||
| class DecodeOperation; | |||||
| class NormalizeOperation; | |||||
| class ResizeOperation; | |||||
| class RotateOperation; | |||||
| /// \brief Function to create a CenterCrop TensorOperation. | |||||
| /// \notes Crops the input image at the center to the given size. | |||||
| /// \param[in] size A vector representing the output size of the cropped image. | |||||
| /// If size is a single value, a square crop of size (size, size) is returned. | |||||
| /// If size has 2 values, it should be (height, width). | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size); | |||||
| /// \brief Function to create a Crop TensorOp | |||||
| /// \notes Crop an image based on location and crop size | |||||
| /// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor} | |||||
| /// \param[in] size Size of the cropped area. | |||||
| /// If size is a single value, a square crop of size (size, size) is returned. | |||||
| /// If size has 2 values, it should be (height, width). | |||||
| /// \return Shared pointer to the current TensorOp | |||||
| std::shared_ptr<CropOperation> Crop(std::vector<int32_t> coordinates, std::vector<int32_t> size); | |||||
| /// \brief Function to create a Decode TensorOperation. | |||||
| /// \notes Decode the input image in RGB mode. | |||||
| /// \param[in] rgb A boolean of whether to decode in RGB mode or not. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<DecodeOperation> Decode(bool rgb = true); | |||||
| /// \brief Function to create a Normalize TensorOperation. | |||||
| /// \notes Normalize the input image with respect to mean and standard deviation. | |||||
| /// \param[in] mean A vector of mean values for each channel, w.r.t channel order. | |||||
| /// The mean values must be in range [0.0, 255.0]. | |||||
| /// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order. | |||||
| /// The standard deviation values must be in range (0.0, 255.0] | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std); | |||||
| /// \brief Function to create a Resize TensorOperation. | |||||
| /// \notes Resize the input image to the given size. | |||||
| /// \param[in] size A vector representing the output size of the resized image. | |||||
| /// If size is a single value, the image will be resized to this value with | |||||
| /// the same image aspect ratio. If size has 2 values, it should be (height, width). | |||||
| /// \param[in] interpolation An enum for the mode of interpolation | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size, | |||||
| InterpolationMode interpolation = InterpolationMode::kLinear); | |||||
| /// \brief Applies an rotate transformation to an image. | |||||
| /// \notes Rotate the input image using a specified angle id. | |||||
| /// \return Shared pointer to the current TensorOperation. | |||||
| std::shared_ptr<RotateOperation> Rotate(); | |||||
| class CenterCropOperation : public TensorOperation { | |||||
| public: | |||||
| explicit CenterCropOperation(std::vector<int32_t> size); | |||||
| ~CenterCropOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kCenterCropOperation; } | |||||
| private: | |||||
| std::vector<int32_t> size_; | |||||
| }; | |||||
| class CropOperation : public TensorOperation { | |||||
| public: | |||||
| CropOperation(std::vector<int32_t> coordinates, std::vector<int32_t> size); | |||||
| ~CropOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kCropOperation; } | |||||
| private: | |||||
| std::vector<int32_t> coordinates_; | |||||
| std::vector<int32_t> size_; | |||||
| }; | |||||
| class DecodeOperation : public TensorOperation { | |||||
| public: | |||||
| explicit DecodeOperation(bool rgb = true); | |||||
| ~DecodeOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kDecodeOperation; } | |||||
| private: | |||||
| bool rgb_; | |||||
| }; | |||||
| class NormalizeOperation : public TensorOperation { | |||||
| public: | |||||
| NormalizeOperation(std::vector<float> mean, std::vector<float> std); | |||||
| ~NormalizeOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kNormalizeOperation; } | |||||
| private: | |||||
| std::vector<float> mean_; | |||||
| std::vector<float> std_; | |||||
| }; | |||||
| class ResizeOperation : public TensorOperation { | |||||
| public: | |||||
| explicit ResizeOperation(std::vector<int32_t> size, | |||||
| InterpolationMode interpolation_mode = InterpolationMode::kLinear); | |||||
| ~ResizeOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kResizeOperation; } | |||||
| private: | |||||
| std::vector<int32_t> size_; | |||||
| InterpolationMode interpolation_; | |||||
| }; | |||||
| class RotateOperation : public TensorOperation { | |||||
| public: | |||||
| RotateOperation(); | |||||
| ~RotateOperation() = default; | |||||
| std::shared_ptr<TensorOp> Build() override; | |||||
| Status ValidateParams() override; | |||||
| std::string Name() const override { return kRotateOperation; } | |||||
| void setAngle(uint64_t angle_id); | |||||
| private: | |||||
| std::shared_ptr<TensorOp> rotate_op; | |||||
| }; | |||||
| } // namespace vision | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_LITE_H_ | |||||
| @@ -14,6 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "minddata/dataset/util/task.h" | #include "minddata/dataset/util/task.h" | ||||
| #include <unistd.h> | |||||
| #include "utils/ms_utils.h" | #include "utils/ms_utils.h" | ||||
| #include "minddata/dataset/util/log_adapter.h" | #include "minddata/dataset/util/log_adapter.h" | ||||
| #include "minddata/dataset/util/task_manager.h" | #include "minddata/dataset/util/task_manager.h" | ||||
| @@ -99,233 +99,148 @@ AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/util MINDDATA_UTIL_SRC_FILES) | |||||
| AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES) | AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES) | ||||
# Rewrites a BUILD_MINDDATA value of "full" to "wrapper"; after this block the
# "full" comparison on the next line can no longer match.
| if(BUILD_MINDDATA STREQUAL "full") | |||||
| set(BUILD_MINDDATA "wrapper") | |||||
| endif() | |||||
| if(BUILD_MINDDATA STREQUAL "full") | if(BUILD_MINDDATA STREQUAL "full") | ||||
| include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image") | |||||
| list(REMOVE_ITEM MINDDATA_API_SRC_FILES | |||||
| "${MINDDATA_DIR}/api/text.cc" | |||||
| "${MINDDATA_DIR}/api/minddata_eager.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_CALLBACK_SRC_FILES | |||||
| "${MINDDATA_DIR}/callback/py_ds_callback.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES | |||||
| "${MINDDATA_DIR}/core/cv_tensor.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_KERNELS_SRC_FILES "${MINDDATA_DIR}/kernels/py_func_op.cc") | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/datasetops/build_sentence_piece_vocab_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/filter_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/barrier_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/bucket_batch_by_length_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/build_vocab_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/cache_merge_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/cache_base_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/cache_lookup_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/cache_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/concat_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/rename_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/skip_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/take_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/zip_op.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/manifest_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/mindrecord_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/tf_reader_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/celeba_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/cifar_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/clue_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/coco_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/csv_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/image_folder_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/random_data_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/text_file_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/sampler/python_sampler.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/opt/post/generator_node_pass.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/opt/post/repeat_pass.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_OPT_PRE_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/opt/pre/cache_transform_pass.cc" | |||||
| "${MINDDATA_DIR}/engine/opt/pre/cache_error_pass.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_IR_CACHE_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/ir/cache/dataset_cache_impl.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/cache/pre_built_dataset_cache.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/generator_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/minddata_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/tf_record_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/voc_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/celeba_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar10_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar100_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/coco_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/csv_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/image_folder_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/random_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/text_file_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/clue_node.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_KERNELS_IMAGE_SRC_FILES | |||||
| "${MINDDATA_DIR}/kernels/image/affine_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/auto_contrast_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/bounding_box_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/bounding_box_augment_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/concatenate_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/cut_out_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/equalize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/image_utils.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/invert_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/math_utils.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/mixup_batch_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/pad_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/posterize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/normalize_pad_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_affine_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_color_adjust_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/rescale_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/solarize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_color_op.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/bucket_batch_by_length_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/build_sentence_piece_vocab_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/build_vocab_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/filter_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/sync_wait_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/skip_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/take_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/transfer_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/zip_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/rename_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/concat_node.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_CONSUMERS_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/consumers/python_tree_consumer.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_SRC_FILES | |||||
| "${MINDDATA_DIR}/engine/python_runtime_context.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_KERNELS_DATA_SRC_FILES | |||||
| "${MINDDATA_DIR}/kernels/data/unique_op.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_UTIL_SRC_FILES | |||||
| "${MINDDATA_DIR}/util/numa_interface.cc" | |||||
| ) | |||||
| include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache") | |||||
| if(BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64)) | |||||
| set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc) | |||||
| endif() | |||||
| include_directories("${MINDDATA_DIR}/kernels/image") | |||||
| include_directories("${MINDDATA_DIR}/liteapi") | |||||
| include_directories("${TOP_DIR}") | |||||
| # Explicit list of the sources stored in MINDDATA_FULL_SRC. | |||||
| # Each file appears exactly once (util/wait_post.cc used to be listed twice). | |||||
| set(MINDDATA_FULL_SRC | |||||
|     ${TOP_DIR}/mindspore/lite/src/cxx_api/types.cc | |||||
|     ${TOP_DIR}/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc | |||||
|     ${TOP_DIR}/mindspore/lite/src/tensor.cc | |||||
|     ${CORE_DIR}/utils/status.cc | |||||
|     ${MINDDATA_DIR}/api/datasets.cc | |||||
|     ${MINDDATA_DIR}/kernels/data/data_utils.cc | |||||
|     ${MINDDATA_DIR}/api/samplers.cc | |||||
|     ${MINDDATA_DIR}/api/iterator.cc | |||||
|     ${MINDDATA_DIR}/api/execute.cc | |||||
|     ${MINDDATA_DIR}/core/de_tensor.cc | |||||
|     ${MINDDATA_DIR}/core/tensor_shape.cc | |||||
|     ${MINDDATA_DIR}/util/memory_pool.cc | |||||
|     ${MINDDATA_DIR}/core/config_manager.cc | |||||
|     ${MINDDATA_DIR}/core/data_type.cc | |||||
|     ${MINDDATA_DIR}/core/tensor_helpers.cc | |||||
|     ${MINDDATA_DIR}/core/tensor.cc | |||||
|     ${MINDDATA_DIR}/core/global_context.cc | |||||
|     ${MINDDATA_DIR}/core/client.cc | |||||
|     ${MINDDATA_DIR}/engine/consumers/tree_consumer.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/dataset_node.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/epoch_ctrl_node.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/batch_node.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/map_node.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/root_node.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/repeat_node.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/project_node.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/shuffle_node.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/source/album_node.cc | |||||
|     ${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/dataset_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/repeat_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/epoch_ctrl_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/device_queue_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/project_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/shuffle_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/pipeline_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/batch_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/parallel_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/map_op/map_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/map_op/cpu_map_job.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/album_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/io_block.cc | |||||
|     ${MINDDATA_DIR}/engine/opt/pre/getter_pass.cc | |||||
|     ${MINDDATA_DIR}/engine/opt/pre/input_validation_pass.cc | |||||
|     ${MINDDATA_DIR}/engine/opt/pre/cache_validation_pass.cc | |||||
|     ${MINDDATA_DIR}/engine/opt/pre/node_removal_pass.cc | |||||
|     ${MINDDATA_DIR}/engine/opt/pre/epoch_ctrl_pass.cc | |||||
|     ${MINDDATA_DIR}/engine/opt/pre/deep_copy_pass.cc | |||||
|     ${MINDDATA_DIR}/engine/opt/post/auto_worker_pass.cc | |||||
|     ${MINDDATA_DIR}/engine/opt/pass.cc | |||||
|     ${MINDDATA_DIR}/engine/perf/profiling.cc | |||||
|     ${MINDDATA_DIR}/engine/perf/monitor.cc | |||||
|     ${MINDDATA_DIR}/engine/perf/device_queue_tracing.cc | |||||
|     ${MINDDATA_DIR}/engine/perf/connector_size.cc | |||||
|     ${MINDDATA_DIR}/engine/perf/connector_throughput.cc | |||||
|     ${MINDDATA_DIR}/engine/perf/dataset_iterator_tracing.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/sampler/sampler.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/sampler/subset_sampler.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/sampler/distributed_sampler.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/sampler/pk_sampler.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/sampler/random_sampler.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/sampler/sequential_sampler.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/sampler/subset_random_sampler.cc | |||||
|     ${MINDDATA_DIR}/engine/datasetops/source/sampler/weighted_random_sampler.cc | |||||
|     ${MINDDATA_DIR}/engine/runtime_context.cc | |||||
|     ${MINDDATA_DIR}/engine/tree_adapter.cc | |||||
|     ${MINDDATA_DIR}/engine/data_buffer.cc | |||||
|     ${MINDDATA_DIR}/engine/execution_tree.cc | |||||
|     ${MINDDATA_DIR}/engine/dataset_iterator.cc | |||||
|     ${MINDDATA_DIR}/core/tensor_row.cc | |||||
|     ${MINDDATA_DIR}/api/vision.cc | |||||
|     ${MINDDATA_DIR}/api/transforms.cc | |||||
|     ${MINDDATA_DIR}/util/path.cc | |||||
|     ${MINDDATA_DIR}/util/status.cc | |||||
|     ${MINDDATA_DIR}/util/service.cc | |||||
|     ${MINDDATA_DIR}/util/data_helper.cc | |||||
|     ${MINDDATA_DIR}/util/cond_var.cc | |||||
|     ${MINDDATA_DIR}/engine/data_schema.cc | |||||
|     ${MINDDATA_DIR}/kernels/tensor_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/image/lite_image_utils.cc | |||||
|     ${MINDDATA_DIR}/kernels/image/center_crop_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/image/crop_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/image/normalize_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/image/resize_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/image/rotate_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/data/compose_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/data/duplicate_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/data/one_hot_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/data/random_apply_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/data/random_choice_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/data/type_cast_op.cc | |||||
|     ${MINDDATA_DIR}/kernels/image/exif_utils.cc | |||||
|     ${MINDDATA_DIR}/callback/callback_manager.cc | |||||
|     ${MINDDATA_DIR}/util/task_manager.cc | |||||
|     ${MINDDATA_DIR}/util/services.cc | |||||
|     ${MINDDATA_DIR}/util/wait_post.cc | |||||
|     ${MINDDATA_DIR}/util/task.cc | |||||
|     ${MINDDATA_DIR}/util/circular_pool.cc | |||||
|     ${MINDDATA_DIR}/util/lock.cc | |||||
|     ${MINDDATA_DIR}/util/intrp_service.cc | |||||
|     ${MINDDATA_DIR}/util/arena.cc | |||||
|     ) | |||||
| # Build the minddata-lite shared library from the explicit MINDDATA_FULL_SRC list | |||||
| # plus the lite CV kernel sources and two shared utility sources. | |||||
| # NOTE(review): a single argument list is kept; the per-directory *_SRC_FILES | |||||
| # variables are no longer passed here -- confirm nothing outside MINDDATA_FULL_SRC is needed. | |||||
| add_library(minddata-lite SHARED | add_library(minddata-lite SHARED | ||||
|     ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} | |||||
|     ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc | |||||
|     ${CORE_DIR}/utils/ms_utils.cc | |||||
|     ${MINDDATA_FULL_SRC} | |||||
|     ) | |||||
| # Threads::Threads is the imported target provided by find_package(Threads). | |||||
| find_package(Threads REQUIRED) | find_package(Threads REQUIRED) | ||||
| # Libraries are listed once. The keyword-less signature is kept on purpose: the other | |||||
| # target_link_libraries(minddata-lite ...) calls in this file use it too, and CMake | |||||
| # rejects mixing the plain and keyword signatures on one target. | |||||
| target_link_libraries(minddata-lite | target_link_libraries(minddata-lite | ||||
|     securec | |||||
|     mindspore::jpeg_turbo | |||||
|     mindspore::turbojpeg | |||||
|     mindspore::json | |||||
|     Threads::Threads | |||||
|     ) | |||||
| # ref: https://github.com/android/ndk/issues/1202 | # ref: https://github.com/android/ndk/issues/1202 | ||||
| if(PLATFORM_ARM32) | if(PLATFORM_ARM32) | ||||
|     # Search the NDK tree (ANDROID_NDK is read from the environment at configure time) | |||||
|     # for the ARM compiler-rt builtins archive and link it into minddata-lite. | |||||
|     file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a) | |||||
|     # Fail configuration early when the glob matched nothing. | |||||
|     if("${LIBCLANG_RT_LIB}" STREQUAL "") | |||||
|         message(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-android.a in $ENV{ANDROID_NDK}") | |||||
|     endif() | |||||
|     target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB}) | |||||
| endif() | endif() | ||||
| # ARM32/ARM64 builds additionally link the `log` library. | |||||
| if(PLATFORM_ARM32 OR PLATFORM_ARM64) | if(PLATFORM_ARM32 OR PLATFORM_ARM64) | ||||
|     target_link_libraries(minddata-lite log) | |||||
| endif() | |||||
| elseif(BUILD_MINDDATA STREQUAL "wrapper") | elseif(BUILD_MINDDATA STREQUAL "wrapper") | ||||
| include_directories("${MINDDATA_DIR}/kernels/image") | include_directories("${MINDDATA_DIR}/kernels/image") | ||||
| include_directories("${MINDDATA_DIR}/util") | include_directories("${MINDDATA_DIR}/util") | ||||
| @@ -0,0 +1,22 @@ | |||||
| cmake_minimum_required(VERSION 3.14.1) | |||||
| project(testlenet LANGUAGES CXX) | |||||
| # Root of the unpacked MindSpore Lite minddata package, expected next to this file. | |||||
| # testlenet.cpp includes headers as "include/...", so this directory is the include root. | |||||
| set(DepDIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.1.0-inference-linux-x64/minddata") | |||||
| find_package(Threads REQUIRED) | |||||
| add_executable(testlenet | |||||
|     ${CMAKE_CURRENT_SOURCE_DIR}/testlenet.cpp | |||||
|     ) | |||||
| # Request C++17 without GNU extensions (equivalent to the former -std=c++17 flag). | |||||
| target_compile_features(testlenet PRIVATE cxx_std_17) | |||||
| set_target_properties(testlenet PROPERTIES CXX_EXTENSIONS OFF) | |||||
| # Same warning/codegen flags as before, attached to the target instead of being | |||||
| # concatenated into CMAKE_CXX_FLAGS (the old quoted value spanned two lines and | |||||
| # therefore carried a literal newline). | |||||
| target_compile_options(testlenet PRIVATE | |||||
|     -Werror | |||||
|     -Wall | |||||
|     -Wno-deprecated-declarations | |||||
|     -fPIC | |||||
|     -Wno-sign-compare | |||||
|     ) | |||||
| target_include_directories(testlenet PRIVATE | |||||
|     /usr/local/include | |||||
|     "${DepDIR}" | |||||
|     ) | |||||
| # Prebuilt shared libraries from the package, plus the platform thread library. | |||||
| target_link_libraries(testlenet PRIVATE | |||||
|     ${DepDIR}/lib/libminddata-lite.so | |||||
|     ${DepDIR}/third_party/libjpeg-turbo/lib/libjpeg.so.62 | |||||
|     ${DepDIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so.0 | |||||
|     Threads::Threads | |||||
|     ) | |||||
| @@ -0,0 +1,62 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <sys/stat.h> | |||||
| #include <unistd.h> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <set> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <unordered_set> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include "include/datasets.h" | |||||
| #include "include/iterator.h" | |||||
| #include "include/vision_lite.h" | |||||
| #include "include/transforms.h" | |||||
| #include "include/tensor.h" | |||||
| using mindspore::dataset::Dataset; | |||||
| using mindspore::dataset::Iterator; | |||||
| using mindspore::dataset::Mnist; | |||||
| using mindspore::dataset::Tensor; | |||||
| using mindspore::dataset::TensorOperation; | |||||
| // Example driver: builds an MNIST pipeline (resize -> shuffle -> batch) and drains it row by row. | |||||
| // Returns 0 after the pipeline has been consumed, 1 if no iterator could be created. | |||||
| int main(int argc, char **argv) { | |||||
|   std::string folder_path = "./testMnistData/"; | |||||
|   std::shared_ptr<Dataset> ds = Mnist(folder_path, "all"); | |||||
|   std::shared_ptr<TensorOperation> resize = mindspore::dataset::vision::Resize({32, 32}); | |||||
|   ds = ds->Map({resize}); | |||||
|   // Keep the returned datasets: like Map above, Shuffle and Batch hand back the new | |||||
|   // pipeline stage, so discarding the result would leave `ds` without these stages. | |||||
|   ds = ds->Shuffle(2); | |||||
|   ds = ds->Batch(2); | |||||
|   std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
|   // NOTE(review): CreateIterator presumably yields a null pointer when the pipeline | |||||
|   // cannot be built -- guard before dereferencing. | |||||
|   if (iter == nullptr) { | |||||
|     return 1; | |||||
|   } | |||||
|   std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||||
|   iter->GetNextRow(&row); | |||||
|   uint64_t i = 0; | |||||
|   // The loop ends when GetNextRow leaves the row map empty. | |||||
|   while (row.size() != 0) { | |||||
|     i++; | |||||
|     auto image = row["image"]; | |||||
|     iter->GetNextRow(&row); | |||||
|   } | |||||
|   iter->Stop(); | |||||
|   return 0; | |||||
| } | |||||