| @@ -49,7 +49,7 @@ usage() | |||
| echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" | |||
| echo " -D Enable dumping of function graph ir, default on" | |||
| echo " -z Compile dataset & mindrecord, default on" | |||
| echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv mode in lite predict" | |||
| echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv, wrapper mode in lite predict" | |||
| echo " -M Enable MPI and NCCL for GPU training, gpu default on" | |||
| echo " -V Specify the minimum required cuda version, default CUDA 10.1" | |||
| echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation" | |||
| @@ -129,7 +129,7 @@ checkopts() | |||
| DEBUG_MODE="on" | |||
| ;; | |||
| n) | |||
| if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" ]]; then | |||
| if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" || "X$OPTARG" == "Xwrapper" ]]; then | |||
| COMPILE_MINDDATA_LITE="$OPTARG" | |||
| else | |||
| echo "Invalid value ${OPTARG} for option -n" | |||
| @@ -678,7 +678,7 @@ build_lite() | |||
| build_gtest | |||
| fi | |||
| if [ "${COMPILE_MINDDATA_LITE}" == "lite" ] || [ "${COMPILE_MINDDATA_LITE}" == "full" ]; then | |||
| if [[ "${COMPILE_MINDDATA_LITE}" == "lite" || "${COMPILE_MINDDATA_LITE}" == "full" || "${COMPILE_MINDDATA_LITE}" == "wrapper" ]]; then | |||
| build_minddata_lite_deps | |||
| fi | |||
| @@ -20,7 +20,7 @@ set(OPENCV_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/minddata/third_part | |||
| set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf) | |||
| set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers) | |||
| if (BUILD_MINDDATA STREQUAL "full") | |||
| if (BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") | |||
| install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") | |||
| if (PLATFORM_ARM64) | |||
| install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${COMPONENT_NAME}) | |||
| @@ -28,7 +28,7 @@ set(MAIN_DIR ${DIR_PREFIX}-${MS_VERSION}) | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") | |||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") | |||
| set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv or full") | |||
| set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full") | |||
| set(BUILD_LITE "on") | |||
| set(PLATFORM_ARM "off") | |||
| if (PLATFORM_ARM64 OR PLATFORM_ARM32) | |||
| @@ -182,7 +182,7 @@ if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64) | |||
| endif () | |||
| endif () | |||
| if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full") | |||
| if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") | |||
| # add sentencepiece dependency | |||
| # include(${TOP_DIR}/cmake/external_libs/sentencepiece.cmake) | |||
| # json | |||
| @@ -81,6 +81,12 @@ AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/util MINDDATA_UTIL_SRC_FILES) | |||
| AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES) | |||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | |||
| if (BUILD_MINDDATA STREQUAL "full") | |||
| set(BUILD_MINDDATA "wrapper") | |||
| endif () | |||
| endif () | |||
| if (BUILD_MINDDATA STREQUAL "full") | |||
| include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image") | |||
| list(REMOVE_ITEM MINDDATA_API_SRC_FILES | |||
| @@ -114,102 +120,102 @@ if (BUILD_MINDDATA STREQUAL "full") | |||
| ) | |||
| list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES | |||
| "${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/manifest_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/mindrecord_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/tf_reader_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/celeba_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/cifar_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/clue_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/coco_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/csv_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/image_folder_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/random_data_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/text_file_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||
| ) | |||
| "${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/manifest_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/mindrecord_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/tf_reader_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/celeba_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/cifar_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/clue_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/coco_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/csv_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/image_folder_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/random_data_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/text_file_op.cc" | |||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||
| ) | |||
| list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES | |||
| "${MINDDATA_DIR}/engine/datasetops/source/sampler/python_sampler.cc" | |||
| ) | |||
| "${MINDDATA_DIR}/engine/datasetops/source/sampler/python_sampler.cc" | |||
| ) | |||
| list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES | |||
| "${MINDDATA_DIR}/engine/opt/post/repeat_pass.cc" | |||
| ) | |||
| "${MINDDATA_DIR}/engine/opt/post/repeat_pass.cc" | |||
| ) | |||
| list(REMOVE_ITEM MINDDATA_ENGINE_OPT_PRE_SRC_FILES | |||
| "${MINDDATA_DIR}/engine/opt/pre/cache_transform_pass.cc" | |||
| "${MINDDATA_DIR}/engine/opt/pre/cache_error_pass.cc" | |||
| ) | |||
| "${MINDDATA_DIR}/engine/opt/pre/cache_transform_pass.cc" | |||
| "${MINDDATA_DIR}/engine/opt/pre/cache_error_pass.cc" | |||
| ) | |||
| list(REMOVE_ITEM MINDDATA_ENGINE_IR_CACHE_SRC_FILES | |||
| "${MINDDATA_DIR}/engine/ir/cache/dataset_cache_impl.cc" | |||
| "${MINDDATA_DIR}/engine/ir/cache/pre_built_dataset_cache.cc" | |||
| ) | |||
| "${MINDDATA_DIR}/engine/ir/cache/dataset_cache_impl.cc" | |||
| "${MINDDATA_DIR}/engine/ir/cache/pre_built_dataset_cache.cc" | |||
| ) | |||
| list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/generator_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/minddata_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/tf_record_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/voc_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/celeba_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar10_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar100_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/coco_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/csv_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/image_folder_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/random_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/text_file_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/clue_node.cc" | |||
| ) | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/generator_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/minddata_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/tf_record_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/voc_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/celeba_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar10_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar100_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/coco_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/csv_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/image_folder_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/random_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/text_file_node.cc" | |||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/clue_node.cc" | |||
| ) | |||
| list(REMOVE_ITEM MINDDATA_KERNELS_IMAGE_SRC_FILES | |||
| "${MINDDATA_DIR}/kernels/image/affine_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/auto_contrast_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/bounding_box_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/bounding_box_augment_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/concatenate_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/cut_out_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/equalize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/image_utils.cc" | |||
| "${MINDDATA_DIR}/kernels/image/invert_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/math_utils.cc" | |||
| "${MINDDATA_DIR}/kernels/image/mixup_batch_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/pad_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/posterize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_affine_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_color_adjust_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/rescale_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/solarize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_color_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/affine_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/auto_contrast_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/bounding_box_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/bounding_box_augment_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/concatenate_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/cut_out_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/equalize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/image_utils.cc" | |||
| "${MINDDATA_DIR}/kernels/image/invert_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/math_utils.cc" | |||
| "${MINDDATA_DIR}/kernels/image/mixup_batch_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/pad_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/posterize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_affine_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_color_adjust_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/rescale_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/solarize_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" | |||
| "${MINDDATA_DIR}/kernels/image/random_color_op.cc" | |||
| ) | |||
| list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES | |||
| @@ -239,47 +245,114 @@ if (BUILD_MINDDATA STREQUAL "full") | |||
| include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache") | |||
| if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64)) | |||
| set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc) | |||
| set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc) | |||
| endif () | |||
| add_library(minddata-lite SHARED | |||
| ${MINDDATA_API_SRC_FILES} | |||
| ${MINDDATA_CALLBACK_SRC_FILES} | |||
| ${MINDDATA_CORE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_SRC_FILES} | |||
| #${MINDDATA_ENGINE_CACHE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_CONSUMERS_SRC_FILES} | |||
| ${MINDDATA_ENGINE_DATASETOPS_SRC_FILES} | |||
| ${MINDDATA_ENGINE_DATASETOPS_MAPOP_SRC_FILES} | |||
| ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} | |||
| ${MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES} | |||
| ${MINDDATA_ENGINE_IR_CACHE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_OPTIONAL_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_POST_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_PRE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_UTIL_SRC_FILES} | |||
| ${MINDDATA_ENGINE_PERF_SRC_FILES} | |||
| ${MINDDATA_KERNELS_SRC_FILES} | |||
| ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} | |||
| ${MINDDATA_KERNELS_IMAGE_SRC_FILES} | |||
| ${MINDDATA_KERNELS_DATA_SRC_FILES} | |||
| ${MINDDATA_UTIL_SRC_FILES} | |||
| ${MINDDATA_EXAMPLE_SRC} | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc | |||
| ${CORE_DIR}/utils/ms_utils.cc | |||
| ) | |||
| ${MINDDATA_CALLBACK_SRC_FILES} | |||
| ${MINDDATA_CORE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_SRC_FILES} | |||
| #${MINDDATA_ENGINE_CACHE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_CONSUMERS_SRC_FILES} | |||
| ${MINDDATA_ENGINE_DATASETOPS_SRC_FILES} | |||
| ${MINDDATA_ENGINE_DATASETOPS_MAPOP_SRC_FILES} | |||
| ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} | |||
| ${MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES} | |||
| ${MINDDATA_ENGINE_IR_CACHE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_OPTIONAL_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_POST_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_PRE_SRC_FILES} | |||
| ${MINDDATA_ENGINE_OPT_UTIL_SRC_FILES} | |||
| ${MINDDATA_ENGINE_PERF_SRC_FILES} | |||
| ${MINDDATA_KERNELS_SRC_FILES} | |||
| ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} | |||
| ${MINDDATA_KERNELS_IMAGE_SRC_FILES} | |||
| ${MINDDATA_KERNELS_DATA_SRC_FILES} | |||
| ${MINDDATA_UTIL_SRC_FILES} | |||
| ${MINDDATA_EXAMPLE_SRC} | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc | |||
| ${CORE_DIR}/utils/ms_utils.cc | |||
| ) | |||
| find_package(Threads REQUIRED) | |||
| target_link_libraries(minddata-lite | |||
| securec | |||
| jpeg-turbo | |||
| jpeg | |||
| mindspore::json | |||
| Threads::Threads | |||
| ) | |||
| securec | |||
| jpeg-turbo | |||
| jpeg | |||
| mindspore::json | |||
| Threads::Threads | |||
| ) | |||
| # ref: https://github.com/android/ndk/issues/1202 | |||
| if (PLATFORM_ARM32) | |||
| file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a) | |||
| if (LIBCLANG_RT_LIB STREQUAL "") | |||
| MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-android.a in $ENV{ANDROID_NDK}") | |||
| endif() | |||
| target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB}) | |||
| endif() | |||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | |||
| target_link_libraries(minddata-lite log) | |||
| elseif (BUILD_MINDDATA_EXAMPLE) | |||
| endif() | |||
| elseif (BUILD_MINDDATA STREQUAL "wrapper") | |||
| include_directories("${MINDDATA_DIR}/kernels/image") | |||
| include_directories("${MINDDATA_DIR}/util") | |||
| include_directories(${CMAKE_CURRENT_SOURCE_DIR}/wrapper) | |||
| set(MINDDATA_TODAPI_SRC | |||
| ${MINDDATA_DIR}/core/tensor_shape.cc | |||
| ${MINDDATA_DIR}/core/tensor.cc | |||
| ${MINDDATA_DIR}/core/config_manager.cc | |||
| ${MINDDATA_DIR}/core/data_type.cc | |||
| ${MINDDATA_DIR}/core/tensor_helpers.cc | |||
| ${MINDDATA_DIR}/core/global_context.cc | |||
| ${MINDDATA_DIR}/core/tensor_row.cc | |||
| ${MINDDATA_DIR}/api/vision.cc | |||
| ${MINDDATA_DIR}/api/execute.cc | |||
| ${MINDDATA_DIR}/api/transforms.cc | |||
| ${MINDDATA_DIR}/api/de_tensor.cc | |||
| ${MINDDATA_DIR}/util/path.cc | |||
| ${MINDDATA_DIR}/util/status.cc | |||
| ${MINDDATA_DIR}/util/data_helper.cc | |||
| ${MINDDATA_DIR}/util/memory_pool.cc | |||
| ${MINDDATA_DIR}/engine/data_schema.cc | |||
| ${MINDDATA_DIR}/kernels/tensor_op.cc | |||
| ${MINDDATA_DIR}/kernels/image/lite_image_utils.cc | |||
| ${MINDDATA_DIR}/kernels/image/center_crop_op.cc | |||
| ${MINDDATA_DIR}/kernels/image/crop_op.cc | |||
| ${MINDDATA_DIR}/kernels/image/normalize_op.cc | |||
| ${MINDDATA_DIR}/kernels/image/resize_op.cc | |||
| ${MINDDATA_DIR}/kernels/data/compose_op.cc | |||
| ${MINDDATA_DIR}/kernels/data/duplicate_op.cc | |||
| ${MINDDATA_DIR}/kernels/data/one_hot_op.cc | |||
| ${MINDDATA_DIR}/kernels/data/random_apply_op.cc | |||
| ${MINDDATA_DIR}/kernels/data/random_choice_op.cc | |||
| ${MINDDATA_DIR}/kernels/data/type_cast_op.cc | |||
| ${MINDDATA_DIR}/kernels/data/data_utils.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/album_op_android.cc | |||
| ) | |||
| add_library(minddata-lite SHARED | |||
| ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc | |||
| ${CORE_DIR}/utils/ms_utils.cc | |||
| ${MINDDATA_TODAPI_SRC} | |||
| ) | |||
| find_package(Threads REQUIRED) | |||
| target_link_libraries(minddata-lite | |||
| securec | |||
| jpeg-turbo | |||
| jpeg | |||
| mindspore::json | |||
| Threads::Threads | |||
| ) | |||
| # ref: https://github.com/android/ndk/issues/1202 | |||
| if (PLATFORM_ARM32) | |||
| @@ -293,11 +366,6 @@ if (BUILD_MINDDATA STREQUAL "full") | |||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | |||
| target_link_libraries(minddata-lite log) | |||
| elseif (BUILD_MINDDATA_EXAMPLE) | |||
| # add_executable(mdlite-example ${CMAKE_CURRENT_SOURCE_DIR}/example/x86-example.cc) | |||
| # target_link_libraries(mdlite-example minddata-lite) | |||
| # add_custom_command(TARGET mdlite-example POST_BUILD | |||
| # COMMAND cp -rf ${CMAKE_CURRENT_SOURCE_DIR}/example/testCifar10Data ${CMAKE_BINARY_DIR}/minddata | |||
| # ) | |||
| endif() | |||
| elseif (BUILD_MINDDATA STREQUAL "lite") | |||
| list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES "${MINDDATA_DIR}/core/client.cc") | |||
| @@ -374,9 +442,6 @@ elseif (BUILD_MINDDATA STREQUAL "lite") | |||
| securec | |||
| jpeg-turbo | |||
| jpeg | |||
| # opencv_core | |||
| # opencv_imgcodecs | |||
| # opencv_imgproc | |||
| mindspore::json | |||
| ) | |||
| @@ -13,7 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "MDToDApi.h" | |||
| #include "MDToDApi.h" //NOLINT | |||
| #include <string> | |||
| #include <fstream> | |||
| @@ -22,7 +22,8 @@ | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "minddata/dataset/include/datasets.h" | |||
| #include "album_op_android.h" //NOLINT | |||
| #include "minddata/dataset/include/execute.h" | |||
| #include "minddata/dataset/util/path.h" | |||
| #include "minddata/dataset/include/vision.h" | |||
| @@ -35,7 +36,7 @@ | |||
| using mindspore::dataset::Path; | |||
| using mindspore::dataset::Tensor; | |||
| using mindspore::dataset; | |||
| using TensorOperation = mindspore::dataset::TensorOperation; | |||
| using mindspore::LogStream; | |||
| using mindspore::MsLogLevel::DEBUG; | |||
| @@ -48,22 +49,21 @@ using mindspore::dataset::Status; | |||
| class MDToDApi { | |||
| public: | |||
| std::shared_ptr<Dataset> _ds; | |||
| std::shared_ptr<Iterator> _iter; | |||
| std::shared_ptr<mindspore::dataset::AlbumOp> _iter; | |||
| std::vector<std::shared_ptr<TensorOperation>> _augs; | |||
| std::string _storage_folder; | |||
| std::string _folder_path; | |||
| bool _hasBatch; | |||
| int64_t _file_id; | |||
| MDToDApi() : _ds(nullptr), _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) { | |||
| MS_LOG(WARNING) << "MDToDAPI Call constructor"; | |||
| public: | |||
| MDToDApi() : _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) { | |||
| MS_LOG(WARNING) << "MDToDAPI Call constructor"; | |||
| } | |||
| ~MDToDApi() { | |||
| MS_LOG(WARNING) << "MDToDAPI Call destructor"; | |||
| MS_LOG(WARNING) << "MDToDAPI Call destructor"; | |||
| // dereference dataset and iterator | |||
| _augs.clear(); | |||
| _ds = nullptr; | |||
| _iter = nullptr; | |||
| } | |||
| }; | |||
| @@ -79,7 +79,9 @@ std::vector<std::string> MDToDBuffToVector(MDToDBuff_t StrBuff) { | |||
| return strVector; | |||
| } | |||
| extern "C" int MDToDApi_pathTest(const char *path) { | |||
| extern "C" | |||
| int MDToDApi_pathTest(const char* path) { | |||
| Path f(path); | |||
| MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath(); | |||
| // Print out the first few items in the directory | |||
| @@ -114,36 +116,31 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { | |||
| if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) { | |||
| std::vector<int> Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2); | |||
| std::shared_ptr<TensorOperation> resize_op = vision::Resize(Resize); | |||
| std::shared_ptr<TensorOperation> resize_op = mindspore::dataset::vision::Resize(Resize); | |||
| assert(resize_op != nullptr); | |||
| MS_LOG(WARNING) << "Push back resize"; | |||
| mapOperations.push_back(resize_op); | |||
| // hasBatch = true; Batch not currently supported in Minddata-Lite | |||
| } | |||
| if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) { | |||
| std::vector<int> Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2); | |||
| std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop(Crop); | |||
| std::shared_ptr<TensorOperation> center_crop_op = mindspore::dataset::vision::CenterCrop(Crop); | |||
| assert(center_crop_op != nullptr); | |||
| MS_LOG(WARNING) << "Push back crop"; | |||
| mapOperations.push_back(center_crop_op); | |||
| // hasBatch = true; Batch not currently supported in Minddata-Lite | |||
| } | |||
| } | |||
| std::shared_ptr<Dataset> ds = nullptr; | |||
| MS_LOG(INFO) << "Read id =" << MDConf.fileid << " (-1) for all"; | |||
| MS_LOG(INFO) << "Read id=" << MDConf.fileid << " (-1) for all"; | |||
| std::shared_ptr<mindspore::dataset::AlbumOp> iter = nullptr; | |||
| const std::set<std::string> exts = {}; | |||
| if (MDConf.fileid > -1) { | |||
| // read specific image using SequentialSampler | |||
| ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(MDConf.fileid, 1L)); | |||
| // read specific image using SequentialSampler | |||
| iter = std::make_shared<mindspore::dataset::AlbumOp>(folder_path, true, schema_file, exts, MDConf.fileid); | |||
| } else { | |||
| // Distributed sampler takes num_shards then shard_id | |||
| ds = Album(folder_path, schema_file, column_names, true, SequentialSampler()); | |||
| iter = std::make_shared<mindspore::dataset::AlbumOp>(folder_path, true, schema_file, exts); | |||
| } | |||
| ds = ds->SetNumWorkers(1); | |||
| assert(ds != nullptr); | |||
| // Create a Repeat operation on ds | |||
| int32_t repeat_num = 1; | |||
| ds = ds->Repeat(repeat_num); | |||
| assert(ds != nullptr); | |||
| // Create objects for the tensor ops | |||
| MS_LOG(INFO) << " Create pipline parameters"; | |||
| @@ -154,16 +151,7 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { | |||
| } | |||
| bool hasBatch = false; | |||
| // Create an iterator over the result of the above dataset | |||
| // This will trigger the creation of the Execution Tree and launch it. | |||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||
| if (nullptr == iter) { | |||
| MS_LOG(ERROR) << "Iterator creation failed"; | |||
| return nullptr; | |||
| } | |||
| assert(iter != nullptr); | |||
| MDToDApi *pMDToDApi = new MDToDApi; | |||
| pMDToDApi->_ds = ds; | |||
| pMDToDApi->_iter = iter; | |||
| pMDToDApi->_augs = mapOperations; | |||
| pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath); | |||
| @@ -173,11 +161,11 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { | |||
| } | |||
| template <typename T> | |||
| void MDBuffToVector(MDToDBuff_t MDBuff, std::vector<T> *vec) { | |||
| vec.clear(); | |||
| void MDBuffToVector(const MDToDBuff_t MDBuff, std::vector<T> *vec) { | |||
| vec->clear(); | |||
| if (MDBuff.DataSize > 0) { | |||
| int nofElements = MDBuff.DataSize / sizeof(T); | |||
| *vec.assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements); | |||
| vec->assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements); | |||
| } | |||
| } | |||
| @@ -217,7 +205,7 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro | |||
| resBuff->TensorSize[0] = 1; | |||
| } | |||
| if (column->shape()[firstDim] > 0) { | |||
| if (DataType::DE_STRING == column->type()) { | |||
| if (mindspore::dataset::DataType::DE_STRING == column->type()) { | |||
| std::string str; | |||
| for (int ix = 0; ix < column->shape()[firstDim]; ix++) { | |||
| std::string_view strView; | |||
| @@ -238,14 +226,14 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro | |||
| MS_LOG(ERROR) << "memcpy_s return: " << ret; | |||
| } | |||
| } else { | |||
| DataHelper dh; | |||
| mindspore::dataset::DataHelper dh; | |||
| resBuff->DataSize = | |||
| dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize); | |||
| } | |||
| MS_LOG(INFO) << columnName << " " << resBuff->DataSize | |||
| << " bytes copied to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") "; | |||
| if (0 == resBuff->DataSize) { | |||
| MS_LOG(ERROR) << "Copy Failed!!!! " << columnName << " Too large" | |||
| MS_LOG(ERROR) << "Copy Failed!!!! " << columnName << " Too large" | |||
| << "."; // memcpy failed | |||
| } | |||
| } else { | |||
| @@ -259,7 +247,7 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro | |||
| extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { | |||
| MS_LOG(INFO) << "Start GetNext"; | |||
| if (pMDToDApi == nullptr) { | |||
| MS_LOG(ERROR) << "GetNext called with nullptr. Abort"; | |||
| MS_LOG(ERROR) << "GetNext called with nullptr. Abort"; | |||
| assert(pMDToDApi != nullptr); | |||
| } | |||
| @@ -271,12 +259,13 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { | |||
| // get next row for dataset | |||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | |||
| if (pMDToDApi->_iter == nullptr) { | |||
| MS_LOG(ERROR) << "GetNext called with no iterator. abort"; | |||
| MS_LOG(ERROR) << "GetNext called with no iterator. Abort"; | |||
| return -1; | |||
| } | |||
| // create Execute functions, this replaces Map in Pipeline | |||
| pMDToDApi->_iter->GetNextRow(&row); | |||
| if (row.size() != 0) { | |||
| bool ret = pMDToDApi->_iter->GetNextRow(&row); | |||
| if (row.size() != 0 && ret) { | |||
| if ((pMDToDApi->_augs).size() > 0) { | |||
| // String and Tensors | |||
| GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff); | |||
| @@ -285,7 +274,7 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { | |||
| for (int i = 0; i < (pMDToDApi->_augs).size(); i++) { | |||
| // each Execute call will invoke a memcpy, this cannot really be optimized further | |||
| // for this use case, std move is added for fail save. | |||
| row["image"] = Execute((pMDToDApi->_augs)[i])(std::move(row["image"])); | |||
| row["image"] = mindspore::dataset::Execute((pMDToDApi->_augs)[i])(std::move(row["image"])); | |||
| if (row["image"] == nullptr) { | |||
| // nullptr means that the eager mode image processing failed, we fail in this case | |||
| return -1; | |||
| @@ -316,20 +305,18 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { | |||
| extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) { | |||
| // Manually terminate the pipeline | |||
| pMDToDApi->_iter->Stop(); | |||
| MS_LOG(WARNING) << "pipeline stopped"; | |||
| return 0; | |||
| } | |||
| extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) { | |||
| MS_LOG(WARNING) << "pipeline deleted start"; | |||
| pMDToDApi->_iter->Stop(); | |||
| MS_LOG(WARNING) << "pipeline deleted start"; | |||
| delete pMDToDApi; | |||
| MS_LOG(WARNING) << "pipeline deleted end"; | |||
| MS_LOG(WARNING) << "pipeline deleted end"; | |||
| return 0; | |||
| } | |||
| int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) { | |||
| int GetJsonFullFileName(const MDToDApi *pMDToDApi, std::string *filePath) { | |||
| int64_t file_id = pMDToDApi->_file_id; | |||
| if (file_id < 0) { | |||
| MS_LOG(ERROR) << "Illigal file ID to update: " << file_id << "."; | |||
| @@ -343,12 +330,12 @@ int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) { | |||
| extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings, | |||
| size_t emmbeddingsSize) { | |||
| auto columnName = std::string(column); | |||
| MS_LOG(INFO) << "Start update " << columnName; | |||
| MS_LOG(INFO) << "Start Update " << columnName; | |||
| std::string converted = std::to_string(pMDToDApi->_file_id); | |||
| std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin"; | |||
| DataHelper dh; | |||
| MS_LOG(INFO) << "Try to save file " << embedding_file_path; | |||
| mindspore::dataset::DataHelper dh; | |||
| MS_LOG(INFO) << "Try to Save file " << embedding_file_path; | |||
| std::vector<float> bin_content(emmbeddings, emmbeddings + emmbeddingsSize); | |||
| Status rc = dh.template WriteBinFile<float>(embedding_file_path, bin_content); | |||
| if (rc.IsError()) { | |||
| @@ -379,8 +366,8 @@ extern "C" int MDToDApi_UpdateStringArray(MDToDApi *pMDToDApi, const char *colum | |||
| MS_LOG(ERROR) << "Failed to update " << columnName; | |||
| return -1; | |||
| } | |||
| MS_LOG(INFO) << "Start Update string array column: " << columnName << " in file " << file_path; | |||
| DataHelper dh; | |||
| MS_LOG(INFO) << "Start Update string Array column: " << columnName << " in file " << file_path; | |||
| mindspore::dataset::DataHelper dh; | |||
| std::vector<std::string> strVec; | |||
| if (MDbuff.DataSize > 0) { | |||
| const char *p = reinterpret_cast<char *>(MDbuff.Buff); | |||
| @@ -405,7 +392,7 @@ extern "C" int MDToDApi_UpdateFloatArray(MDToDApi *pMDToDApi, const char *column | |||
| return -1; | |||
| } | |||
| MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path; | |||
| DataHelper dh; | |||
| mindspore::dataset::DataHelper dh; | |||
| std::vector<float> vec; | |||
| MDBuffToVector<float>(MDBuff, &vec); | |||
| Status rc = dh.UpdateArray<float>(file_path, columnName, vec); | |||
| @@ -423,7 +410,7 @@ extern "C" int MDToDApi_UpdateIsForTrain(MDToDApi *pMDToDApi, int32_t isForTrain | |||
| if (file_id < 0) return -1; | |||
| std::string converted = std::to_string(pMDToDApi->_file_id); | |||
| std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; | |||
| DataHelper dh; | |||
| mindspore::dataset::DataHelper dh; | |||
| MS_LOG(INFO) << "Updating file: " << file_path; | |||
| Status rc = dh.UpdateValue<int32_t>(file_path, "_isForTrain", isForTrain, ""); | |||
| if (rc.IsError()) { | |||
| @@ -440,7 +427,7 @@ extern "C" int MDToDApi_UpdateNoOfFaces(MDToDApi *pMDToDApi, int32_t noOfFaces) | |||
| if (file_id < 0) return -1; | |||
| std::string converted = std::to_string(pMDToDApi->_file_id); | |||
| std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; | |||
| DataHelper dh; | |||
| mindspore::dataset::DataHelper dh; | |||
| MS_LOG(INFO) << "Updating file: " << file_path; | |||
| Status rc = dh.UpdateValue<int32_t>(file_path, "_noOfFaces", noOfFaces, ""); | |||
| if (rc.IsError()) { | |||
| @@ -0,0 +1,470 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "album_op_android.h" //NOLINT | |||
| #include <fstream> | |||
| #include <iomanip> | |||
| #include "minddata/dataset/core/tensor_shape.h" | |||
| #include "minddata/dataset/kernels/image/lite_image_utils.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
// Constructor (no sampler): records the folder/schema configuration and
// immediately scans the whole album directory, building the sorted row list.
// NOTE(review): the Status returned by PrescanEntry() is discarded here; a
// failed scan is only observable later via GetNextRow() returning false —
// confirm this is intended for the lite wrapper.
AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
                 const std::set<std::string> &exts)
    : folder_path_(file_dir),
      decode_(do_decode),
      extensions_(exts),
      schema_file_(schema_file),
      row_cnt_(0),
      buf_cnt_(0),
      current_cnt_(0),
      dirname_offset_(0),
      sampler_(false),   // read every row in order
      sampler_index_(0) {
  PrescanEntry();
}
| AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file, | |||
| const std::set<std::string> &exts, uint32_t index) | |||
| : folder_path_(file_dir), | |||
| decode_(do_decode), | |||
| extensions_(exts), | |||
| schema_file_(schema_file), | |||
| row_cnt_(0), | |||
| buf_cnt_(0), | |||
| current_cnt_(0), | |||
| dirname_offset_(0), | |||
| sampler_(true), | |||
| sampler_index_(0) { | |||
| PrescanEntry(); | |||
| } | |||
// Numeric-aware comparator for album file names.
// Album rows are named like "/123.json"; plain lexicographic order would put
// "/10" before "/9", so we parse the integer between the leading path
// separator (skipped via substr(1, ...)) and the first "." — the "." is
// guaranteed to exist because extensions are checked before this is called —
// and compare the two values numerically.
// Returns true when `a` encodes a smaller number than `b`.
bool StrComp(const std::string &a, const std::string &b) {
  const int64_t num_a = std::atoi(a.substr(1, a.find(".")).c_str());
  const int64_t num_b = std::atoi(b.substr(1, b.find(".")).c_str());
  return num_a < num_b;
}
| // Single thread to go through the folder directory and gets all file names | |||
| // calculate numRows then return | |||
| Status AlbumOp::PrescanEntry() { | |||
| data_schema_ = std::make_unique<DataSchema>(); | |||
| Path schema_file(schema_file_); | |||
| if (schema_file_ == "" || !schema_file.Exists()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, schema_file is invalid or not set: " + schema_file_); | |||
| } else { | |||
| MS_LOG(WARNING) << "Schema file provided: " << schema_file_ << "."; | |||
| data_schema_->LoadSchemaFile(schema_file_, columns_to_load_); | |||
| } | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| } | |||
| Path folder(folder_path_); | |||
| dirname_offset_ = folder_path_.length(); | |||
| std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder); | |||
| if (folder.Exists() == false || dirItr == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_); | |||
| } | |||
| MS_LOG(WARNING) << "Album folder Path found: " << folder_path_ << "."; | |||
| while (dirItr->hasNext()) { | |||
| Path file = dirItr->next(); | |||
| if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { | |||
| (void)image_rows_.push_back(file.toString().substr(dirname_offset_)); | |||
| } else { | |||
| MS_LOG(WARNING) << "Album operator unsupported file found: " << file.toString() | |||
| << ", extension: " << file.Extension() << "."; | |||
| } | |||
| } | |||
| std::sort(image_rows_.begin(), image_rows_.end(), StrComp); | |||
| if (image_rows_.size() == 0) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| "Invalid data, no valid data matching the dataset API AlbumDataset. Please check file path or dataset API."); | |||
| } | |||
| if (sampler_) { | |||
| if (sampler_index_ < 0 || sampler_index_ >= image_rows_.size()) { | |||
| RETURN_STATUS_UNEXPECTED("the sampler index was out of range"); | |||
| } | |||
| std::vector<std::string> tmp; | |||
| tmp.emplace_back(image_rows_[sampler_index_]); | |||
| image_rows_.clear(); | |||
| image_rows_ = tmp; | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| // contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_ | |||
| // IMPORTANT: 1 IOBlock produces 1 DataBuffer | |||
| bool AlbumOp::GetNextRow(std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) { | |||
| if (map_row == nullptr) { | |||
| MS_LOG(WARNING) << "GetNextRow in AlbumOp: the point of map_row is nullptr"; | |||
| return false; | |||
| } | |||
| if (current_cnt_ == image_rows_.size()) { | |||
| return false; | |||
| } | |||
| Status ret = LoadTensorRow(current_cnt_, image_rows_[current_cnt_], map_row); | |||
| if (ret.IsError()) { | |||
| MS_LOG(ERROR) << "GetNextRow in AlbumOp: " << ret.ToString() << "\n"; | |||
| return false; | |||
| } | |||
| current_cnt_++; | |||
| return true; | |||
| } | |||
| // Only support JPEG/PNG/GIF/BMP | |||
| // Optimization: Could take in a tensor | |||
| // This function does not return status because we want to just skip bad input, not crash | |||
| bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { | |||
| std::ifstream file_handle; | |||
| constexpr int read_num = 3; | |||
| *valid = false; | |||
| file_handle.open(file_name, std::ios::binary | std::ios::in); | |||
| if (!file_handle.is_open()) { | |||
| return false; | |||
| } | |||
| unsigned char file_type[read_num]; | |||
| (void)file_handle.read(reinterpret_cast<char *>(file_type), read_num); | |||
| if (file_handle.fail()) { | |||
| file_handle.close(); | |||
| return false; | |||
| } | |||
| file_handle.close(); | |||
| if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { | |||
| // Normal JPEGs start with \xff\xd8\xff\xe0 | |||
| // JPEG with EXIF stats with \xff\xd8\xff\xe1 | |||
| // Use \xff\xd8\xff to cover both. | |||
| *valid = true; | |||
| } | |||
| return true; | |||
| } | |||
// Load an image file into *tensor. By design, bad input degrades to an empty
// tensor instead of failing the whole row:
//   missing file          -> empty tensor
//   .png/.PNG             -> empty tensor (png decode intentionally skipped)
//   .bin/.BIN             -> raw file bytes, no decode
//   non-JPEG magic bytes  -> empty tensor
//   JPEG                  -> bytes loaded; decoded into *tensor when decode_
//                            is set, decode failure -> empty tensor
// \param[in] image_file_path path of the image on disk
// \param[in] col_num schema column index (used for the empty tensor's type)
// \param[in,out] tensor output tensor
// \return Status error only on tensor-creation failures, never on bad input
Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorPtr *tensor) {
  TensorPtr image;
  std::ifstream fs;
  fs.open(image_file_path, std::ios::binary | std::ios::in);
  if (fs.fail()) {
    MS_LOG(WARNING) << "File not found:" << image_file_path << ".";
    // If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }
  // Hack logic to replace png images with empty tensor
  Path file(image_file_path);
  std::set<std::string> png_ext = {".png", ".PNG"};
  if (png_ext.find(file.Extension()) != png_ext.end()) {
    // load empty tensor since image is not jpg
    MS_LOG(INFO) << "PNG!" << image_file_path << ".";
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }
  // treat bin files separately: load the raw bytes verbatim, no decoding
  std::set<std::string> bin_ext = {".bin", ".BIN"};
  if (bin_ext.find(file.Extension()) != bin_ext.end()) {
    MS_LOG(INFO) << "Bin file found" << image_file_path << ".";
    RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, tensor));
    return Status::OK();
  }
  // check that the file is an image before decoding
  bool valid = false;
  bool check_success = CheckImageType(image_file_path, &valid);
  if (!check_success || !valid) {
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }
  // if it is a jpeg image, load and try to decode
  RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
  // NOTE(review): `valid` is always true at this point, so the condition
  // reduces to decode_. When decode_ is false, the bytes are read into the
  // local `image` but *tensor is never assigned — confirm callers always set
  // decode_ for jpeg columns.
  if (decode_ && valid) {
    Status rc = Decode(image, tensor);
    if (rc.IsError()) {
      // Decode failure also degrades to an empty tensor rather than an error.
      RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
      return Status::OK();
    }
  }
  return Status::OK();
}
| Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||
| std::vector<std::string> data = json_obj.get<std::vector<std::string>>(); | |||
| MS_LOG(WARNING) << "String array label found: " << data << "."; | |||
| // TensorPtr label; | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||
| // row->push_back(std::move(label)); | |||
| return Status::OK(); | |||
| } | |||
| Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||
| std::string data = json_obj; | |||
| // now we iterate over the elements in json | |||
| MS_LOG(INFO) << "String label found: " << data << "."; | |||
| TensorPtr label; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(data, tensor)); | |||
| // row->push_back(std::move(label)); | |||
| return Status::OK(); | |||
| } | |||
| Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||
| // TensorPtr label; | |||
| // consider templating this function to handle all ints | |||
| if (data_schema_->column(col_num).type() == DataType::DE_INT64) { | |||
| std::vector<int64_t> data; | |||
| // Iterate over the integer list and add those values to the output shape tensor | |||
| auto items = json_obj.items(); | |||
| using it_type = decltype(items.begin()); | |||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||
| } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { | |||
| std::vector<int32_t> data; | |||
| // Iterate over the integer list and add those values to the output shape tensor | |||
| auto items = json_obj.items(); | |||
| using it_type = decltype(items.begin()); | |||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " + | |||
| data_schema_->column(col_num).type().ToString()); | |||
| } | |||
| // row->push_back(std::move(label)); | |||
| return Status::OK(); | |||
| } | |||
| Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||
| // TensorPtr float_array; | |||
| // consider templating this function to handle all ints | |||
| if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { | |||
| std::vector<double> data; | |||
| // Iterate over the integer list and add those values to the output shape tensor | |||
| auto items = json_obj.items(); | |||
| using it_type = decltype(items.begin()); | |||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||
| } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { | |||
| std::vector<float> data; | |||
| // Iterate over the integer list and add those values to the output shape tensor | |||
| auto items = json_obj.items(); | |||
| using it_type = decltype(items.begin()); | |||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " + | |||
| data_schema_->column(col_num).type().ToString()); | |||
| } | |||
| // row->push_back(std::move(float_array)); | |||
| return Status::OK(); | |||
| } | |||
| Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor) { | |||
| if (data_schema_->column(col_num).type() == DataType::DE_STRING) { | |||
| // TensorPtr id; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, tensor)); | |||
| // row->push_back(std::move(id)); | |||
| return Status::OK(); | |||
| } | |||
| // hack to get the file name without extension, the 1 is to get rid of the backslash character | |||
| int64_t image_id = std::atoi(file.substr(1, file.find(".")).c_str()); | |||
| // TensorPtr id; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(image_id, tensor)); | |||
| MS_LOG(INFO) << "File ID " << image_id << "."; | |||
| // row->push_back(std::move(id)); | |||
| return Status::OK(); | |||
| } | |||
// Create a zero-length placeholder tensor typed after the schema column.
// Used wherever a row is missing data (absent key, unreadable image) so the
// column is still present in the output map.
// \param[in] col_num schema column index supplying the element type
// \param[in,out] tensor output empty tensor
// \return Status error when tensor creation fails
Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) {
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), tensor));
  return Status::OK();
}
| // Loads a tensor with float value, issue with float64, we don't have reverse look up to the type | |||
| // So we actually have to check what type we want to fill the tensor with. | |||
| // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to | |||
| // only be float32, seems like a weird limitation to impose | |||
| Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||
| // TensorPtr float_tensor; | |||
| if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { | |||
| double data = json_obj; | |||
| MS_LOG(INFO) << "double found: " << json_obj << "."; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, tensor)); | |||
| } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { | |||
| float data = json_obj; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data, tensor)); | |||
| MS_LOG(INFO) << "float found: " << json_obj << "."; | |||
| } | |||
| // row->push_back(std::move(float_tensor)); | |||
| return Status::OK(); | |||
| } | |||
| // Loads a tensor with int value, we have to cast the value to type specified in the schema. | |||
| Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||
| // TensorPtr int_tensor; | |||
| if (data_schema_->column(col_num).type() == DataType::DE_INT64) { | |||
| int64_t data = json_obj; | |||
| MS_LOG(INFO) << "int64 found: " << json_obj << "."; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, tensor)); | |||
| } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { | |||
| int32_t data = json_obj; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, tensor)); | |||
| MS_LOG(INFO) << "int32 found: " << json_obj << "."; | |||
| } | |||
| // row->push_back(std::move(int_tensor)); | |||
| return Status::OK(); | |||
| } | |||
// Load 1 TensorRow (image,label): parse the row's json file (one json object
// per line) and fill *map_row with exactly one tensor per schema column.
// Dispatch is driven by the json value's shape (string/array) combined with
// the schema column's type.
// possible optimization: the helper functions of LoadTensorRow could take a
// reference to a column descriptor to avoid repeated schema lookups; the
// current design favors readability, forgoing that minor performance gain.
// \param[in] row_id row index (currently unused in the lite wrapper)
// \param[in] file row file name relative to folder_path_
// \param[in,out] map_row column-name -> tensor output map
// \return Status error when the json file can't be opened/parsed or a loader fails
Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file,
                              std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
  MS_LOG(INFO) << "Image row file: " << file << ".";
  std::ifstream file_handle(folder_path_ + file);
  if (!file_handle.is_open()) {
    RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file);
  }
  std::string line;
  while (getline(file_handle, line)) {
    try {
      nlohmann::json js = nlohmann::json::parse(line);
      MS_LOG(INFO) << "This Line: " << line << ".";
      // iterate over all column descriptors in the schema and check each key
      // get columns in schema:
      int32_t columns = data_schema_->NumColumns();
      // loop over each column descriptor, this could be optimized by switch cases
      for (int32_t i = 0; i < columns; i++) {
        // special case: "id" is internal and loaded from the file name, not the json
        if (data_schema_->column(i).name() == "id") {
          // id is internal, special case to load from file
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // find if key does not exist, insert placeholder nullptr if not found
        if (js.find(data_schema_->column(i).name()) == js.end()) {
          // key absent in this row: push an empty tensor as a placeholder so
          // the column is still present in the output map
          MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        nlohmann::json column_value = js.at(data_schema_->column(i).name());
        MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
        bool is_array = column_value.is_array();
        // load single string
        if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // load string array
        if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // a string value with a non-string column type is treated as an image file path
        if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) {
          std::string image_file_path = column_value;
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // load float value
        if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
                          data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // load float array
        if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
                         data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // int value
        if (!is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
                          data_schema_->column(i).type() == DataType::DE_INT32)) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // int array; anything that matched none of the cases above falls into
        // the else branch and is skipped with a warning
        if (is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
                         data_schema_->column(i).type() == DataType::DE_INT32)) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        } else {
          MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported.";
          continue;
        }
      }
    } catch (const std::exception &err) {
      file_handle.close();
      RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file);
    }
  }
  file_handle.close();
  return Status::OK();
}
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,173 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_ | |||
| #include <deque> | |||
| #include <memory> | |||
| #include <queue> | |||
| #include <string> | |||
| #include <algorithm> | |||
| #include <map> | |||
| #include <set> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include <unordered_map> | |||
| #include "minddata/dataset/core/tensor.h" | |||
| #include "minddata/dataset/engine/data_buffer.h" | |||
| #include "minddata/dataset/engine/data_schema.h" | |||
| #include "minddata/dataset/util/path.h" | |||
| #include "minddata/dataset/util/queue.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Forward declares | |||
| template <typename T> | |||
| class Queue; | |||
| // Define row information as a list of file objects to read | |||
| using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>; | |||
/// \class AlbumOp
/// \brief Thread-less Album reader for the lite/Android wrapper: scans an
/// album folder once, then serves one row at a time via GetNextRow, mapping
/// each schema column to a Tensor.
class AlbumOp {
 public:
  /// \brief Constructor (reads every row in numeric file-name order)
  /// \param[in] file_dir - directory of Album
  /// \param[in] do_decode - decode image files
  /// \param[in] schema_file - schema file
  /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
  AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
          const std::set<std::string> &exts);
  /// \brief Constructor (sampler mode: serves only the row at `index`)
  /// \param[in] file_dir - directory of Album
  /// \param[in] do_decode - decode image files
  /// \param[in] schema_file - schema file
  /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
  /// \param[in] index - the specific file index
  AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
          const std::set<std::string> &exts, uint32_t index);
  /// \brief Destructor.
  ~AlbumOp() = default;
  /// \brief Initialize AlbumOp related variables; walks all files in the folder,
  /// loads the schema and builds the sorted row list
  /// \return - The error code returned
  Status PrescanEntry();
  /// \brief Load the next row into *map_row (one tensor per schema column)
  /// and advance the internal cursor
  /// \return bool - false when map_row is null, rows are exhausted or a load fails
  bool GetNextRow(std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);
  /// \brief Check if an image is valid by its magic bytes (JPEG signature).
  /// This function could be optimized to return the tensor to reduce open/closing files
  /// \return bool - if file is bad (unreadable) then return false
  bool CheckImageType(const std::string &file_name, bool *valid);
  // Op name getter
  // @return Name of the current Op
  std::string Name() const { return "AlbumOp"; }
 private:
  /// \brief Load image to tensor
  /// \param[in] image_file Image name of file
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to write to
  /// \return Status The error code returned
  Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorPtr *tensor);
  /// \brief Load vector of ints to tensor
  /// \param[in] json_obj Json object containing multi-dimensional label
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to write to
  /// \return Status The error code returned
  Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
  /// \brief Load vector of floats to tensor
  /// \param[in] json_obj Json object containing array data
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to write to
  /// \return Status The error code returned
  Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
  /// \brief Load string array into a tensor
  /// \param[in] json_obj Json object containing string tensor
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to write to
  /// \return Status The error code returned
  Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
  /// \brief Load string into a tensor
  /// \param[in] json_obj Json object containing string tensor
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to write to
  /// \return Status The error code returned
  Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
  /// \brief Load float value to tensor
  /// \param[in] json_obj Json object containing float
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to write to
  /// \return Status The error code returned
  Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
  /// \brief Load int value to tensor
  /// \param[in] json_obj Json object containing int
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to write to
  /// \return Status The error code returned
  Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
  /// \brief Load empty tensor (zero-length placeholder typed by the column)
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to write to
  /// \return Status The error code returned
  Status LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor);
  /// \brief Load id from file name to tensor
  /// \param[in] file The file name to get ID from
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to write to
  /// \return Status The error code returned
  Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor);
  /// \brief Load a tensor row according to a json file
  /// \param[in] row_id - id for this tensor row
  /// \param[in] file Json file location
  /// \param[in,out] map_row Json content stored into a column-name -> tensor map
  /// \return Status The error code returned
  Status LoadTensorRow(row_id_type row_id, const std::string &file,
                       std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);
  std::string folder_path_;  // directory of image folder
  bool decode_;              // whether jpeg columns are decoded on load
  std::vector<std::string> columns_to_load_;
  std::set<std::string> extensions_;  // extensions allowed
  std::unique_ptr<DataSchema> data_schema_;
  std::string schema_file_;
  int64_t row_cnt_;
  int64_t current_cnt_;    // cursor: next row to serve from image_rows_
  int64_t buf_cnt_;
  int64_t dirname_offset_;  // length of folder_path_, used to strip the prefix
  bool sampler_;            // true when constructed with a specific index
  int64_t sampler_index_;
  std::vector<std::string> image_rows_;  // sorted row file names (relative)
  std::unordered_map<std::string, int32_t> column_name_id_map_;
};
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_ | |||