diff --git a/build.sh b/build.sh index 7edb05544b..217fa055b7 100755 --- a/build.sh +++ b/build.sh @@ -49,7 +49,7 @@ usage() echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" echo " -D Enable dumping of function graph ir, default on" echo " -z Compile dataset & mindrecord, default on" - echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv mode in lite predict" + echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv, wrapper mode in lite predict" echo " -M Enable MPI and NCCL for GPU training, gpu default on" echo " -V Specify the minimum required cuda version, default CUDA 10.1" echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation" @@ -129,7 +129,7 @@ checkopts() DEBUG_MODE="on" ;; n) - if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" ]]; then + if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" || "X$OPTARG" == "Xwrapper" ]]; then COMPILE_MINDDATA_LITE="$OPTARG" else echo "Invalid value ${OPTARG} for option -n" @@ -678,7 +678,7 @@ build_lite() build_gtest fi - if [ "${COMPILE_MINDDATA_LITE}" == "lite" ] || [ "${COMPILE_MINDDATA_LITE}" == "full" ]; then + if [[ "${COMPILE_MINDDATA_LITE}" == "lite" || "${COMPILE_MINDDATA_LITE}" == "full" || "${COMPILE_MINDDATA_LITE}" == "wrapper" ]]; then build_minddata_lite_deps fi diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index 431e373c2b..a8b1872be5 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -20,7 +20,7 @@ set(OPENCV_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/minddata/third_part set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf) set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers) 
-if (BUILD_MINDDATA STREQUAL "full") +if (BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") if (PLATFORM_ARM64) install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${COMPONENT_NAME}) diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 2b912a2f79..cca5534ad5 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -28,7 +28,7 @@ set(MAIN_DIR ${DIR_PREFIX}-${MS_VERSION}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") -set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv or full") +set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full") set(BUILD_LITE "on") set(PLATFORM_ARM "off") if (PLATFORM_ARM64 OR PLATFORM_ARM32) @@ -182,7 +182,7 @@ if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64) endif () endif () -if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full") +if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") # add sentencepiece dependency # include(${TOP_DIR}/cmake/external_libs/sentencepiece.cmake) # json diff --git a/mindspore/lite/minddata/CMakeLists.txt b/mindspore/lite/minddata/CMakeLists.txt index f70aa3a45a..28b08a9c72 100644 --- a/mindspore/lite/minddata/CMakeLists.txt +++ b/mindspore/lite/minddata/CMakeLists.txt @@ -81,6 +81,12 @@ AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/util MINDDATA_UTIL_SRC_FILES) 
AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES) +if (PLATFORM_ARM32 OR PLATFORM_ARM64) + if (BUILD_MINDDATA STREQUAL "full") + set(BUILD_MINDDATA "wrapper") + endif () +endif () + if (BUILD_MINDDATA STREQUAL "full") include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image") list(REMOVE_ITEM MINDDATA_API_SRC_FILES @@ -114,102 +120,102 @@ if (BUILD_MINDDATA STREQUAL "full") ) list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES - "${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/manifest_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/mindrecord_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/tf_reader_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/celeba_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/cifar_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/clue_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/coco_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/csv_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/image_folder_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/random_data_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/text_file_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" - ) + "${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/manifest_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/mindrecord_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/tf_reader_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/celeba_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/cifar_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/clue_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/coco_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/csv_op.cc" + 
"${MINDDATA_DIR}/engine/datasetops/source/image_folder_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/random_data_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/text_file_op.cc" + "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" + ) list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES - "${MINDDATA_DIR}/engine/datasetops/source/sampler/python_sampler.cc" - ) + "${MINDDATA_DIR}/engine/datasetops/source/sampler/python_sampler.cc" + ) list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES - "${MINDDATA_DIR}/engine/opt/post/repeat_pass.cc" - ) + "${MINDDATA_DIR}/engine/opt/post/repeat_pass.cc" + ) list(REMOVE_ITEM MINDDATA_ENGINE_OPT_PRE_SRC_FILES - "${MINDDATA_DIR}/engine/opt/pre/cache_transform_pass.cc" - "${MINDDATA_DIR}/engine/opt/pre/cache_error_pass.cc" - ) + "${MINDDATA_DIR}/engine/opt/pre/cache_transform_pass.cc" + "${MINDDATA_DIR}/engine/opt/pre/cache_error_pass.cc" + ) list(REMOVE_ITEM MINDDATA_ENGINE_IR_CACHE_SRC_FILES - "${MINDDATA_DIR}/engine/ir/cache/dataset_cache_impl.cc" - "${MINDDATA_DIR}/engine/ir/cache/pre_built_dataset_cache.cc" - ) + "${MINDDATA_DIR}/engine/ir/cache/dataset_cache_impl.cc" + "${MINDDATA_DIR}/engine/ir/cache/pre_built_dataset_cache.cc" + ) list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES - "${MINDDATA_DIR}/engine/ir/datasetops/source/generator_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/minddata_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/tf_record_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/voc_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/celeba_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar10_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar100_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/coco_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/csv_node.cc" - 
"${MINDDATA_DIR}/engine/ir/datasetops/source/image_folder_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/random_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/text_file_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/clue_node.cc" - ) + "${MINDDATA_DIR}/engine/ir/datasetops/source/generator_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/minddata_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/tf_record_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/voc_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/celeba_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar10_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar100_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/coco_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/csv_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/image_folder_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/random_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/text_file_node.cc" + "${MINDDATA_DIR}/engine/ir/datasetops/source/clue_node.cc" + ) list(REMOVE_ITEM MINDDATA_KERNELS_IMAGE_SRC_FILES - "${MINDDATA_DIR}/kernels/image/affine_op.cc" - "${MINDDATA_DIR}/kernels/image/auto_contrast_op.cc" - "${MINDDATA_DIR}/kernels/image/bounding_box_op.cc" - "${MINDDATA_DIR}/kernels/image/bounding_box_augment_op.cc" - "${MINDDATA_DIR}/kernels/image/concatenate_op.cc" - "${MINDDATA_DIR}/kernels/image/cut_out_op.cc" - "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc" - "${MINDDATA_DIR}/kernels/image/equalize_op.cc" - "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" - "${MINDDATA_DIR}/kernels/image/image_utils.cc" - 
"${MINDDATA_DIR}/kernels/image/invert_op.cc" - "${MINDDATA_DIR}/kernels/image/math_utils.cc" - "${MINDDATA_DIR}/kernels/image/mixup_batch_op.cc" - "${MINDDATA_DIR}/kernels/image/pad_op.cc" - "${MINDDATA_DIR}/kernels/image/posterize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_affine_op.cc" - "${MINDDATA_DIR}/kernels/image/random_color_adjust_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" - "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" - "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" - "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" - "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" - "${MINDDATA_DIR}/kernels/image/rescale_op.cc" - "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" - "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" - "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" - "${MINDDATA_DIR}/kernels/image/solarize_op.cc" - "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" - "${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" - "${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_color_op.cc" + "${MINDDATA_DIR}/kernels/image/affine_op.cc" + "${MINDDATA_DIR}/kernels/image/auto_contrast_op.cc" + "${MINDDATA_DIR}/kernels/image/bounding_box_op.cc" + 
"${MINDDATA_DIR}/kernels/image/bounding_box_augment_op.cc" + "${MINDDATA_DIR}/kernels/image/concatenate_op.cc" + "${MINDDATA_DIR}/kernels/image/cut_out_op.cc" + "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc" + "${MINDDATA_DIR}/kernels/image/equalize_op.cc" + "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" + "${MINDDATA_DIR}/kernels/image/image_utils.cc" + "${MINDDATA_DIR}/kernels/image/invert_op.cc" + "${MINDDATA_DIR}/kernels/image/math_utils.cc" + "${MINDDATA_DIR}/kernels/image/mixup_batch_op.cc" + "${MINDDATA_DIR}/kernels/image/pad_op.cc" + "${MINDDATA_DIR}/kernels/image/posterize_op.cc" + "${MINDDATA_DIR}/kernels/image/random_affine_op.cc" + "${MINDDATA_DIR}/kernels/image/random_color_adjust_op.cc" + "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc" + "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc" + "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" + "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" + "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" + "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" + "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" + "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" + "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" + "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" + "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" + "${MINDDATA_DIR}/kernels/image/rescale_op.cc" + "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" + "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" + "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" + "${MINDDATA_DIR}/kernels/image/solarize_op.cc" + "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" + 
"${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" + "${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" + "${MINDDATA_DIR}/kernels/image/random_color_op.cc" ) list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES @@ -239,47 +245,114 @@ if (BUILD_MINDDATA STREQUAL "full") include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache") if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64)) - set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc) + set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc) endif () add_library(minddata-lite SHARED ${MINDDATA_API_SRC_FILES} - ${MINDDATA_CALLBACK_SRC_FILES} - ${MINDDATA_CORE_SRC_FILES} - ${MINDDATA_ENGINE_SRC_FILES} - #${MINDDATA_ENGINE_CACHE_SRC_FILES} - ${MINDDATA_ENGINE_CONSUMERS_SRC_FILES} - ${MINDDATA_ENGINE_DATASETOPS_SRC_FILES} - ${MINDDATA_ENGINE_DATASETOPS_MAPOP_SRC_FILES} - ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES} - ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} - ${MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES} - ${MINDDATA_ENGINE_IR_CACHE_SRC_FILES} - ${MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES} - ${MINDDATA_ENGINE_OPT_SRC_FILES} - ${MINDDATA_ENGINE_OPT_OPTIONAL_SRC_FILES} - ${MINDDATA_ENGINE_OPT_POST_SRC_FILES} - ${MINDDATA_ENGINE_OPT_PRE_SRC_FILES} - ${MINDDATA_ENGINE_OPT_UTIL_SRC_FILES} - ${MINDDATA_ENGINE_PERF_SRC_FILES} - ${MINDDATA_KERNELS_SRC_FILES} - ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} - ${MINDDATA_KERNELS_IMAGE_SRC_FILES} - ${MINDDATA_KERNELS_DATA_SRC_FILES} - ${MINDDATA_UTIL_SRC_FILES} - ${MINDDATA_EXAMPLE_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc - ${CORE_DIR}/utils/ms_utils.cc - ) + ${MINDDATA_CALLBACK_SRC_FILES} + ${MINDDATA_CORE_SRC_FILES} + ${MINDDATA_ENGINE_SRC_FILES} + #${MINDDATA_ENGINE_CACHE_SRC_FILES} + ${MINDDATA_ENGINE_CONSUMERS_SRC_FILES} + ${MINDDATA_ENGINE_DATASETOPS_SRC_FILES} + ${MINDDATA_ENGINE_DATASETOPS_MAPOP_SRC_FILES} 
+ ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES} + ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} + ${MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES} + ${MINDDATA_ENGINE_IR_CACHE_SRC_FILES} + ${MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES} + ${MINDDATA_ENGINE_OPT_SRC_FILES} + ${MINDDATA_ENGINE_OPT_OPTIONAL_SRC_FILES} + ${MINDDATA_ENGINE_OPT_POST_SRC_FILES} + ${MINDDATA_ENGINE_OPT_PRE_SRC_FILES} + ${MINDDATA_ENGINE_OPT_UTIL_SRC_FILES} + ${MINDDATA_ENGINE_PERF_SRC_FILES} + ${MINDDATA_KERNELS_SRC_FILES} + ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} + ${MINDDATA_KERNELS_IMAGE_SRC_FILES} + ${MINDDATA_KERNELS_DATA_SRC_FILES} + ${MINDDATA_UTIL_SRC_FILES} + ${MINDDATA_EXAMPLE_SRC} + ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc + ${CORE_DIR}/utils/ms_utils.cc + ) find_package(Threads REQUIRED) target_link_libraries(minddata-lite - securec - jpeg-turbo - jpeg - mindspore::json - Threads::Threads - ) + securec + jpeg-turbo + jpeg + mindspore::json + Threads::Threads + ) + + # ref: https://github.com/android/ndk/issues/1202 + if (PLATFORM_ARM32) + file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a) + if (LIBCLANG_RT_LIB STREQUAL "") + MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-androi2d.a in $ENV{ANDROID_NDK}") + endif() + target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB}) + endif() + + if (PLATFORM_ARM32 OR PLATFORM_ARM64) + target_link_libraries(minddata-lite log) + elseif (BUILD_MINDDATA_EXAMPLE) + endif() +elseif (BUILD_MINDDATA STREQUAL "wrapper") + include_directories("${MINDDATA_DIR}/kernels/image") + include_directories("${MINDDATA_DIR}/util") + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/wrapper) + set(MINDDATA_TODAPI_SRC + ${MINDDATA_DIR}/core/tensor_shape.cc + ${MINDDATA_DIR}/core/tensor.cc + ${MINDDATA_DIR}/core/config_manager.cc + ${MINDDATA_DIR}/core/data_type.cc + ${MINDDATA_DIR}/core/tensor_helpers.cc + ${MINDDATA_DIR}/core/global_context.cc + ${MINDDATA_DIR}/core/tensor_row.cc + 
${MINDDATA_DIR}/api/vision.cc + ${MINDDATA_DIR}/api/execute.cc + ${MINDDATA_DIR}/api/transforms.cc + ${MINDDATA_DIR}/api/de_tensor.cc + ${MINDDATA_DIR}/util/path.cc + ${MINDDATA_DIR}/util/status.cc + ${MINDDATA_DIR}/util/data_helper.cc + ${MINDDATA_DIR}/util/memory_pool.cc + ${MINDDATA_DIR}/engine/data_schema.cc + ${MINDDATA_DIR}/kernels/tensor_op.cc + ${MINDDATA_DIR}/kernels/image/lite_image_utils.cc + ${MINDDATA_DIR}/kernels/image/center_crop_op.cc + ${MINDDATA_DIR}/kernels/image/crop_op.cc + ${MINDDATA_DIR}/kernels/image/normalize_op.cc + ${MINDDATA_DIR}/kernels/image/resize_op.cc + ${MINDDATA_DIR}/kernels/data/compose_op.cc + ${MINDDATA_DIR}/kernels/data/duplicate_op.cc + ${MINDDATA_DIR}/kernels/data/one_hot_op.cc + ${MINDDATA_DIR}/kernels/data/random_apply_op.cc + ${MINDDATA_DIR}/kernels/data/random_choice_op.cc + ${MINDDATA_DIR}/kernels/data/type_cast_op.cc + ${MINDDATA_DIR}/kernels/data/data_utils.cc + ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc + ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/album_op_android.cc + ) + + add_library(minddata-lite SHARED + ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc + ${CORE_DIR}/utils/ms_utils.cc + ${MINDDATA_TODAPI_SRC} + ) + + find_package(Threads REQUIRED) + target_link_libraries(minddata-lite + securec + jpeg-turbo + jpeg + mindspore::json + Threads::Threads + ) # ref: https://github.com/android/ndk/issues/1202 if (PLATFORM_ARM32) @@ -293,11 +366,6 @@ if (BUILD_MINDDATA STREQUAL "full") if (PLATFORM_ARM32 OR PLATFORM_ARM64) target_link_libraries(minddata-lite log) elseif (BUILD_MINDDATA_EXAMPLE) - # add_executable(mdlite-example ${CMAKE_CURRENT_SOURCE_DIR}/example/x86-example.cc) - # target_link_libraries(mdlite-example minddata-lite) - # add_custom_command(TARGET mdlite-example POST_BUILD - # COMMAND cp -rf ${CMAKE_CURRENT_SOURCE_DIR}/example/testCifar10Data ${CMAKE_BINARY_DIR}/minddata - # ) endif() elseif (BUILD_MINDDATA STREQUAL "lite") list(REMOVE_ITEM 
MINDDATA_CORE_SRC_FILES "${MINDDATA_DIR}/core/client.cc") @@ -374,9 +442,6 @@ elseif (BUILD_MINDDATA STREQUAL "lite") securec jpeg-turbo jpeg - # opencv_core - # opencv_imgcodecs - # opencv_imgproc mindspore::json ) diff --git a/mindspore/lite/minddata/wrapper/MDToDApi.cc b/mindspore/lite/minddata/wrapper/MDToDApi.cc index b914568d7e..281ff4ca24 100644 --- a/mindspore/lite/minddata/wrapper/MDToDApi.cc +++ b/mindspore/lite/minddata/wrapper/MDToDApi.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "MDToDApi.h" +#include "MDToDApi.h" //NOLINT #include #include @@ -22,7 +22,8 @@ #include #include #include -#include "minddata/dataset/include/datasets.h" + +#include "album_op_android.h" //NOLINT #include "minddata/dataset/include/execute.h" #include "minddata/dataset/util/path.h" #include "minddata/dataset/include/vision.h" @@ -35,7 +36,7 @@ using mindspore::dataset::Path; using mindspore::dataset::Tensor; -using mindspore::dataset; +using TensorOperation = mindspore::dataset::TensorOperation; using mindspore::LogStream; using mindspore::MsLogLevel::DEBUG; @@ -48,22 +49,21 @@ using mindspore::dataset::Status; class MDToDApi { public: - std::shared_ptr _ds; - std::shared_ptr _iter; + std::shared_ptr _iter; std::vector> _augs; std::string _storage_folder; std::string _folder_path; bool _hasBatch; int64_t _file_id; - MDToDApi() : _ds(nullptr), _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) { - MS_LOG(WARNING) << "MDToDAPI Call constructor"; + public: + MDToDApi() : _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) { + MS_LOG(WARNING) << "MDToDAPI Call constractor"; } ~MDToDApi() { - MS_LOG(WARNING) << "MDToDAPI Call destructor"; + MS_LOG(WARNING) << "MDToDAPI Call destractor"; + // derefernce dataset and iterator _augs.clear(); - _ds = nullptr; - _iter = nullptr; } }; @@ -79,7 +79,9 @@ std::vector MDToDBuffToVector(MDToDBuff_t 
StrBuff) { return strVector; } -extern "C" int MDToDApi_pathTest(const char *path) { +extern "C" + +int MDToDApi_pathTest(const char* path) { Path f(path); MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath(); // Print out the first few items in the directory @@ -114,36 +116,31 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) { std::vector Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2); - std::shared_ptr resize_op = vision::Resize(Resize); + std::shared_ptr resize_op = mindspore::dataset::vision::Resize(Resize); assert(resize_op != nullptr); MS_LOG(WARNING) << "Push back resize"; mapOperations.push_back(resize_op); + // hasBatch = true; Batch not currently supported inMInddata-Lite } if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) { std::vector Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2); - std::shared_ptr center_crop_op = vision::CenterCrop(Crop); + std::shared_ptr center_crop_op = mindspore::dataset::vision::CenterCrop(Crop); assert(center_crop_op != nullptr); MS_LOG(WARNING) << "Push back crop"; mapOperations.push_back(center_crop_op); + // hasBatch = true; Batch not currently supported inMInddata-Lite } } - std::shared_ptr ds = nullptr; - MS_LOG(INFO) << "Read id =" << MDConf.fileid << " (-1) for all"; + + MS_LOG(INFO) << "Read id=" << MDConf.fileid << " (-1) for all"; + std::shared_ptr iter = nullptr; + const std::set exts = {}; if (MDConf.fileid > -1) { - // read specific image using SequentialSampler - ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(MDConf.fileid, 1L)); + // read specific image using SequentialSampler witn + iter = std::make_shared(folder_path, true, schema_file, exts, MDConf.fileid); } else { - // Distributed sampler takes num_shards then shard_id - ds = Album(folder_path, schema_file, column_names, true, SequentialSampler()); + iter = std::make_shared(folder_path, true, schema_file, 
exts); } - ds = ds->SetNumWorkers(1); - - assert(ds != nullptr); - - // Create a Repeat operation on ds - int32_t repeat_num = 1; - ds = ds->Repeat(repeat_num); - assert(ds != nullptr); // Create objects for the tensor ops MS_LOG(INFO) << " Create pipline parameters"; @@ -154,16 +151,7 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { } bool hasBatch = false; - // Create an iterator over the result of the above dataset - // This will trigger the creation of the Execution Tree and launch it. - std::shared_ptr iter = ds->CreateIterator(); - if (nullptr == iter) { - MS_LOG(ERROR) << "Iterator creation failed"; - return nullptr; - } - assert(iter != nullptr); MDToDApi *pMDToDApi = new MDToDApi; - pMDToDApi->_ds = ds; pMDToDApi->_iter = iter; pMDToDApi->_augs = mapOperations; pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath); @@ -173,11 +161,11 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { } template -void MDBuffToVector(MDToDBuff_t MDBuff, std::vector *vec) { - vec.clear(); +void MDBuffToVector(const MDToDBuff_t MDBuff, std::vector *vec) { + vec->clear(); if (MDBuff.DataSize > 0) { int nofElements = MDBuff.DataSize / sizeof(T); - *vec.assign(reinterpret_cast(MDBuff.Buff), reinterpret_cast(MDBuff.Buff) + nofElements); + vec->assign(reinterpret_cast(MDBuff.Buff), reinterpret_cast(MDBuff.Buff) + nofElements); } } @@ -217,7 +205,7 @@ void GetTensorToBuff(std::unordered_map> ro resBuff->TensorSize[0] = 1; } if (column->shape()[firstDim] > 0) { - if (DataType::DE_STRING == column->type()) { + if (mindspore::dataset::DataType::DE_STRING == column->type()) { std::string str; for (int ix = 0; ix < column->shape()[firstDim]; ix++) { std::string_view strView; @@ -238,14 +226,14 @@ void GetTensorToBuff(std::unordered_map> ro MS_LOG(ERROR) << "memcpy_s return: " << ret; } } else { - DataHelper dh; + mindspore::dataset::DataHelper dh; resBuff->DataSize = dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, 
resBuff->MaxBuffSize); } MS_LOG(INFO) << columnName << " " << resBuff->DataSize << " bytesCopyed to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") "; if (0 == resBuff->DataSize) { - MS_LOG(ERROR) << "Copy Failed!!!! " << columnName << " Too large" + MS_LOG(ERROR) << "COPY FAIL!!!! " << columnName << " Too large" << "."; // memcpy failed } } else { @@ -259,7 +247,7 @@ void GetTensorToBuff(std::unordered_map> ro extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { MS_LOG(INFO) << "Start GetNext"; if (pMDToDApi == nullptr) { - MS_LOG(ERROR) << "GetNext called with nullptr. Abort"; + MS_LOG(ERROR) << "GetNext called with null ptr. abort"; assert(pMDToDApi != nullptr); } @@ -271,12 +259,13 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { // get next row for dataset std::unordered_map> row; if (pMDToDApi->_iter == nullptr) { - MS_LOG(ERROR) << "GetNext called with no iterator. abort"; + MS_LOG(ERROR) << "GetNext called with no iteratoe. abort"; return -1; } // create Execute functions, this replaces Map in Pipeline - pMDToDApi->_iter->GetNextRow(&row); - if (row.size() != 0) { + + bool ret = pMDToDApi->_iter->GetNextRow(&row); + if (row.size() != 0 && ret) { if ((pMDToDApi->_augs).size() > 0) { // String and Tensors GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff); @@ -285,7 +274,7 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { for (int i = 0; i < (pMDToDApi->_augs).size(); i++) { // each Execute call will invoke a memcpy, this cannot really be optimized further // for this use case, std move is added for fail save. 
- row["image"] = Execute((pMDToDApi->_augs)[i])(std::move(row["image"])); + row["image"] = mindspore::dataset::Execute((pMDToDApi->_augs)[i])(std::move(row["image"])); if (row["image"] == nullptr) { // nullptr means that the eager mode image processing failed, we fail in this case return -1; @@ -316,20 +305,18 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) { // Manually terminate the pipeline - pMDToDApi->_iter->Stop(); MS_LOG(WARNING) << "pipline stoped"; return 0; } extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) { - MS_LOG(WARNING) << "pipeline deleted start"; - pMDToDApi->_iter->Stop(); + MS_LOG(WARNING) << "pipline deleted start"; delete pMDToDApi; - MS_LOG(WARNING) << "pipeline deleted end"; + MS_LOG(WARNING) << "pipline deleted end"; return 0; } -int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) { +int GetJsonFullFileName(const MDToDApi *pMDToDApi, std::string *filePath) { int64_t file_id = pMDToDApi->_file_id; if (file_id < 0) { MS_LOG(ERROR) << "Illigal file ID to update: " << file_id << "."; @@ -343,12 +330,12 @@ int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) { extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings, size_t emmbeddingsSize) { auto columnName = std::string(column); - MS_LOG(INFO) << "Start update " << columnName; + MS_LOG(INFO) << "Start Update " << columnName; std::string converted = std::to_string(pMDToDApi->_file_id); std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin"; - DataHelper dh; - MS_LOG(INFO) << "Try to save file " << embedding_file_path; + mindspore::dataset::DataHelper dh; + MS_LOG(INFO) << "Try to Save file " << embedding_file_path; std::vector bin_content(emmbeddings, emmbeddings + emmbeddingsSize); Status rc = dh.template WriteBinFile(embedding_file_path, bin_content); if (rc.IsError()) { @@ -379,8 +366,8 
@@ extern "C" int MDToDApi_UpdateStringArray(MDToDApi *pMDToDApi, const char *colum MS_LOG(ERROR) << "Failed to update " << columnName; return -1; } - MS_LOG(INFO) << "Start Update string array column: " << columnName << " in file " << file_path; - DataHelper dh; + MS_LOG(INFO) << "Start Update string Array column: " << columnName << " in file " << file_path; + mindspore::dataset::DataHelper dh; std::vector strVec; if (MDbuff.DataSize > 0) { const char *p = reinterpret_cast(MDbuff.Buff); @@ -405,7 +392,7 @@ extern "C" int MDToDApi_UpdateFloatArray(MDToDApi *pMDToDApi, const char *column return -1; } MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path; - DataHelper dh; + mindspore::dataset::DataHelper dh; std::vector vec; MDBuffToVector(MDBuff, &vec); Status rc = dh.UpdateArray(file_path, columnName, vec); @@ -423,7 +410,7 @@ extern "C" int MDToDApi_UpdateIsForTrain(MDToDApi *pMDToDApi, int32_t isForTrain if (file_id < 0) return -1; std::string converted = std::to_string(pMDToDApi->_file_id); std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; - DataHelper dh; + mindspore::dataset::DataHelper dh; MS_LOG(INFO) << "Updating file: " << file_path; Status rc = dh.UpdateValue(file_path, "_isForTrain", isForTrain, ""); if (rc.IsError()) { @@ -440,7 +427,7 @@ extern "C" int MDToDApi_UpdateNoOfFaces(MDToDApi *pMDToDApi, int32_t noOfFaces) if (file_id < 0) return -1; std::string converted = std::to_string(pMDToDApi->_file_id); std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; - DataHelper dh; + mindspore::dataset::DataHelper dh; MS_LOG(INFO) << "Updating file: " << file_path; Status rc = dh.UpdateValue(file_path, "_noOfFaces", noOfFaces, ""); if (rc.IsError()) { diff --git a/mindspore/lite/minddata/wrapper/album_op_android.cc b/mindspore/lite/minddata/wrapper/album_op_android.cc new file mode 100644 index 0000000000..0e101e495d --- /dev/null +++ 
b/mindspore/lite/minddata/wrapper/album_op_android.cc @@ -0,0 +1,470 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "album_op_android.h" //NOLINT +#include +#include +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/kernels/image/lite_image_utils.h" + +namespace mindspore { +namespace dataset { + +AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file, + const std::set &exts) + : folder_path_(file_dir), + decode_(do_decode), + extensions_(exts), + schema_file_(schema_file), + row_cnt_(0), + buf_cnt_(0), + current_cnt_(0), + dirname_offset_(0), + sampler_(false), + sampler_index_(0) { + PrescanEntry(); +} + +AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file, + const std::set &exts, uint32_t index) + : folder_path_(file_dir), + decode_(do_decode), + extensions_(exts), + schema_file_(schema_file), + row_cnt_(0), + buf_cnt_(0), + current_cnt_(0), + dirname_offset_(0), + sampler_(true), + sampler_index_(0) { + PrescanEntry(); +} + +// Helper function for string comparison +// album sorts the files via numerical values, so this is not a simple string comparison +bool StrComp(const std::string &a, const std::string &b) { + // returns 1 if string "a" represent a numeric value less than string "b" + // the following will always return name, provided there is only one "." 
character in name + "." character is guaranteed to exist since the extension is checked before this function call. + int64_t value_a = std::atoi(a.substr(1, a.find(".")).c_str()); + int64_t value_b = std::atoi(b.substr(1, b.find(".")).c_str()); + return value_a < value_b; +} + +// Single thread to go through the folder directory and gets all file names +// calculate numRows then return +Status AlbumOp::PrescanEntry() { + data_schema_ = std::make_unique(); + Path schema_file(schema_file_); + if (schema_file_ == "" || !schema_file.Exists()) { + RETURN_STATUS_UNEXPECTED("Invalid file, schema_file is invalid or not set: " + schema_file_); + } else { + MS_LOG(WARNING) << "Schema file provided: " << schema_file_ << "."; + data_schema_->LoadSchemaFile(schema_file_, columns_to_load_); + } + + for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { + column_name_id_map_[data_schema_->column(i).name()] = i; + } + + Path folder(folder_path_); + dirname_offset_ = folder_path_.length(); + std::shared_ptr dirItr = Path::DirIterator::OpenDirectory(&folder); + if (folder.Exists() == false || dirItr == nullptr) { + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_); + } + MS_LOG(WARNING) << "Album folder Path found: " << folder_path_ << "."; + + while (dirItr->hasNext()) { + Path file = dirItr->next(); + if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { + (void)image_rows_.push_back(file.toString().substr(dirname_offset_)); + } else { + MS_LOG(WARNING) << "Album operator unsupported file found: " << file.toString() + << ", extension: " << file.Extension() << "."; + } + } + + std::sort(image_rows_.begin(), image_rows_.end(), StrComp); + + if (image_rows_.size() == 0) { + RETURN_STATUS_UNEXPECTED( + "Invalid data, no valid data matching the dataset API AlbumDataset.
Please check file path or dataset API."); + } + + if (sampler_) { + if (sampler_index_ < 0 || sampler_index_ >= image_rows_.size()) { + RETURN_STATUS_UNEXPECTED("the sampler index was out of range"); + } + std::vector tmp; + tmp.emplace_back(image_rows_[sampler_index_]); + image_rows_.clear(); + image_rows_ = tmp; + } + + return Status::OK(); +} + +// contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_ +// IMPORTANT: 1 IOBlock produces 1 DataBuffer +bool AlbumOp::GetNextRow(std::unordered_map> *map_row) { + if (map_row == nullptr) { + MS_LOG(WARNING) << "GetNextRow in AlbumOp: the point of map_row is nullptr"; + return false; + } + + if (current_cnt_ == image_rows_.size()) { + return false; + } + + Status ret = LoadTensorRow(current_cnt_, image_rows_[current_cnt_], map_row); + if (ret.IsError()) { + MS_LOG(ERROR) << "GetNextRow in AlbumOp: " << ret.ToString() << "\n"; + return false; + } + current_cnt_++; + return true; +} + +// Only support JPEG/PNG/GIF/BMP +// Optimization: Could take in a tensor +// This function does not return status because we want to just skip bad input, not crash +bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { + std::ifstream file_handle; + constexpr int read_num = 3; + *valid = false; + file_handle.open(file_name, std::ios::binary | std::ios::in); + if (!file_handle.is_open()) { + return false; + } + unsigned char file_type[read_num]; + (void)file_handle.read(reinterpret_cast(file_type), read_num); + + if (file_handle.fail()) { + file_handle.close(); + return false; + } + file_handle.close(); + if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { + // Normal JPEGs start with \xff\xd8\xff\xe0 + // JPEG with EXIF stats with \xff\xd8\xff\xe1 + // Use \xff\xd8\xff to cover both. 
+ *valid = true; + } + return true; +} + +Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorPtr *tensor) { + TensorPtr image; + std::ifstream fs; + fs.open(image_file_path, std::ios::binary | std::ios::in); + if (fs.fail()) { + MS_LOG(WARNING) << "File not found:" << image_file_path << "."; + // If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor + RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor)); + return Status::OK(); + } + // Hack logic to replace png images with empty tensor + Path file(image_file_path); + std::set png_ext = {".png", ".PNG"}; + if (png_ext.find(file.Extension()) != png_ext.end()) { + // load empty tensor since image is not jpg + MS_LOG(INFO) << "PNG!" << image_file_path << "."; + RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor)); + return Status::OK(); + } + // treat bin files separately + std::set bin_ext = {".bin", ".BIN"}; + if (bin_ext.find(file.Extension()) != bin_ext.end()) { + // load empty tensor since image is not jpg + MS_LOG(INFO) << "Bin file found" << image_file_path << "."; + RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, tensor)); + // row->push_back(std::move(image)); + return Status::OK(); + } + + // check that the file is an image before decoding + bool valid = false; + bool check_success = CheckImageType(image_file_path, &valid); + if (!check_success || !valid) { + RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor)); + return Status::OK(); + } + // if it is a jpeg image, load and try to decode + RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image)); + if (decode_ && valid) { + Status rc = Decode(image, tensor); + if (rc.IsError()) { + RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor)); + return Status::OK(); + } + } + // row->push_back(std::move(image)); + return Status::OK(); +} + +Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { + std::vector data = 
json_obj.get>(); + + MS_LOG(WARNING) << "String array label found: " << data << "."; + // TensorPtr label; + RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); + // row->push_back(std::move(label)); + return Status::OK(); +} + +Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { + std::string data = json_obj; + // now we iterate over the elements in json + + MS_LOG(INFO) << "String label found: " << data << "."; + TensorPtr label; + RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); + // row->push_back(std::move(label)); + return Status::OK(); +} + +Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { + // TensorPtr label; + // consider templating this function to handle all ints + if (data_schema_->column(col_num).type() == DataType::DE_INT64) { + std::vector data; + + // Iterate over the integer list and add those values to the output shape tensor + auto items = json_obj.items(); + using it_type = decltype(items.begin()); + (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); + + RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); + } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { + std::vector data; + + // Iterate over the integer list and add those values to the output shape tensor + auto items = json_obj.items(); + using it_type = decltype(items.begin()); + (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); + + RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); + } else { + RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " + + data_schema_->column(col_num).type().ToString()); + } + // row->push_back(std::move(label)); + return Status::OK(); +} + +Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { + 
// TensorPtr float_array; + // consider templating this function to handle all ints + if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { + std::vector data; + + // Iterate over the integer list and add those values to the output shape tensor + auto items = json_obj.items(); + using it_type = decltype(items.begin()); + (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); + + RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); + } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { + std::vector data; + + // Iterate over the integer list and add those values to the output shape tensor + auto items = json_obj.items(); + using it_type = decltype(items.begin()); + (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); + + RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); + } else { + RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " + + data_schema_->column(col_num).type().ToString()); + } + // row->push_back(std::move(float_array)); + return Status::OK(); +} + +Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor) { + if (data_schema_->column(col_num).type() == DataType::DE_STRING) { + // TensorPtr id; + RETURN_IF_NOT_OK(Tensor::CreateScalar(file, tensor)); + // row->push_back(std::move(id)); + return Status::OK(); + } + // hack to get the file name without extension, the 1 is to get rid of the backslash character + int64_t image_id = std::atoi(file.substr(1, file.find(".")).c_str()); + // TensorPtr id; + RETURN_IF_NOT_OK(Tensor::CreateScalar(image_id, tensor)); + MS_LOG(INFO) << "File ID " << image_id << "."; + // row->push_back(std::move(id)); + return Status::OK(); +} + +Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) { + // hack to get the file name without extension, the 1 is to get rid of the 
backslash character + // TensorPtr empty_tensor; + RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), tensor)); + // row->push_back(std::move(empty_tensor)); + return Status::OK(); +} + +// Loads a tensor with float value, issue with float64, we don't have reverse look up to the type +// So we actually have to check what type we want to fill the tensor with. +// Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to +// only be float32, seems like a weird limitation to impose +Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { + // TensorPtr float_tensor; + if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { + double data = json_obj; + MS_LOG(INFO) << "double found: " << json_obj << "."; + RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); + } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { + float data = json_obj; + RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); + MS_LOG(INFO) << "float found: " << json_obj << "."; + } + // row->push_back(std::move(float_tensor)); + return Status::OK(); +} + +// Loads a tensor with int value, we have to cast the value to type specified in the schema. +Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { + // TensorPtr int_tensor; + if (data_schema_->column(col_num).type() == DataType::DE_INT64) { + int64_t data = json_obj; + MS_LOG(INFO) << "int64 found: " << json_obj << "."; + RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); + } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { + int32_t data = json_obj; + RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); + MS_LOG(INFO) << "int32 found: " << json_obj << "."; + } + // row->push_back(std::move(int_tensor)); + return Status::OK(); +} + +// Load 1 TensorRow (image,label) using 1 ImageColumns. 
1 function call produces 1 TensorRow in a DataBuffer +// possible optimization: the helper functions of LoadTensorRow should be optimized +// to take a reference to a column descriptor? +// the design of this class is to make the code more readable, forgoing minor performance gain like +// getting rid of duplicated checks +Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, + std::unordered_map> *map_row) { + // testing here is to just print out file path + // (*row) = TensorRow(row_id, {}); + MS_LOG(INFO) << "Image row file: " << file << "."; + + std::ifstream file_handle(folder_path_ + file); + if (!file_handle.is_open()) { + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file); + } + std::string line; + while (getline(file_handle, line)) { + try { + nlohmann::json js = nlohmann::json::parse(line); + MS_LOG(INFO) << "This Line: " << line << "."; + + // note if take a schema here, then we have to iterate over all column descriptors in schema and check for key + // get columns in schema: + int32_t columns = data_schema_->NumColumns(); + + // loop over each column descriptor, this can optimized by switch cases + for (int32_t i = 0; i < columns; i++) { + // special case to handle + if (data_schema_->column(i).name() == "id") { + // id is internal, special case to load from file + TensorPtr tensor; + RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor)); + (*map_row)[data_schema_->column(i).name()] = tensor; + continue; + } + // find if key does not exist, insert placeholder nullptr if not found + if (js.find(data_schema_->column(i).name()) == js.end()) { + // iterator not found, push nullptr as placeholder + MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << "."; + TensorPtr tensor; + RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor)); + (*map_row)[data_schema_->column(i).name()] = tensor; + continue; + } + nlohmann::json column_value = js.at(data_schema_->column(i).name()); + 
MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << "."; + bool is_array = column_value.is_array(); + // load single string + if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) { + TensorPtr tensor; + RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor)); + (*map_row)[data_schema_->column(i).name()] = tensor; + continue; + } + // load string array + if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) { + TensorPtr tensor; + RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor)); + (*map_row)[data_schema_->column(i).name()] = tensor; + continue; + } + // load image file + if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) { + std::string image_file_path = column_value; + TensorPtr tensor; + RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor)); + (*map_row)[data_schema_->column(i).name()] = tensor; + continue; + } + // load float value + if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 || + data_schema_->column(i).type() == DataType::DE_FLOAT64)) { + TensorPtr tensor; + RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor)); + (*map_row)[data_schema_->column(i).name()] = tensor; + continue; + } + // load float array + if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 || + data_schema_->column(i).type() == DataType::DE_FLOAT64)) { + TensorPtr tensor; + RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor)); + (*map_row)[data_schema_->column(i).name()] = tensor; + continue; + } + // int value + if (!is_array && (data_schema_->column(i).type() == DataType::DE_INT64 || + data_schema_->column(i).type() == DataType::DE_INT32)) { + TensorPtr tensor; + RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor)); + (*map_row)[data_schema_->column(i).name()] = tensor; + continue; + } + // int array + if (is_array && (data_schema_->column(i).type() == DataType::DE_INT64 || + 
data_schema_->column(i).type() == DataType::DE_INT32)) { + TensorPtr tensor; + RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor)); + (*map_row)[data_schema_->column(i).name()] = tensor; + continue; + } else { + MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported."; + continue; + } + } + } catch (const std::exception &err) { + file_handle.close(); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file); + } + } + file_handle.close(); + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/lite/minddata/wrapper/album_op_android.h b/mindspore/lite/minddata/wrapper/album_op_android.h new file mode 100644 index 0000000000..3c2ad0d66a --- /dev/null +++ b/mindspore/lite/minddata/wrapper/album_op_android.h @@ -0,0 +1,173 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/engine/data_buffer.h" +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { +// Forward declares +template +class Queue; + +// Define row information as a list of file objects to read +using FolderImages = std::shared_ptr>>; + +/// \class AlbumOp +class AlbumOp { + public: + /// \brief Constructor + /// \param[in] file_dir - directory of Album + /// \param[in] do_decode - decode image files + /// \param[in] schema_file - schema file + /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir + AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file, + const std::set &exts); + + /// \brief Constructor + /// \param[in] file_dir - directory of Album + /// \param[in] do_decode - decode image files + /// \param[in] schema_file - schema file + /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir + /// \param[in] index - the specific file index + AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file, + const std::set &exts, uint32_t index); + + /// \brief Destructor. 
+ ~AlbumOp() = default; + + /// \brief Initialize AlbumOp related var, calls the function to walk all files + /// \return - The error code returned + Status PrescanEntry(); + + /// \brief Initialize AlbumOp related var, calls the function to walk all files + /// \return - The error code returned + bool GetNextRow(std::unordered_map> *map_row); + + /// \brief Check if image is valid. Only support JPEG/PNG/GIF/BMP + /// This function could be optimized to return the tensor to reduce open/closing files + /// \return bool - if file is bad then return false + bool CheckImageType(const std::string &file_name, bool *valid); + + // Op name getter + // @return Name of the current Op + std::string Name() const { return "AlbumOp"; } + + private: + /// \brief Load image to tensor + /// \param[in] image_file Image name of file + /// \param[in] col_num Column num in schema + /// \param[inout] Tensor to push to + /// \return Status The error code returned + Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorPtr *tensor); + + /// \brief Load vector of ints to tensor, append tensor to tensor + /// \param[in] json_obj Json object containing multi-dimensional label + /// \param[in] col_num Column num in schema + /// \param[inout] Tensor to push to + /// \return Status The error code returned + Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + + /// \brief Load vector of floats to tensor, append tensor to tensor + /// \param[in] json_obj Json object containing array data + /// \param[in] col_num Column num in schema + /// \param[inout] Tensor to push to + /// \return Status The error code returned + Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + + /// \brief Load string array into a tensor, append tensor to tensor + /// \param[in] json_obj Json object containing string tensor + /// \param[in] col_num Column num in schema + /// \param[inout] Tensor to push to
+ /// \return Status The error code returned + Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + + /// \brief Load string into a tensor, append tensor to tensor + /// \param[in] json_obj Json object containing string tensor + /// \param[in] col_num Column num in schema + /// \param[inout] Tensor to push to + /// \return Status The error code returned + Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + + /// \brief Load float value to tensor + /// \param[in] json_obj Json object containing float + /// \param[in] col_num Column num in schema + /// \param[inout] Tensor to push to + /// \return Status The error code returned + Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + + /// \brief Load int value to tensor + /// \param[in] json_obj Json object containing int + /// \param[in] col_num Column num in schema + /// \param[inout] Tensor to push to + /// \return Status The error code returned + Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + + /// \brief Load empty tensor to tensor + /// \param[in] col_num Column num in schema + /// \param[inout] Tensor to push to + /// \return Status The error code returned + Status LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor); + + /// \brief Load id from file name to tensor + /// \param[in] file The file name to get ID from + /// \param[in] col_num Column num in schema + /// \param[inout] Tensor to push to + /// \return Status The error code returned + Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor); + + /// \brief Load a tensor according to a json file + /// \param[in] row_id_type row_id - id for this tensor row + /// \param[in] ImageColumns file Json file location + /// \param[inout] TensorRow Json content stored into a tensor row + /// \return Status The error code returned + Status
LoadTensorRow(row_id_type row_id, const std::string &file, + std::unordered_map> *map_row); + + std::string folder_path_; // directory of image folder + bool decode_; + std::vector columns_to_load_; + std::set extensions_; // extensions allowed + std::unique_ptr data_schema_; + std::string schema_file_; + int64_t row_cnt_; + int64_t current_cnt_; + int64_t buf_cnt_; + int64_t dirname_offset_; + bool sampler_; + int64_t sampler_index_; + std::vector image_rows_; + std::unordered_map column_name_id_map_; +}; +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_