| @@ -49,7 +49,7 @@ usage() | |||||
| echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" | echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" | ||||
| echo " -D Enable dumping of function graph ir, default on" | echo " -D Enable dumping of function graph ir, default on" | ||||
| echo " -z Compile dataset & mindrecord, default on" | echo " -z Compile dataset & mindrecord, default on" | ||||
| echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv mode in lite predict" | |||||
| echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv, wrapper mode in lite predict" | |||||
| echo " -M Enable MPI and NCCL for GPU training, gpu default on" | echo " -M Enable MPI and NCCL for GPU training, gpu default on" | ||||
| echo " -V Specify the minimum required cuda version, default CUDA 10.1" | echo " -V Specify the minimum required cuda version, default CUDA 10.1" | ||||
| echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation" | echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation" | ||||
| @@ -129,7 +129,7 @@ checkopts() | |||||
| DEBUG_MODE="on" | DEBUG_MODE="on" | ||||
| ;; | ;; | ||||
| n) | n) | ||||
| if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" ]]; then | |||||
| if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" || "X$OPTARG" == "Xwrapper" ]]; then | |||||
| COMPILE_MINDDATA_LITE="$OPTARG" | COMPILE_MINDDATA_LITE="$OPTARG" | ||||
| else | else | ||||
| echo "Invalid value ${OPTARG} for option -n" | echo "Invalid value ${OPTARG} for option -n" | ||||
| @@ -678,7 +678,7 @@ build_lite() | |||||
| build_gtest | build_gtest | ||||
| fi | fi | ||||
| if [ "${COMPILE_MINDDATA_LITE}" == "lite" ] || [ "${COMPILE_MINDDATA_LITE}" == "full" ]; then | |||||
| if [[ "${COMPILE_MINDDATA_LITE}" == "lite" || "${COMPILE_MINDDATA_LITE}" == "full" || "${COMPILE_MINDDATA_LITE}" == "wrapper" ]]; then | |||||
| build_minddata_lite_deps | build_minddata_lite_deps | ||||
| fi | fi | ||||
| @@ -20,7 +20,7 @@ set(OPENCV_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/minddata/third_part | |||||
| set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf) | set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf) | ||||
| set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers) | set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers) | ||||
| if (BUILD_MINDDATA STREQUAL "full") | |||||
| if (BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") | |||||
| install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") | install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") | ||||
| if (PLATFORM_ARM64) | if (PLATFORM_ARM64) | ||||
| install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${COMPONENT_NAME}) | install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${COMPONENT_NAME}) | ||||
| @@ -28,7 +28,7 @@ set(MAIN_DIR ${DIR_PREFIX}-${MS_VERSION}) | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") | ||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") | ||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") | ||||
| set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv or full") | |||||
| set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full") | |||||
| set(BUILD_LITE "on") | set(BUILD_LITE "on") | ||||
| set(PLATFORM_ARM "off") | set(PLATFORM_ARM "off") | ||||
| if (PLATFORM_ARM64 OR PLATFORM_ARM32) | if (PLATFORM_ARM64 OR PLATFORM_ARM32) | ||||
| @@ -182,7 +182,7 @@ if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64) | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full") | |||||
| if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") | |||||
| # add sentencepiece dependency | # add sentencepiece dependency | ||||
| # include(${TOP_DIR}/cmake/external_libs/sentencepiece.cmake) | # include(${TOP_DIR}/cmake/external_libs/sentencepiece.cmake) | ||||
| # json | # json | ||||
| @@ -81,6 +81,12 @@ AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/util MINDDATA_UTIL_SRC_FILES) | |||||
| AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES) | AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES) | ||||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | |||||
| if (BUILD_MINDDATA STREQUAL "full") | |||||
| set(BUILD_MINDDATA "wrapper") | |||||
| endif () | |||||
| endif () | |||||
| if (BUILD_MINDDATA STREQUAL "full") | if (BUILD_MINDDATA STREQUAL "full") | ||||
| include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image") | include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image") | ||||
| list(REMOVE_ITEM MINDDATA_API_SRC_FILES | list(REMOVE_ITEM MINDDATA_API_SRC_FILES | ||||
| @@ -114,102 +120,102 @@ if (BUILD_MINDDATA STREQUAL "full") | |||||
| ) | ) | ||||
| list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES | list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES | ||||
| "${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/manifest_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/mindrecord_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/tf_reader_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/celeba_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/cifar_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/clue_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/coco_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/csv_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/image_folder_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/random_data_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/text_file_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||||
| ) | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/manifest_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/mindrecord_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/tf_reader_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/celeba_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/cifar_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/clue_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/coco_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/csv_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/image_folder_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/random_data_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/text_file_op.cc" | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES | list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES | ||||
| "${MINDDATA_DIR}/engine/datasetops/source/sampler/python_sampler.cc" | |||||
| ) | |||||
| "${MINDDATA_DIR}/engine/datasetops/source/sampler/python_sampler.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES | list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES | ||||
| "${MINDDATA_DIR}/engine/opt/post/repeat_pass.cc" | |||||
| ) | |||||
| "${MINDDATA_DIR}/engine/opt/post/repeat_pass.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_OPT_PRE_SRC_FILES | list(REMOVE_ITEM MINDDATA_ENGINE_OPT_PRE_SRC_FILES | ||||
| "${MINDDATA_DIR}/engine/opt/pre/cache_transform_pass.cc" | |||||
| "${MINDDATA_DIR}/engine/opt/pre/cache_error_pass.cc" | |||||
| ) | |||||
| "${MINDDATA_DIR}/engine/opt/pre/cache_transform_pass.cc" | |||||
| "${MINDDATA_DIR}/engine/opt/pre/cache_error_pass.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_IR_CACHE_SRC_FILES | list(REMOVE_ITEM MINDDATA_ENGINE_IR_CACHE_SRC_FILES | ||||
| "${MINDDATA_DIR}/engine/ir/cache/dataset_cache_impl.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/cache/pre_built_dataset_cache.cc" | |||||
| ) | |||||
| "${MINDDATA_DIR}/engine/ir/cache/dataset_cache_impl.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/cache/pre_built_dataset_cache.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES | list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES | ||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/generator_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/minddata_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/tf_record_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/voc_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/celeba_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar10_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar100_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/coco_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/csv_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/image_folder_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/random_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/text_file_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/clue_node.cc" | |||||
| ) | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/generator_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/minddata_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/tf_record_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/voc_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/celeba_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar10_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar100_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/coco_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/csv_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/image_folder_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/random_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/text_file_node.cc" | |||||
| "${MINDDATA_DIR}/engine/ir/datasetops/source/clue_node.cc" | |||||
| ) | |||||
| list(REMOVE_ITEM MINDDATA_KERNELS_IMAGE_SRC_FILES | list(REMOVE_ITEM MINDDATA_KERNELS_IMAGE_SRC_FILES | ||||
| "${MINDDATA_DIR}/kernels/image/affine_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/auto_contrast_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/bounding_box_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/bounding_box_augment_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/concatenate_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/cut_out_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/equalize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/image_utils.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/invert_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/math_utils.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/mixup_batch_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/pad_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/posterize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_affine_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_color_adjust_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/rescale_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/solarize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_color_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/affine_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/auto_contrast_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/bounding_box_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/bounding_box_augment_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/concatenate_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/cut_out_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/equalize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/image_utils.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/invert_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/math_utils.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/mixup_batch_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/pad_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/posterize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_affine_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_color_adjust_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/rescale_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/solarize_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" | |||||
| "${MINDDATA_DIR}/kernels/image/random_color_op.cc" | |||||
| ) | ) | ||||
| list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES | list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES | ||||
| @@ -239,47 +245,114 @@ if (BUILD_MINDDATA STREQUAL "full") | |||||
| include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache") | include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache") | ||||
| if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64)) | if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64)) | ||||
| set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc) | |||||
| set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc) | |||||
| endif () | endif () | ||||
| add_library(minddata-lite SHARED | add_library(minddata-lite SHARED | ||||
| ${MINDDATA_API_SRC_FILES} | ${MINDDATA_API_SRC_FILES} | ||||
| ${MINDDATA_CALLBACK_SRC_FILES} | |||||
| ${MINDDATA_CORE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_SRC_FILES} | |||||
| #${MINDDATA_ENGINE_CACHE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_CONSUMERS_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_DATASETOPS_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_DATASETOPS_MAPOP_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_IR_CACHE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_OPTIONAL_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_POST_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_PRE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_UTIL_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_PERF_SRC_FILES} | |||||
| ${MINDDATA_KERNELS_SRC_FILES} | |||||
| ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} | |||||
| ${MINDDATA_KERNELS_IMAGE_SRC_FILES} | |||||
| ${MINDDATA_KERNELS_DATA_SRC_FILES} | |||||
| ${MINDDATA_UTIL_SRC_FILES} | |||||
| ${MINDDATA_EXAMPLE_SRC} | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc | |||||
| ${CORE_DIR}/utils/ms_utils.cc | |||||
| ) | |||||
| ${MINDDATA_CALLBACK_SRC_FILES} | |||||
| ${MINDDATA_CORE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_SRC_FILES} | |||||
| #${MINDDATA_ENGINE_CACHE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_CONSUMERS_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_DATASETOPS_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_DATASETOPS_MAPOP_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_IR_CACHE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_OPTIONAL_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_POST_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_PRE_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_OPT_UTIL_SRC_FILES} | |||||
| ${MINDDATA_ENGINE_PERF_SRC_FILES} | |||||
| ${MINDDATA_KERNELS_SRC_FILES} | |||||
| ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} | |||||
| ${MINDDATA_KERNELS_IMAGE_SRC_FILES} | |||||
| ${MINDDATA_KERNELS_DATA_SRC_FILES} | |||||
| ${MINDDATA_UTIL_SRC_FILES} | |||||
| ${MINDDATA_EXAMPLE_SRC} | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc | |||||
| ${CORE_DIR}/utils/ms_utils.cc | |||||
| ) | |||||
| find_package(Threads REQUIRED) | find_package(Threads REQUIRED) | ||||
| target_link_libraries(minddata-lite | target_link_libraries(minddata-lite | ||||
| securec | |||||
| jpeg-turbo | |||||
| jpeg | |||||
| mindspore::json | |||||
| Threads::Threads | |||||
| ) | |||||
| securec | |||||
| jpeg-turbo | |||||
| jpeg | |||||
| mindspore::json | |||||
| Threads::Threads | |||||
| ) | |||||
| # ref: https://github.com/android/ndk/issues/1202 | |||||
| if (PLATFORM_ARM32) | |||||
| file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a) | |||||
| if (LIBCLANG_RT_LIB STREQUAL "") | |||||
| MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-androi2d.a in $ENV{ANDROID_NDK}") | |||||
| endif() | |||||
| target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB}) | |||||
| endif() | |||||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | |||||
| target_link_libraries(minddata-lite log) | |||||
| elseif (BUILD_MINDDATA_EXAMPLE) | |||||
| endif() | |||||
| elseif (BUILD_MINDDATA STREQUAL "wrapper") | |||||
| include_directories("${MINDDATA_DIR}/kernels/image") | |||||
| include_directories("${MINDDATA_DIR}/util") | |||||
| include_directories(${CMAKE_CURRENT_SOURCE_DIR}/wrapper) | |||||
| set(MINDDATA_TODAPI_SRC | |||||
| ${MINDDATA_DIR}/core/tensor_shape.cc | |||||
| ${MINDDATA_DIR}/core/tensor.cc | |||||
| ${MINDDATA_DIR}/core/config_manager.cc | |||||
| ${MINDDATA_DIR}/core/data_type.cc | |||||
| ${MINDDATA_DIR}/core/tensor_helpers.cc | |||||
| ${MINDDATA_DIR}/core/global_context.cc | |||||
| ${MINDDATA_DIR}/core/tensor_row.cc | |||||
| ${MINDDATA_DIR}/api/vision.cc | |||||
| ${MINDDATA_DIR}/api/execute.cc | |||||
| ${MINDDATA_DIR}/api/transforms.cc | |||||
| ${MINDDATA_DIR}/api/de_tensor.cc | |||||
| ${MINDDATA_DIR}/util/path.cc | |||||
| ${MINDDATA_DIR}/util/status.cc | |||||
| ${MINDDATA_DIR}/util/data_helper.cc | |||||
| ${MINDDATA_DIR}/util/memory_pool.cc | |||||
| ${MINDDATA_DIR}/engine/data_schema.cc | |||||
| ${MINDDATA_DIR}/kernels/tensor_op.cc | |||||
| ${MINDDATA_DIR}/kernels/image/lite_image_utils.cc | |||||
| ${MINDDATA_DIR}/kernels/image/center_crop_op.cc | |||||
| ${MINDDATA_DIR}/kernels/image/crop_op.cc | |||||
| ${MINDDATA_DIR}/kernels/image/normalize_op.cc | |||||
| ${MINDDATA_DIR}/kernels/image/resize_op.cc | |||||
| ${MINDDATA_DIR}/kernels/data/compose_op.cc | |||||
| ${MINDDATA_DIR}/kernels/data/duplicate_op.cc | |||||
| ${MINDDATA_DIR}/kernels/data/one_hot_op.cc | |||||
| ${MINDDATA_DIR}/kernels/data/random_apply_op.cc | |||||
| ${MINDDATA_DIR}/kernels/data/random_choice_op.cc | |||||
| ${MINDDATA_DIR}/kernels/data/type_cast_op.cc | |||||
| ${MINDDATA_DIR}/kernels/data/data_utils.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/album_op_android.cc | |||||
| ) | |||||
| add_library(minddata-lite SHARED | |||||
| ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc | |||||
| ${CORE_DIR}/utils/ms_utils.cc | |||||
| ${MINDDATA_TODAPI_SRC} | |||||
| ) | |||||
| find_package(Threads REQUIRED) | |||||
| target_link_libraries(minddata-lite | |||||
| securec | |||||
| jpeg-turbo | |||||
| jpeg | |||||
| mindspore::json | |||||
| Threads::Threads | |||||
| ) | |||||
| # ref: https://github.com/android/ndk/issues/1202 | # ref: https://github.com/android/ndk/issues/1202 | ||||
| if (PLATFORM_ARM32) | if (PLATFORM_ARM32) | ||||
| @@ -293,11 +366,6 @@ if (BUILD_MINDDATA STREQUAL "full") | |||||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | if (PLATFORM_ARM32 OR PLATFORM_ARM64) | ||||
| target_link_libraries(minddata-lite log) | target_link_libraries(minddata-lite log) | ||||
| elseif (BUILD_MINDDATA_EXAMPLE) | elseif (BUILD_MINDDATA_EXAMPLE) | ||||
| # add_executable(mdlite-example ${CMAKE_CURRENT_SOURCE_DIR}/example/x86-example.cc) | |||||
| # target_link_libraries(mdlite-example minddata-lite) | |||||
| # add_custom_command(TARGET mdlite-example POST_BUILD | |||||
| # COMMAND cp -rf ${CMAKE_CURRENT_SOURCE_DIR}/example/testCifar10Data ${CMAKE_BINARY_DIR}/minddata | |||||
| # ) | |||||
| endif() | endif() | ||||
| elseif (BUILD_MINDDATA STREQUAL "lite") | elseif (BUILD_MINDDATA STREQUAL "lite") | ||||
| list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES "${MINDDATA_DIR}/core/client.cc") | list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES "${MINDDATA_DIR}/core/client.cc") | ||||
| @@ -374,9 +442,6 @@ elseif (BUILD_MINDDATA STREQUAL "lite") | |||||
| securec | securec | ||||
| jpeg-turbo | jpeg-turbo | ||||
| jpeg | jpeg | ||||
| # opencv_core | |||||
| # opencv_imgcodecs | |||||
| # opencv_imgproc | |||||
| mindspore::json | mindspore::json | ||||
| ) | ) | ||||
| @@ -13,7 +13,7 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "MDToDApi.h" | |||||
| #include "MDToDApi.h" //NOLINT | |||||
| #include <string> | #include <string> | ||||
| #include <fstream> | #include <fstream> | ||||
| @@ -22,7 +22,8 @@ | |||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | #include <vector> | ||||
| #include "minddata/dataset/include/datasets.h" | |||||
| #include "album_op_android.h" //NOLINT | |||||
| #include "minddata/dataset/include/execute.h" | #include "minddata/dataset/include/execute.h" | ||||
| #include "minddata/dataset/util/path.h" | #include "minddata/dataset/util/path.h" | ||||
| #include "minddata/dataset/include/vision.h" | #include "minddata/dataset/include/vision.h" | ||||
| @@ -35,7 +36,7 @@ | |||||
| using mindspore::dataset::Path; | using mindspore::dataset::Path; | ||||
| using mindspore::dataset::Tensor; | using mindspore::dataset::Tensor; | ||||
| using mindspore::dataset; | |||||
| using TensorOperation = mindspore::dataset::TensorOperation; | |||||
| using mindspore::LogStream; | using mindspore::LogStream; | ||||
| using mindspore::MsLogLevel::DEBUG; | using mindspore::MsLogLevel::DEBUG; | ||||
| @@ -48,22 +49,21 @@ using mindspore::dataset::Status; | |||||
| class MDToDApi { | class MDToDApi { | ||||
| public: | public: | ||||
| std::shared_ptr<Dataset> _ds; | |||||
| std::shared_ptr<Iterator> _iter; | |||||
| std::shared_ptr<mindspore::dataset::AlbumOp> _iter; | |||||
| std::vector<std::shared_ptr<TensorOperation>> _augs; | std::vector<std::shared_ptr<TensorOperation>> _augs; | ||||
| std::string _storage_folder; | std::string _storage_folder; | ||||
| std::string _folder_path; | std::string _folder_path; | ||||
| bool _hasBatch; | bool _hasBatch; | ||||
| int64_t _file_id; | int64_t _file_id; | ||||
| MDToDApi() : _ds(nullptr), _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) { | |||||
| MS_LOG(WARNING) << "MDToDAPI Call constructor"; | |||||
| public: | |||||
| MDToDApi() : _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) { | |||||
| MS_LOG(WARNING) << "MDToDAPI Call constractor"; | |||||
| } | } | ||||
| ~MDToDApi() { | ~MDToDApi() { | ||||
| MS_LOG(WARNING) << "MDToDAPI Call destructor"; | |||||
| MS_LOG(WARNING) << "MDToDAPI Call destractor"; | |||||
| // derefernce dataset and iterator | |||||
| _augs.clear(); | _augs.clear(); | ||||
| _ds = nullptr; | |||||
| _iter = nullptr; | |||||
| } | } | ||||
| }; | }; | ||||
| @@ -79,7 +79,9 @@ std::vector<std::string> MDToDBuffToVector(MDToDBuff_t StrBuff) { | |||||
| return strVector; | return strVector; | ||||
| } | } | ||||
| extern "C" int MDToDApi_pathTest(const char *path) { | |||||
| extern "C" | |||||
| int MDToDApi_pathTest(const char* path) { | |||||
| Path f(path); | Path f(path); | ||||
| MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath(); | MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath(); | ||||
| // Print out the first few items in the directory | // Print out the first few items in the directory | ||||
| @@ -114,36 +116,31 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { | |||||
| if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) { | if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) { | ||||
| std::vector<int> Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2); | std::vector<int> Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2); | ||||
| std::shared_ptr<TensorOperation> resize_op = vision::Resize(Resize); | |||||
| std::shared_ptr<TensorOperation> resize_op = mindspore::dataset::vision::Resize(Resize); | |||||
| assert(resize_op != nullptr); | assert(resize_op != nullptr); | ||||
| MS_LOG(WARNING) << "Push back resize"; | MS_LOG(WARNING) << "Push back resize"; | ||||
| mapOperations.push_back(resize_op); | mapOperations.push_back(resize_op); | ||||
| // hasBatch = true; Batch not currently supported inMInddata-Lite | |||||
| } | } | ||||
| if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) { | if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) { | ||||
| std::vector<int> Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2); | std::vector<int> Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2); | ||||
| std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop(Crop); | |||||
| std::shared_ptr<TensorOperation> center_crop_op = mindspore::dataset::vision::CenterCrop(Crop); | |||||
| assert(center_crop_op != nullptr); | assert(center_crop_op != nullptr); | ||||
| MS_LOG(WARNING) << "Push back crop"; | MS_LOG(WARNING) << "Push back crop"; | ||||
| mapOperations.push_back(center_crop_op); | mapOperations.push_back(center_crop_op); | ||||
| // hasBatch = true; Batch not currently supported inMInddata-Lite | |||||
| } | } | ||||
| } | } | ||||
| std::shared_ptr<Dataset> ds = nullptr; | |||||
| MS_LOG(INFO) << "Read id =" << MDConf.fileid << " (-1) for all"; | |||||
| MS_LOG(INFO) << "Read id=" << MDConf.fileid << " (-1) for all"; | |||||
| std::shared_ptr<mindspore::dataset::AlbumOp> iter = nullptr; | |||||
| const std::set<std::string> exts = {}; | |||||
| if (MDConf.fileid > -1) { | if (MDConf.fileid > -1) { | ||||
| // read specific image using SequentialSampler | |||||
| ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(MDConf.fileid, 1L)); | |||||
| // read specific image using SequentialSampler witn | |||||
| iter = std::make_shared<mindspore::dataset::AlbumOp>(folder_path, true, schema_file, exts, MDConf.fileid); | |||||
| } else { | } else { | ||||
| // Distributed sampler takes num_shards then shard_id | |||||
| ds = Album(folder_path, schema_file, column_names, true, SequentialSampler()); | |||||
| iter = std::make_shared<mindspore::dataset::AlbumOp>(folder_path, true, schema_file, exts); | |||||
| } | } | ||||
| ds = ds->SetNumWorkers(1); | |||||
| assert(ds != nullptr); | |||||
| // Create a Repeat operation on ds | |||||
| int32_t repeat_num = 1; | |||||
| ds = ds->Repeat(repeat_num); | |||||
| assert(ds != nullptr); | |||||
| // Create objects for the tensor ops | // Create objects for the tensor ops | ||||
| MS_LOG(INFO) << " Create pipline parameters"; | MS_LOG(INFO) << " Create pipline parameters"; | ||||
| @@ -154,16 +151,7 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { | |||||
| } | } | ||||
| bool hasBatch = false; | bool hasBatch = false; | ||||
| // Create an iterator over the result of the above dataset | |||||
| // This will trigger the creation of the Execution Tree and launch it. | |||||
| std::shared_ptr<Iterator> iter = ds->CreateIterator(); | |||||
| if (nullptr == iter) { | |||||
| MS_LOG(ERROR) << "Iterator creation failed"; | |||||
| return nullptr; | |||||
| } | |||||
| assert(iter != nullptr); | |||||
| MDToDApi *pMDToDApi = new MDToDApi; | MDToDApi *pMDToDApi = new MDToDApi; | ||||
| pMDToDApi->_ds = ds; | |||||
| pMDToDApi->_iter = iter; | pMDToDApi->_iter = iter; | ||||
| pMDToDApi->_augs = mapOperations; | pMDToDApi->_augs = mapOperations; | ||||
| pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath); | pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath); | ||||
| @@ -173,11 +161,11 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { | |||||
| } | } | ||||
| template <typename T> | template <typename T> | ||||
| void MDBuffToVector(MDToDBuff_t MDBuff, std::vector<T> *vec) { | |||||
| vec.clear(); | |||||
| void MDBuffToVector(const MDToDBuff_t MDBuff, std::vector<T> *vec) { | |||||
| vec->clear(); | |||||
| if (MDBuff.DataSize > 0) { | if (MDBuff.DataSize > 0) { | ||||
| int nofElements = MDBuff.DataSize / sizeof(T); | int nofElements = MDBuff.DataSize / sizeof(T); | ||||
| *vec.assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements); | |||||
| vec->assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements); | |||||
| } | } | ||||
| } | } | ||||
| @@ -217,7 +205,7 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro | |||||
| resBuff->TensorSize[0] = 1; | resBuff->TensorSize[0] = 1; | ||||
| } | } | ||||
| if (column->shape()[firstDim] > 0) { | if (column->shape()[firstDim] > 0) { | ||||
| if (DataType::DE_STRING == column->type()) { | |||||
| if (mindspore::dataset::DataType::DE_STRING == column->type()) { | |||||
| std::string str; | std::string str; | ||||
| for (int ix = 0; ix < column->shape()[firstDim]; ix++) { | for (int ix = 0; ix < column->shape()[firstDim]; ix++) { | ||||
| std::string_view strView; | std::string_view strView; | ||||
| @@ -238,14 +226,14 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro | |||||
| MS_LOG(ERROR) << "memcpy_s return: " << ret; | MS_LOG(ERROR) << "memcpy_s return: " << ret; | ||||
| } | } | ||||
| } else { | } else { | ||||
| DataHelper dh; | |||||
| mindspore::dataset::DataHelper dh; | |||||
| resBuff->DataSize = | resBuff->DataSize = | ||||
| dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize); | dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize); | ||||
| } | } | ||||
| MS_LOG(INFO) << columnName << " " << resBuff->DataSize | MS_LOG(INFO) << columnName << " " << resBuff->DataSize | ||||
| << " bytesCopyed to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") "; | << " bytesCopyed to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") "; | ||||
| if (0 == resBuff->DataSize) { | if (0 == resBuff->DataSize) { | ||||
| MS_LOG(ERROR) << "Copy Failed!!!! " << columnName << " Too large" | |||||
| MS_LOG(ERROR) << "COPY FAIL!!!! " << columnName << " Too large" | |||||
| << "."; // memcpy failed | << "."; // memcpy failed | ||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -259,7 +247,7 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro | |||||
| extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { | extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { | ||||
| MS_LOG(INFO) << "Start GetNext"; | MS_LOG(INFO) << "Start GetNext"; | ||||
| if (pMDToDApi == nullptr) { | if (pMDToDApi == nullptr) { | ||||
| MS_LOG(ERROR) << "GetNext called with nullptr. Abort"; | |||||
| MS_LOG(ERROR) << "GetNext called with null ptr. abort"; | |||||
| assert(pMDToDApi != nullptr); | assert(pMDToDApi != nullptr); | ||||
| } | } | ||||
| @@ -271,12 +259,13 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { | |||||
| // get next row for dataset | // get next row for dataset | ||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | std::unordered_map<std::string, std::shared_ptr<Tensor>> row; | ||||
| if (pMDToDApi->_iter == nullptr) { | if (pMDToDApi->_iter == nullptr) { | ||||
| MS_LOG(ERROR) << "GetNext called with no iterator. abort"; | |||||
| MS_LOG(ERROR) << "GetNext called with no iteratoe. abort"; | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| // create Execute functions, this replaces Map in Pipeline | // create Execute functions, this replaces Map in Pipeline | ||||
| pMDToDApi->_iter->GetNextRow(&row); | |||||
| if (row.size() != 0) { | |||||
| bool ret = pMDToDApi->_iter->GetNextRow(&row); | |||||
| if (row.size() != 0 && ret) { | |||||
| if ((pMDToDApi->_augs).size() > 0) { | if ((pMDToDApi->_augs).size() > 0) { | ||||
| // String and Tensors | // String and Tensors | ||||
| GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff); | GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff); | ||||
| @@ -285,7 +274,7 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { | |||||
| for (int i = 0; i < (pMDToDApi->_augs).size(); i++) { | for (int i = 0; i < (pMDToDApi->_augs).size(); i++) { | ||||
| // each Execute call will invoke a memcpy, this cannot really be optimized further | // each Execute call will invoke a memcpy, this cannot really be optimized further | ||||
| // for this use case, std move is added for fail save. | // for this use case, std move is added for fail save. | ||||
| row["image"] = Execute((pMDToDApi->_augs)[i])(std::move(row["image"])); | |||||
| row["image"] = mindspore::dataset::Execute((pMDToDApi->_augs)[i])(std::move(row["image"])); | |||||
| if (row["image"] == nullptr) { | if (row["image"] == nullptr) { | ||||
| // nullptr means that the eager mode image processing failed, we fail in this case | // nullptr means that the eager mode image processing failed, we fail in this case | ||||
| return -1; | return -1; | ||||
| @@ -316,20 +305,18 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { | |||||
| extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) { | extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) { | ||||
| // Manually terminate the pipeline | // Manually terminate the pipeline | ||||
| pMDToDApi->_iter->Stop(); | |||||
| MS_LOG(WARNING) << "pipline stoped"; | MS_LOG(WARNING) << "pipline stoped"; | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) { | extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) { | ||||
| MS_LOG(WARNING) << "pipeline deleted start"; | |||||
| pMDToDApi->_iter->Stop(); | |||||
| MS_LOG(WARNING) << "pipline deleted start"; | |||||
| delete pMDToDApi; | delete pMDToDApi; | ||||
| MS_LOG(WARNING) << "pipeline deleted end"; | |||||
| MS_LOG(WARNING) << "pipline deleted end"; | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) { | |||||
| int GetJsonFullFileName(const MDToDApi *pMDToDApi, std::string *filePath) { | |||||
| int64_t file_id = pMDToDApi->_file_id; | int64_t file_id = pMDToDApi->_file_id; | ||||
| if (file_id < 0) { | if (file_id < 0) { | ||||
| MS_LOG(ERROR) << "Illigal file ID to update: " << file_id << "."; | MS_LOG(ERROR) << "Illigal file ID to update: " << file_id << "."; | ||||
| @@ -343,12 +330,12 @@ int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) { | |||||
| extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings, | extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings, | ||||
| size_t emmbeddingsSize) { | size_t emmbeddingsSize) { | ||||
| auto columnName = std::string(column); | auto columnName = std::string(column); | ||||
| MS_LOG(INFO) << "Start update " << columnName; | |||||
| MS_LOG(INFO) << "Start Update " << columnName; | |||||
| std::string converted = std::to_string(pMDToDApi->_file_id); | std::string converted = std::to_string(pMDToDApi->_file_id); | ||||
| std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin"; | std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin"; | ||||
| DataHelper dh; | |||||
| MS_LOG(INFO) << "Try to save file " << embedding_file_path; | |||||
| mindspore::dataset::DataHelper dh; | |||||
| MS_LOG(INFO) << "Try to Save file " << embedding_file_path; | |||||
| std::vector<float> bin_content(emmbeddings, emmbeddings + emmbeddingsSize); | std::vector<float> bin_content(emmbeddings, emmbeddings + emmbeddingsSize); | ||||
| Status rc = dh.template WriteBinFile<float>(embedding_file_path, bin_content); | Status rc = dh.template WriteBinFile<float>(embedding_file_path, bin_content); | ||||
| if (rc.IsError()) { | if (rc.IsError()) { | ||||
| @@ -379,8 +366,8 @@ extern "C" int MDToDApi_UpdateStringArray(MDToDApi *pMDToDApi, const char *colum | |||||
| MS_LOG(ERROR) << "Failed to update " << columnName; | MS_LOG(ERROR) << "Failed to update " << columnName; | ||||
| return -1; | return -1; | ||||
| } | } | ||||
| MS_LOG(INFO) << "Start Update string array column: " << columnName << " in file " << file_path; | |||||
| DataHelper dh; | |||||
| MS_LOG(INFO) << "Start Update string Array column: " << columnName << " in file " << file_path; | |||||
| mindspore::dataset::DataHelper dh; | |||||
| std::vector<std::string> strVec; | std::vector<std::string> strVec; | ||||
| if (MDbuff.DataSize > 0) { | if (MDbuff.DataSize > 0) { | ||||
| const char *p = reinterpret_cast<char *>(MDbuff.Buff); | const char *p = reinterpret_cast<char *>(MDbuff.Buff); | ||||
| @@ -405,7 +392,7 @@ extern "C" int MDToDApi_UpdateFloatArray(MDToDApi *pMDToDApi, const char *column | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path; | MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path; | ||||
| DataHelper dh; | |||||
| mindspore::dataset::DataHelper dh; | |||||
| std::vector<float> vec; | std::vector<float> vec; | ||||
| MDBuffToVector<float>(MDBuff, &vec); | MDBuffToVector<float>(MDBuff, &vec); | ||||
| Status rc = dh.UpdateArray<float>(file_path, columnName, vec); | Status rc = dh.UpdateArray<float>(file_path, columnName, vec); | ||||
| @@ -423,7 +410,7 @@ extern "C" int MDToDApi_UpdateIsForTrain(MDToDApi *pMDToDApi, int32_t isForTrain | |||||
| if (file_id < 0) return -1; | if (file_id < 0) return -1; | ||||
| std::string converted = std::to_string(pMDToDApi->_file_id); | std::string converted = std::to_string(pMDToDApi->_file_id); | ||||
| std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; | std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; | ||||
| DataHelper dh; | |||||
| mindspore::dataset::DataHelper dh; | |||||
| MS_LOG(INFO) << "Updating file: " << file_path; | MS_LOG(INFO) << "Updating file: " << file_path; | ||||
| Status rc = dh.UpdateValue<int32_t>(file_path, "_isForTrain", isForTrain, ""); | Status rc = dh.UpdateValue<int32_t>(file_path, "_isForTrain", isForTrain, ""); | ||||
| if (rc.IsError()) { | if (rc.IsError()) { | ||||
| @@ -440,7 +427,7 @@ extern "C" int MDToDApi_UpdateNoOfFaces(MDToDApi *pMDToDApi, int32_t noOfFaces) | |||||
| if (file_id < 0) return -1; | if (file_id < 0) return -1; | ||||
| std::string converted = std::to_string(pMDToDApi->_file_id); | std::string converted = std::to_string(pMDToDApi->_file_id); | ||||
| std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; | std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; | ||||
| DataHelper dh; | |||||
| mindspore::dataset::DataHelper dh; | |||||
| MS_LOG(INFO) << "Updating file: " << file_path; | MS_LOG(INFO) << "Updating file: " << file_path; | ||||
| Status rc = dh.UpdateValue<int32_t>(file_path, "_noOfFaces", noOfFaces, ""); | Status rc = dh.UpdateValue<int32_t>(file_path, "_noOfFaces", noOfFaces, ""); | ||||
| if (rc.IsError()) { | if (rc.IsError()) { | ||||
| @@ -0,0 +1,470 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "album_op_android.h" //NOLINT | |||||
| #include <fstream> | |||||
| #include <iomanip> | |||||
| #include "minddata/dataset/core/tensor_shape.h" | |||||
| #include "minddata/dataset/kernels/image/lite_image_utils.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file, | |||||
| const std::set<std::string> &exts) | |||||
| : folder_path_(file_dir), | |||||
| decode_(do_decode), | |||||
| extensions_(exts), | |||||
| schema_file_(schema_file), | |||||
| row_cnt_(0), | |||||
| buf_cnt_(0), | |||||
| current_cnt_(0), | |||||
| dirname_offset_(0), | |||||
| sampler_(false), | |||||
| sampler_index_(0) { | |||||
| PrescanEntry(); | |||||
| } | |||||
| AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file, | |||||
| const std::set<std::string> &exts, uint32_t index) | |||||
| : folder_path_(file_dir), | |||||
| decode_(do_decode), | |||||
| extensions_(exts), | |||||
| schema_file_(schema_file), | |||||
| row_cnt_(0), | |||||
| buf_cnt_(0), | |||||
| current_cnt_(0), | |||||
| dirname_offset_(0), | |||||
| sampler_(true), | |||||
| sampler_index_(0) { | |||||
| PrescanEntry(); | |||||
| } | |||||
// Helper function for string comparison.
// Album sorts files by the numeric value embedded in the name, not by plain
// lexicographic order: "/2.json" must sort before "/10.json".
bool StrComp(const std::string &a, const std::string &b) {
  // Each name looks like "/<number>.<ext>". Skip the leading separator and
  // let std::atoi stop at the first non-digit, which discards the extension.
  // The "." character is guaranteed to exist since the extension is checked
  // before this function is called.
  const int64_t lhs = std::atoi(a.substr(1, a.find(".")).c_str());
  const int64_t rhs = std::atoi(b.substr(1, b.find(".")).c_str());
  return lhs < rhs;
}
| // Single thread to go through the folder directory and gets all file names | |||||
| // calculate numRows then return | |||||
| Status AlbumOp::PrescanEntry() { | |||||
| data_schema_ = std::make_unique<DataSchema>(); | |||||
| Path schema_file(schema_file_); | |||||
| if (schema_file_ == "" || !schema_file.Exists()) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid file, schema_file is invalid or not set: " + schema_file_); | |||||
| } else { | |||||
| MS_LOG(WARNING) << "Schema file provided: " << schema_file_ << "."; | |||||
| data_schema_->LoadSchemaFile(schema_file_, columns_to_load_); | |||||
| } | |||||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||||
| } | |||||
| Path folder(folder_path_); | |||||
| dirname_offset_ = folder_path_.length(); | |||||
| std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder); | |||||
| if (folder.Exists() == false || dirItr == nullptr) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_); | |||||
| } | |||||
| MS_LOG(WARNING) << "Album folder Path found: " << folder_path_ << "."; | |||||
| while (dirItr->hasNext()) { | |||||
| Path file = dirItr->next(); | |||||
| if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { | |||||
| (void)image_rows_.push_back(file.toString().substr(dirname_offset_)); | |||||
| } else { | |||||
| MS_LOG(WARNING) << "Album operator unsupported file found: " << file.toString() | |||||
| << ", extension: " << file.Extension() << "."; | |||||
| } | |||||
| } | |||||
| std::sort(image_rows_.begin(), image_rows_.end(), StrComp); | |||||
| if (image_rows_.size() == 0) { | |||||
| RETURN_STATUS_UNEXPECTED( | |||||
| "Invalid data, no valid data matching the dataset API AlbumDataset. Please check file path or dataset API."); | |||||
| } | |||||
| if (sampler_) { | |||||
| if (sampler_index_ < 0 || sampler_index_ >= image_rows_.size()) { | |||||
| RETURN_STATUS_UNEXPECTED("the sampler index was out of range"); | |||||
| } | |||||
| std::vector<std::string> tmp; | |||||
| tmp.emplace_back(image_rows_[sampler_index_]); | |||||
| image_rows_.clear(); | |||||
| image_rows_ = tmp; | |||||
| } | |||||
| return Status::OK(); | |||||
| } | |||||
| // contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_ | |||||
| // IMPORTANT: 1 IOBlock produces 1 DataBuffer | |||||
| bool AlbumOp::GetNextRow(std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) { | |||||
| if (map_row == nullptr) { | |||||
| MS_LOG(WARNING) << "GetNextRow in AlbumOp: the point of map_row is nullptr"; | |||||
| return false; | |||||
| } | |||||
| if (current_cnt_ == image_rows_.size()) { | |||||
| return false; | |||||
| } | |||||
| Status ret = LoadTensorRow(current_cnt_, image_rows_[current_cnt_], map_row); | |||||
| if (ret.IsError()) { | |||||
| MS_LOG(ERROR) << "GetNextRow in AlbumOp: " << ret.ToString() << "\n"; | |||||
| return false; | |||||
| } | |||||
| current_cnt_++; | |||||
| return true; | |||||
| } | |||||
| // Only support JPEG/PNG/GIF/BMP | |||||
| // Optimization: Could take in a tensor | |||||
| // This function does not return status because we want to just skip bad input, not crash | |||||
| bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { | |||||
| std::ifstream file_handle; | |||||
| constexpr int read_num = 3; | |||||
| *valid = false; | |||||
| file_handle.open(file_name, std::ios::binary | std::ios::in); | |||||
| if (!file_handle.is_open()) { | |||||
| return false; | |||||
| } | |||||
| unsigned char file_type[read_num]; | |||||
| (void)file_handle.read(reinterpret_cast<char *>(file_type), read_num); | |||||
| if (file_handle.fail()) { | |||||
| file_handle.close(); | |||||
| return false; | |||||
| } | |||||
| file_handle.close(); | |||||
| if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { | |||||
| // Normal JPEGs start with \xff\xd8\xff\xe0 | |||||
| // JPEG with EXIF stats with \xff\xd8\xff\xe1 | |||||
| // Use \xff\xd8\xff to cover both. | |||||
| *valid = true; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorPtr *tensor) { | |||||
| TensorPtr image; | |||||
| std::ifstream fs; | |||||
| fs.open(image_file_path, std::ios::binary | std::ios::in); | |||||
| if (fs.fail()) { | |||||
| MS_LOG(WARNING) << "File not found:" << image_file_path << "."; | |||||
| // If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor | |||||
| RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // Hack logic to replace png images with empty tensor | |||||
| Path file(image_file_path); | |||||
| std::set<std::string> png_ext = {".png", ".PNG"}; | |||||
| if (png_ext.find(file.Extension()) != png_ext.end()) { | |||||
| // load empty tensor since image is not jpg | |||||
| MS_LOG(INFO) << "PNG!" << image_file_path << "."; | |||||
| RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // treat bin files separately | |||||
| std::set<std::string> bin_ext = {".bin", ".BIN"}; | |||||
| if (bin_ext.find(file.Extension()) != bin_ext.end()) { | |||||
| // load empty tensor since image is not jpg | |||||
| MS_LOG(INFO) << "Bin file found" << image_file_path << "."; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, tensor)); | |||||
| // row->push_back(std::move(image)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // check that the file is an image before decoding | |||||
| bool valid = false; | |||||
| bool check_success = CheckImageType(image_file_path, &valid); | |||||
| if (!check_success || !valid) { | |||||
| RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // if it is a jpeg image, load and try to decode | |||||
| RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image)); | |||||
| if (decode_ && valid) { | |||||
| Status rc = Decode(image, tensor); | |||||
| if (rc.IsError()) { | |||||
| RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor)); | |||||
| return Status::OK(); | |||||
| } | |||||
| } | |||||
| // row->push_back(std::move(image)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||||
| std::vector<std::string> data = json_obj.get<std::vector<std::string>>(); | |||||
| MS_LOG(WARNING) << "String array label found: " << data << "."; | |||||
| // TensorPtr label; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||||
| // row->push_back(std::move(label)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||||
| std::string data = json_obj; | |||||
| // now we iterate over the elements in json | |||||
| MS_LOG(INFO) << "String label found: " << data << "."; | |||||
| TensorPtr label; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(data, tensor)); | |||||
| // row->push_back(std::move(label)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||||
| // TensorPtr label; | |||||
| // consider templating this function to handle all ints | |||||
| if (data_schema_->column(col_num).type() == DataType::DE_INT64) { | |||||
| std::vector<int64_t> data; | |||||
| // Iterate over the integer list and add those values to the output shape tensor | |||||
| auto items = json_obj.items(); | |||||
| using it_type = decltype(items.begin()); | |||||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||||
| } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { | |||||
| std::vector<int32_t> data; | |||||
| // Iterate over the integer list and add those values to the output shape tensor | |||||
| auto items = json_obj.items(); | |||||
| using it_type = decltype(items.begin()); | |||||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||||
| } else { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " + | |||||
| data_schema_->column(col_num).type().ToString()); | |||||
| } | |||||
| // row->push_back(std::move(label)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||||
| // TensorPtr float_array; | |||||
| // consider templating this function to handle all ints | |||||
| if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { | |||||
| std::vector<double> data; | |||||
| // Iterate over the integer list and add those values to the output shape tensor | |||||
| auto items = json_obj.items(); | |||||
| using it_type = decltype(items.begin()); | |||||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||||
| } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { | |||||
| std::vector<float> data; | |||||
| // Iterate over the integer list and add those values to the output shape tensor | |||||
| auto items = json_obj.items(); | |||||
| using it_type = decltype(items.begin()); | |||||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); | |||||
| } else { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " + | |||||
| data_schema_->column(col_num).type().ToString()); | |||||
| } | |||||
| // row->push_back(std::move(float_array)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor) { | |||||
| if (data_schema_->column(col_num).type() == DataType::DE_STRING) { | |||||
| // TensorPtr id; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, tensor)); | |||||
| // row->push_back(std::move(id)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // hack to get the file name without extension, the 1 is to get rid of the backslash character | |||||
| int64_t image_id = std::atoi(file.substr(1, file.find(".")).c_str()); | |||||
| // TensorPtr id; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(image_id, tensor)); | |||||
| MS_LOG(INFO) << "File ID " << image_id << "."; | |||||
| // row->push_back(std::move(id)); | |||||
| return Status::OK(); | |||||
| } | |||||
| Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) { | |||||
| // hack to get the file name without extension, the 1 is to get rid of the backslash character | |||||
| // TensorPtr empty_tensor; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), tensor)); | |||||
| // row->push_back(std::move(empty_tensor)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // Loads a tensor with float value, issue with float64, we don't have reverse look up to the type | |||||
| // So we actually have to check what type we want to fill the tensor with. | |||||
| // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to | |||||
| // only be float32, seems like a weird limitation to impose | |||||
| Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||||
| // TensorPtr float_tensor; | |||||
| if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { | |||||
| double data = json_obj; | |||||
| MS_LOG(INFO) << "double found: " << json_obj << "."; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, tensor)); | |||||
| } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { | |||||
| float data = json_obj; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data, tensor)); | |||||
| MS_LOG(INFO) << "float found: " << json_obj << "."; | |||||
| } | |||||
| // row->push_back(std::move(float_tensor)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // Loads a tensor with int value, we have to cast the value to type specified in the schema. | |||||
| Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { | |||||
| // TensorPtr int_tensor; | |||||
| if (data_schema_->column(col_num).type() == DataType::DE_INT64) { | |||||
| int64_t data = json_obj; | |||||
| MS_LOG(INFO) << "int64 found: " << json_obj << "."; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, tensor)); | |||||
| } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { | |||||
| int32_t data = json_obj; | |||||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, tensor)); | |||||
| MS_LOG(INFO) << "int32 found: " << json_obj << "."; | |||||
| } | |||||
| // row->push_back(std::move(int_tensor)); | |||||
| return Status::OK(); | |||||
| } | |||||
| // Load 1 TensorRow (image,label) using 1 ImageColumns. 1 function call produces 1 TensorRow in a DataBuffer | |||||
| // possible optimization: the helper functions of LoadTensorRow should be optimized | |||||
| // to take a reference to a column descriptor? | |||||
| // the design of this class is to make the code more readable, forgoing minor perfomance gain like | |||||
| // getting rid of duplicated checks | |||||
| Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, | |||||
| std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) { | |||||
| // testing here is to just print out file path | |||||
| // (*row) = TensorRow(row_id, {}); | |||||
| MS_LOG(INFO) << "Image row file: " << file << "."; | |||||
| std::ifstream file_handle(folder_path_ + file); | |||||
| if (!file_handle.is_open()) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file); | |||||
| } | |||||
| std::string line; | |||||
| while (getline(file_handle, line)) { | |||||
| try { | |||||
| nlohmann::json js = nlohmann::json::parse(line); | |||||
| MS_LOG(INFO) << "This Line: " << line << "."; | |||||
| // note if take a schema here, then we have to iterate over all column descriptors in schema and check for key | |||||
| // get columns in schema: | |||||
| int32_t columns = data_schema_->NumColumns(); | |||||
| // loop over each column descriptor, this can optimized by switch cases | |||||
| for (int32_t i = 0; i < columns; i++) { | |||||
| // special case to handle | |||||
| if (data_schema_->column(i).name() == "id") { | |||||
| // id is internal, special case to load from file | |||||
| TensorPtr tensor; | |||||
| RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor)); | |||||
| (*map_row)[data_schema_->column(i).name()] = tensor; | |||||
| continue; | |||||
| } | |||||
| // find if key does not exist, insert placeholder nullptr if not found | |||||
| if (js.find(data_schema_->column(i).name()) == js.end()) { | |||||
| // iterator not found, push nullptr as placeholder | |||||
| MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << "."; | |||||
| TensorPtr tensor; | |||||
| RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor)); | |||||
| (*map_row)[data_schema_->column(i).name()] = tensor; | |||||
| continue; | |||||
| } | |||||
| nlohmann::json column_value = js.at(data_schema_->column(i).name()); | |||||
| MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << "."; | |||||
| bool is_array = column_value.is_array(); | |||||
| // load single string | |||||
| if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) { | |||||
| TensorPtr tensor; | |||||
| RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor)); | |||||
| (*map_row)[data_schema_->column(i).name()] = tensor; | |||||
| continue; | |||||
| } | |||||
| // load string array | |||||
| if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) { | |||||
| TensorPtr tensor; | |||||
| RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor)); | |||||
| (*map_row)[data_schema_->column(i).name()] = tensor; | |||||
| continue; | |||||
| } | |||||
| // load image file | |||||
| if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) { | |||||
| std::string image_file_path = column_value; | |||||
| TensorPtr tensor; | |||||
| RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor)); | |||||
| (*map_row)[data_schema_->column(i).name()] = tensor; | |||||
| continue; | |||||
| } | |||||
| // load float value | |||||
| if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 || | |||||
| data_schema_->column(i).type() == DataType::DE_FLOAT64)) { | |||||
| TensorPtr tensor; | |||||
| RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor)); | |||||
| (*map_row)[data_schema_->column(i).name()] = tensor; | |||||
| continue; | |||||
| } | |||||
| // load float array | |||||
| if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 || | |||||
| data_schema_->column(i).type() == DataType::DE_FLOAT64)) { | |||||
| TensorPtr tensor; | |||||
| RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor)); | |||||
| (*map_row)[data_schema_->column(i).name()] = tensor; | |||||
| continue; | |||||
| } | |||||
| // int value | |||||
| if (!is_array && (data_schema_->column(i).type() == DataType::DE_INT64 || | |||||
| data_schema_->column(i).type() == DataType::DE_INT32)) { | |||||
| TensorPtr tensor; | |||||
| RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor)); | |||||
| (*map_row)[data_schema_->column(i).name()] = tensor; | |||||
| continue; | |||||
| } | |||||
| // int array | |||||
| if (is_array && (data_schema_->column(i).type() == DataType::DE_INT64 || | |||||
| data_schema_->column(i).type() == DataType::DE_INT32)) { | |||||
| TensorPtr tensor; | |||||
| RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor)); | |||||
| (*map_row)[data_schema_->column(i).name()] = tensor; | |||||
| continue; | |||||
| } else { | |||||
| MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported."; | |||||
| continue; | |||||
| } | |||||
| } | |||||
| } catch (const std::exception &err) { | |||||
| file_handle.close(); | |||||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file); | |||||
| } | |||||
| } | |||||
| file_handle.close(); | |||||
| return Status::OK(); | |||||
| } | |||||
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,173 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_ | |||||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_ | |||||
| #include <deque> | |||||
| #include <memory> | |||||
| #include <queue> | |||||
| #include <string> | |||||
| #include <algorithm> | |||||
| #include <map> | |||||
| #include <set> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include <unordered_map> | |||||
| #include "minddata/dataset/core/tensor.h" | |||||
| #include "minddata/dataset/engine/data_buffer.h" | |||||
| #include "minddata/dataset/engine/data_schema.h" | |||||
| #include "minddata/dataset/util/path.h" | |||||
| #include "minddata/dataset/util/queue.h" | |||||
| #include "minddata/dataset/util/status.h" | |||||
| namespace mindspore { | |||||
| namespace dataset { | |||||
| // Forward declares | |||||
| template <typename T> | |||||
| class Queue; | |||||
| // Define row information as a list of file objects to read | |||||
| using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>; | |||||
/// \class AlbumOp
/// \brief Reader for an "Album" dataset: a directory of per-row json files whose columns
///        (image paths, strings, int/float scalars and arrays) are validated against a
///        user-supplied schema file and materialized as tensors.
/// NOTE(review): declarations below use nlohmann::json and row_id_type with no direct include
///               in this header — presumably pulled in transitively (e.g. via data_schema.h);
///               confirm, or include them explicitly.
class AlbumOp {
 public:
  /// \brief Constructor
  /// \param[in] file_dir - directory of Album
  /// \param[in] do_decode - decode image files
  /// \param[in] schema_file - schema file
  /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
  AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
          const std::set<std::string> &exts);

  /// \brief Constructor
  /// \param[in] file_dir - directory of Album
  /// \param[in] do_decode - decode image files
  /// \param[in] schema_file - schema file
  /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
  /// \param[in] index - the specific file index
  AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
          const std::set<std::string> &exts, uint32_t index);

  /// \brief Destructor.
  ~AlbumOp() = default;

  /// \brief Initialize AlbumOp related vars, calls the function to walk all files
  /// \return Status - the error code returned
  Status PrescanEntry();

  /// \brief Fill *map_row with the next row's tensors, keyed by column name.
  /// \param[out] map_row - column-name -> tensor mapping for the next row
  /// \return bool - NOTE(review): return semantics are not visible in this header; presumably
  ///         true on success / false when rows are exhausted — confirm against the .cc
  bool GetNextRow(std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);

  /// \brief Check if image is valid. Only supports JPEG/PNG/GIF/BMP.
  ///        This function could be optimized to return the tensor to reduce open/closing files
  /// \param[in] file_name - path of the image file to inspect
  /// \param[out] valid - set according to the image-type check
  /// \return bool - if file is bad then return false
  bool CheckImageType(const std::string &file_name, bool *valid);

  // Op name getter
  // @return Name of the current Op
  std::string Name() const { return "AlbumOp"; }

 private:
  /// \brief Load image to tensor
  /// \param[in] image_file - image file name
  /// \param[in] col_num - column number in schema
  /// \param[out] tensor - tensor to write the result to
  /// \return Status - the error code returned
  Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load vector of ints to tensor
  /// \param[in] json_obj - json object containing the multi-dimensional label
  /// \param[in] col_num - column number in schema
  /// \param[out] tensor - tensor to write the result to
  /// \return Status - the error code returned
  Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load vector of floats to tensor
  /// \param[in] json_obj - json object containing the array data
  /// \param[in] col_num - column number in schema
  /// \param[out] tensor - tensor to write the result to
  /// \return Status - the error code returned
  Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load string array into a tensor
  /// \param[in] json_obj - json object containing the string array
  /// \param[in] col_num - column number in schema
  /// \param[out] tensor - tensor to write the result to
  /// \return Status - the error code returned
  Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load a single string into a tensor
  /// \param[in] json_obj - json object containing the string
  /// \param[in] col_num - column number in schema
  /// \param[out] tensor - tensor to write the result to
  /// \return Status - the error code returned
  Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load float value to tensor
  /// \param[in] json_obj - json object containing the float
  /// \param[in] col_num - column number in schema
  /// \param[out] tensor - tensor to write the result to
  /// \return Status - the error code returned
  Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load int value to tensor
  /// \param[in] json_obj - json object containing the int
  /// \param[in] col_num - column number in schema
  /// \param[out] tensor - tensor to write the result to
  /// \return Status - the error code returned
  Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load empty tensor (placeholder for a column missing from the row's json)
  /// \param[in] col_num - column number in schema
  /// \param[out] tensor - tensor to write the result to
  /// \return Status - the error code returned
  Status LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor);

  /// \brief Load id from file name to tensor ("id" is internal, loaded from the file name
  ///        rather than the json content)
  /// \param[in] file - the file name to get the ID from
  /// \param[in] col_num - column number in schema
  /// \param[out] tensor - tensor to write the result to
  /// \return Status - the error code returned
  Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load one row of tensors according to a json file
  /// \param[in] row_id - id for this tensor row
  /// \param[in] file - json file location
  /// \param[out] map_row - column-name -> tensor mapping built from the json content
  /// \return Status - the error code returned
  Status LoadTensorRow(row_id_type row_id, const std::string &file,
                       std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);

  std::string folder_path_;  // directory of image folder
  bool decode_;              // whether image files should be decoded (from ctor's do_decode)
  std::vector<std::string> columns_to_load_;  // requested column names
  std::set<std::string> extensions_;          // extensions allowed
  std::unique_ptr<DataSchema> data_schema_;   // parsed schema describing each column
  std::string schema_file_;                   // path of the schema file passed to the ctor
  // NOTE(review): the counters below are not exercised in this header; names suggest
  // total rows / current cursor / buffer count / dirname prefix length — verify in the .cc.
  int64_t row_cnt_;
  int64_t current_cnt_;
  int64_t buf_cnt_;
  int64_t dirname_offset_;
  bool sampler_;             // presumably set when constructed with a file index — confirm
  int64_t sampler_index_;    // the specific file index (second constructor)
  std::vector<std::string> image_rows_;  // per-row json file names discovered by the walk
  std::unordered_map<std::string, int32_t> column_name_id_map_;  // column name -> column id
};
| } // namespace dataset | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_ | |||||