Browse Source

[MSLITE][Develop] modify optimize.so to sdot and fp16 so

tags/v1.0.0
ling 5 years ago
parent
commit
a19e6251bc
20 changed files with 165 additions and 93 deletions
  1. +2
    -1
      cmake/package_lite.cmake
  2. +32
    -35
      mindspore/lite/nnacl/CMakeLists.txt
  3. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Border.S
  4. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Center.S
  5. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Row.S
  6. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Border.S
  7. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Center.S
  8. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/Float16ToFloat32.S
  9. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/Float32ToFloat16.S
  10. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/IndirectGemmFp16_16x8.S
  11. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/MatmulFp16.S
  12. +0
    -0
      mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC8Fp16.S
  13. +28
    -0
      mindspore/lite/nnacl/optimize/CMakeLists.txt
  14. +45
    -5
      mindspore/lite/nnacl/optimized_kernel.h
  15. +32
    -16
      mindspore/lite/src/CMakeLists.txt
  16. +6
    -0
      mindspore/lite/src/kernel_registry.cc
  17. +14
    -30
      mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt
  18. +4
    -4
      mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc
  19. +2
    -1
      mindspore/lite/test/run_benchmark_nets.sh
  20. +0
    -1
      mindspore/lite/tools/converter/CMakeLists.txt

+ 2
- 1
cmake/package_lite.cmake View File

@@ -58,7 +58,8 @@ if (PLATFORM_ARM64)
install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR}/ir/dtype COMPONENT ${COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR}/schema COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE)
install(FILES ${TOP_DIR}/mindspore/lite/build/nnacl/liboptimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-optimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-fp16.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR} COMPONENT ${COMPONENT_NAME})
elseif (PLATFORM_ARM32)
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})


+ 32
- 35
mindspore/lite/nnacl/CMakeLists.txt View File

@@ -1,45 +1,42 @@
project(nnacl)

set(NNACL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
# Dereference NNACL_DIR: the bare word would be treated as a literal relative
# directory name ("NNACL_DIR") instead of the path stored in the variable.
include_directories(${NNACL_DIR})

########################### optimized files ###########################
file(GLOB OPTIMIZED_ASSEMBLY
${NNACL_DIR}/assembly/opt/*.s
${NNACL_DIR}/assembly/opt/*.S
)

file(GLOB FP16_SRC
${NNACL_DIR}/fp16/*.c
${TOP_DIR}/src/runtime/kernel/arm/fp16/*.cc
)
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
endif()
endif ()

########################### share library build ########################
set(OPTIMIZED_OPS ${NNACL_DIR}/opt_op_handler.c)
########################### files ###########################
file(GLOB KERNEL_SRC
${NNACL_DIR}/*.c
${NNACL_DIR}/fp32/*.c
${NNACL_DIR}/int8/*.c
${NNACL_DIR}/quantization/*.c
)

set_property(SOURCE ${OPTIMIZED_ASSEMBLY} PROPERTY LANGUAGE C)
list(APPEND OPTIMIZED_OPS ${OPTIMIZED_ASSEMBLY} ${FP16_SRC})
if (SUPPORT_TRAIN)
file (GLOB TRAIN_SRC ${NNACL_DIR}/fp32_grad/*.c)
endif()

if (PLATFORM_ARM64)
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
add_library(optimize SHARED ${OPTIMIZED_OPS})
target_link_libraries(
optimize
mindspore-lite
)
set_target_properties(optimize PROPERTIES CLEAN_DIRECT_OUTPUT 1)
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
endif()

if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
add_custom_command(TARGET optimize POST_BUILD
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
${TOP_DIR}/build/nnacl/liboptimize.so)
endif ()

add_custom_command(TARGET optimize POST_BUILD
COMMAND rm -rf ${TOP_DIR}/output/lib/liboptimize.so
COMMAND mkdir -pv ${TOP_DIR}/output/lib
COMMAND cp ${TOP_DIR}/build/nnacl/liboptimize.so ${TOP_DIR}/output/lib)
endif ()
if (PLATFORM_ARM32)
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm32/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
endif()

########################### build nnacl static library ########################
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
add_library(nnacl STATIC ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})

########################### arm64 build optimize library ########################
if (PLATFORM_ARM64)
add_subdirectory(${NNACL_DIR}/optimize)
endif()

mindspore/lite/nnacl/assembly/opt/ConvDwFp16Border.S → mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Border.S View File


mindspore/lite/nnacl/assembly/opt/ConvDwFp16Center.S → mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Center.S View File


mindspore/lite/nnacl/assembly/opt/ConvDwFp16Row.S → mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Row.S View File


mindspore/lite/nnacl/assembly/opt/DeconvDwFp16Border.S → mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Border.S View File


mindspore/lite/nnacl/assembly/opt/DeconvDwFp16Center.S → mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Center.S View File


mindspore/lite/nnacl/assembly/opt/Float16ToFloat32.S → mindspore/lite/nnacl/assembly/fp16/Float16ToFloat32.S View File


mindspore/lite/nnacl/assembly/opt/Float32ToFloat16.S → mindspore/lite/nnacl/assembly/fp16/Float32ToFloat16.S View File


mindspore/lite/nnacl/assembly/opt/IndirectGemmFp16_16x8.S → mindspore/lite/nnacl/assembly/fp16/IndirectGemmFp16_16x8.S View File


mindspore/lite/nnacl/assembly/opt/MatmulFp16.S → mindspore/lite/nnacl/assembly/fp16/MatmulFp16.S View File


mindspore/lite/nnacl/assembly/opt/PostFuncBiasReluC8Fp16.S → mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC8Fp16.S View File


+ 28
- 0
mindspore/lite/nnacl/optimize/CMakeLists.txt View File

@@ -0,0 +1,28 @@
# Builds two static archives from the nnacl sources, both compiled with
# -march=armv8.2-a+dotprod+fp16:
#   nnacl_optimize - assembly files under assembly/opt
#   nnacl_fp16     - C files under fp16/ plus assembly files under assembly/fp16
project(optimize)

set(NNACL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
# Dereference NNACL_DIR: the bare word would be treated as a literal relative
# directory name ("NNACL_DIR") instead of the path stored in the variable.
include_directories(${NNACL_DIR})

########################### optimized files ###########################
file(GLOB SDOT_SRC ${NNACL_DIR}/assembly/opt/*.S)
file(GLOB FP16_C_SRC ${NNACL_DIR}/fp16/*.c)
file(GLOB FP16_NEON_SRC ${NNACL_DIR}/assembly/fp16/*.S)

# Route the .S files through the C compiler driver (so they are preprocessed
# and receive CMAKE_C_FLAGS, including the -march setting below).
set_property(SOURCE ${SDOT_SRC} PROPERTY LANGUAGE C)
set_property(SOURCE ${FP16_C_SRC} PROPERTY LANGUAGE C)
set_property(SOURCE ${FP16_NEON_SRC} PROPERTY LANGUAGE C)

########################### static library build ########################
list(APPEND SDOT_FILES ${SDOT_SRC})
list(APPEND FP16_FILES ${FP16_C_SRC})
list(APPEND FP16_FILES ${FP16_NEON_SRC})

# Symbols must be visible by default, and the kernels need the ARMv8.2-A
# dot-product and half-precision extensions enabled at compile time.
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")

add_library(nnacl_optimize STATIC ${SDOT_FILES})
target_link_libraries(nnacl_optimize mindspore-lite)

add_library(nnacl_fp16 STATIC ${FP16_FILES})
target_link_libraries(nnacl_fp16 mindspore-lite)

+ 45
- 5
mindspore/lite/nnacl/optimized_kernel.h View File

@@ -24,14 +24,15 @@
#include <asm/hwcap.h>
#include "nnacl/nnacl_utils.h"
#endif
#include "utils/log_adapter.h"

#define OPTIMIZE_SHARED_LIBRARY_PATH "liboptimize.so"
#define OPTIMIZE_SHARED_LIBRARY_PATH "libmindspore-lite-optimize.so"
#define FLOAT16_SHARED_LIBRARY_PATH "libmindspore-lite-fp16.so"

class OptimizeModule {
public:
OptimizeModule() {
bool support_optimize_ops = false;
bool support_fp16 = false;
#ifdef __ANDROID__
int hwcap_type = 16;
uint32_t hwcap = getHwCap(hwcap_type);
@@ -40,8 +41,7 @@ class OptimizeModule {
#elif defined(__arm__)
if (hwcap & HWCAP_HALF) {
#endif
MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap;
support_fp16 = true;

#ifdef ENABLE_ARM64
}
#elif defined(__arm__)
@@ -57,7 +57,7 @@ class OptimizeModule {
}
#endif
#endif
if (!(support_optimize_ops && support_fp16)) {
if (support_optimize_ops == false) {
return;
}
#ifndef _WIN32
@@ -77,4 +77,44 @@ class OptimizeModule {
void *optimized_op_handler_ = nullptr;
};

// Singleton that loads the float16 kernel shared library
// (FLOAT16_SHARED_LIBRARY_PATH) when the CPU reports half-precision support.
//
// The capability probe only exists under __ANDROID__; on every other platform
// support_fp16 stays false and float16_op_handler_ remains nullptr. Loading is
// best effort: a failed dlopen is logged and leaves the handle null, so
// callers must check float16_op_handler_ before using it.
class Float16Module {
 public:
  Float16Module() {
    bool support_fp16 = false;
#ifdef __ANDROID__
    // NOTE(review): 16 is presumably AT_HWCAP - confirm against getHwCap().
    int hwcap_type = 16;
    uint32_t hwcap = getHwCap(hwcap_type);
#ifdef ENABLE_ARM64
    if (hwcap & HWCAP_FPHP) {
#elif defined(__arm__)
    if (hwcap & HWCAP_HALF) {
#endif
      // NOTE(review): hwcap is streamed after a "0x" prefix; whether it is
      // actually printed in hex depends on MS_LOG - confirm.
      MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap;
      support_fp16 = true;
#ifdef ENABLE_ARM64
    }
#elif defined(__arm__)
    }
#endif
#endif
    if (!support_fp16) {
      return;
    }
#ifndef _WIN32
    float16_op_handler_ = dlopen(FLOAT16_SHARED_LIBRARY_PATH, RTLD_LAZY);
    if (float16_op_handler_ == nullptr) {
      // Name the float16 library here so this failure is not mistaken for the
      // optimize library's load failure in the logs.
      MS_LOG(INFO) << "Open float16 shared library failed: " << dlerror();
    }
#endif
  }

  ~Float16Module() = default;

  // Returns the process-wide instance, constructed on first use.
  static Float16Module *GetInstance() {
    static Float16Module fp16_module;
    return &fp16_module;
  }

  // dlopen handle of the float16 library; nullptr when fp16 is unsupported or
  // the library could not be opened.
  void *float16_op_handler_ = nullptr;
};

#endif // MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_

+ 32
- 16
mindspore/lite/src/CMakeLists.txt View File

@@ -1,7 +1,12 @@

set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
include_directories(${LITE_DIR}/nnacl/)
include_directories(${LITE_DIR}/nnacl/optimize)

if (PLATFORM_ARM32 OR PLATFORM_ARM64)
# for performance
#for performance
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
#-fno-rtti -fno-exceptions
#- fno - rtti - fno - exceptions
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
endif()
@@ -65,21 +70,11 @@ set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
if (SUPPORT_GPU)
add_subdirectory(runtime/kernel/opencl)
target_link_libraries(mindspore-lite
cpu_kernel_mid
opencl_kernel_mid
)
target_link_libraries(mindspore-lite_static
cpu_kernel_mid
opencl_kernel_mid
)
target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl)
target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl)
else ()
target_link_libraries(mindspore-lite
cpu_kernel_mid
)
target_link_libraries(mindspore-lite_static
cpu_kernel_mid
)
target_link_libraries(mindspore-lite cpu_kernel_mid nnacl)
target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl)
endif ()
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(mindspore-lite log)
@@ -106,3 +101,24 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
endif ()
endif ()

########################## build optimize and float16 library ##########################
# ARM64 only: assemble the two optional runtime libraries. Neither target has
# sources of its own; each is made of a kernel OBJECT library
# (cpu_opt_kernel_mid / cpu_fp16_kernel_mid) plus the matching nnacl static
# archive (nnacl_optimize / nnacl_fp16).
if (PLATFORM_ARM64)
    add_library(mindspore-lite-optimize SHARED)
    target_link_libraries(mindspore-lite-optimize cpu_opt_kernel_mid)
    target_link_libraries(mindspore-lite-optimize nnacl_optimize)

    add_library(mindspore-lite-fp16 SHARED)
    target_link_libraries(mindspore-lite-fp16 cpu_fp16_kernel_mid)
    target_link_libraries(mindspore-lite-fp16 nnacl_fp16)
endif ()

# Strip the release binaries in place after linking. $<TARGET_FILE:...> resolves
# to wherever the library was actually written, so this no longer depends on
# the build tree living at ${TOP_DIR}/mindspore/lite/build.
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND (PLATFORM_ARM64))
    add_custom_command(TARGET mindspore-lite-optimize POST_BUILD
            COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
            $<TARGET_FILE:mindspore-lite-optimize>
            VERBATIM)

    add_custom_command(TARGET mindspore-lite-fp16 POST_BUILD
            COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
            $<TARGET_FILE:mindspore-lite-fp16>
            VERBATIM)
endif ()


+ 6
- 0
mindspore/lite/src/kernel_registry.cc View File

@@ -43,6 +43,12 @@ int KernelRegistry::Init() {
} else {
MS_LOG(INFO) << "load optimize lib failed.";
}
void *float16_op_handler = Float16Module::GetInstance()->float16_op_handler_;
if (float16_op_handler != nullptr) {
MS_LOG(INFO) << "load float16 lib success.";
} else {
MS_LOG(INFO) << "load float16 lib failed.";
}
#endif
return RET_OK;
}


+ 14
- 30
mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt View File

@@ -1,39 +1,23 @@
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)

file(GLOB KERNEL_SRC
${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/*.c
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/fp32/*.c
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/int8/*.c
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/quantization/*.c
${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc
)
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/opt_op_handler.c)
${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc
)
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)

if (SUPPORT_TRAIN)
file (GLOB TRAIN_KERNEL_SRC
${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/fp32_grad/*.c
)
set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
endif()

if (PLATFORM_ARM64)
# assembly
file(GLOB ASSEMBLY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm64/*.s
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm64/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})
endif()
add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})

if (PLATFORM_ARM32)
# assembly
file(GLOB ASSEMBLY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm32/*.s
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm32/*.S
)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})
endif()
if (PLATFORM_ARM64)
file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
endif ()

add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})

mindspore/lite/nnacl/opt_op_handler.c → mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc View File


+ 2
- 1
mindspore/lite/test/run_benchmark_nets.sh View File

@@ -293,7 +293,8 @@ function Run_arm64() {
fi

cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/liboptimize.so ${benchmark_test_path}/liboptimize.so || exit 1
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite-fp16.so ${benchmark_test_path}/libmindspore-lite-fp16.so || exit 1
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite-optimize.so ${benchmark_test_path}/libmindspore-lite-optimize.so || exit 1
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1

# adb push all needed files to the phone


+ 0
- 1
mindspore/lite/tools/converter/CMakeLists.txt View File

@@ -106,7 +106,6 @@ file(GLOB KERNEL_SRC
${ARM_DIR}/fp32/*.cc
${ARM_DIR}/int8/*.cc
)
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../nnacl/opt_op_handler.c)

if (PLATFORM_ARM64)
# assembly


Loading…
Cancel
Save