@@ -551,7 +551,7 @@ build_lite()
         cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
               -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
               -DANDROID_STL=${ANDROID_STL} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
-              -DPLATFORM_ARM64=on -DENABLE_NEON=on -DENABLE_FP16="off" \
+              -DPLATFORM_ARM64=on -DENABLE_NEON=on -DENABLE_FP16="on" \
               -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
               -DSUPPORT_GPU=${LITE_ENABLE_GPU} -DSUPPORT_NPU=${LITE_ENABLE_NPU} -DENABLE_V0=on \
               -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
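Note: with this change build_lite() passes -DENABLE_FP16="on" for the arm64 target by default, so the FP16 kernels keep being built out of the box, while the ENABLE_FP16 guards introduced in the hunks below make it possible to switch them off for builds that do not need half-precision support.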
@@ -23,4 +23,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
 add_library(nnacl_optimize_mid OBJECT ${SDOT_FILES})
-add_library(nnacl_fp16_mid OBJECT ${FP16_FILES})
+if (ENABLE_FP16)
+    add_library(nnacl_fp16_mid OBJECT ${FP16_FILES})
+endif ()
@@ -128,8 +128,9 @@ endif ()
 if (PLATFORM_ARM64)
     target_link_libraries(mindspore-lite cpu_opt_kernel_mid nnacl_optimize_mid)
     target_link_libraries(mindspore-lite_static cpu_opt_kernel_mid nnacl_optimize_mid)
-    target_link_libraries(mindspore-lite cpu_fp16_kernel_mid nnacl_fp16_mid)
-    target_link_libraries(mindspore-lite_static cpu_fp16_kernel_mid nnacl_fp16_mid)
+    if (ENABLE_FP16)
+        target_link_libraries(mindspore-lite cpu_fp16_kernel_mid nnacl_fp16_mid)
+        target_link_libraries(mindspore-lite_static cpu_fp16_kernel_mid nnacl_fp16_mid)
+    endif ()
 endif ()
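Note: because nnacl_fp16_mid (and, in the next hunk, cpu_fp16_kernel_mid) now only exist when ENABLE_FP16 is set, every target_link_libraries call that references them has to sit behind the same condition; otherwise the link of mindspore-lite would fail on the missing object libraries whenever FP16 is switched off.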
@@ -9,17 +9,19 @@ file(GLOB KERNEL_SRC
 list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
 if (SUPPORT_TRAIN)
-    file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
-    set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
+    file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
+    set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
 endif()
 add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})
 add_dependencies(cpu_kernel_mid fbs_src)
 if (PLATFORM_ARM64)
-    file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
-    add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
-    file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
-    add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
+    if (ENABLE_FP16)
+        file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
+        add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
+    endif ()
+    file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
+    add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
 endif ()
@@ -474,9 +474,14 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
 #endif
   }
   if (type == kernel::kCpuFP16SubGraph) {
+#ifdef ENABLE_FP16
     auto sub_kernel = new (std::nothrow)
       kernel::CpuFp16SubGraph(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
     return sub_kernel;
+#else
+    MS_LOG(ERROR) << "FP16 subgraph is not supported!";
+    return nullptr;
+#endif
   }
   if (type == kernel::kCpuFP32SubGraph) {
     auto sub_kernel = new (std::nothrow)
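The scheduler keeps compiling in non-FP16 builds by turning the FP16 branch into a logged error that returns nullptr. Below is a minimal, self-contained sketch of that compile-time guard pattern; the names (SubGraph, CreateFp16SubGraph, the fallback in main) are illustrative stand-ins, not MindSpore APIs.

// Sketch only: a factory compiled with or without a feature flag either builds
// the object or reports the failure and returns nullptr so the caller can react.
#include <iostream>
#include <memory>

struct SubGraph {
  virtual ~SubGraph() = default;
  virtual const char *Name() const = 0;
};

struct Fp32SubGraph : SubGraph {
  const char *Name() const override { return "fp32"; }
};

#ifdef ENABLE_FP16
struct Fp16SubGraph : SubGraph {
  const char *Name() const override { return "fp16"; }
};
#endif

std::unique_ptr<SubGraph> CreateFp16SubGraph() {
#ifdef ENABLE_FP16
  return std::make_unique<Fp16SubGraph>();
#else
  std::cerr << "FP16 subgraph is not supported in this build\n";
  return nullptr;  // the caller must handle the missing feature
#endif
}

int main() {
  auto sub = CreateFp16SubGraph();
  if (sub == nullptr) {
    sub = std::make_unique<Fp32SubGraph>();  // one possible graceful fallback
  }
  std::cout << "using " << sub->Name() << " subgraph\n";
}

Built with -DENABLE_FP16 the factory constructs the FP16 object; without the define it reports the missing feature and returns nullptr, which mirrors how CreateSubGraphKernel now signals an unsupported kCpuFP16SubGraph request.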
@@ -16,8 +16,7 @@
 #include "src/sub_graph_kernel.h"
 #include "src/tensor.h"
-#ifdef ENABLE_ARM64
-#include "src/common/utils.h"
+#if defined(ENABLE_ARM64) && defined(ENABLE_FP16)
 #include "src/runtime/kernel/arm/fp16/fp16_op_handler.h"
 #endif
@@ -175,6 +174,7 @@ int CpuSubGraph::Prepare() {
   return RET_OK;
 }
+#ifdef ENABLE_FP16
 void CpuFp16SubGraph::FreeOriginInputData() {
   for (auto *data_store : this->origin_input_data_) {
     if (data_store == nullptr) {
@@ -300,4 +300,5 @@ int CpuFp16SubGraph::PostProcess() {
   return RET_OK;
 #endif
 }
+#endif
 }  // namespace mindspore::kernel
@@ -157,6 +157,7 @@ class CpuFp32SubGraph : public CpuSubGraph {
   int PostProcess() override { return CpuSubGraph::PostProcess(); }
 };
+#ifdef ENABLE_FP16
 class CpuFp16SubGraph : public CpuSubGraph {
  public:
   CpuFp16SubGraph(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
@@ -182,5 +183,6 @@ class CpuFp16SubGraph : public CpuSubGraph {
  private:
   std::vector<DataStore *> origin_input_data_{};
 };
+#endif
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_SUB_GRAPH_H
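Note: the #ifdef ENABLE_FP16 fence is applied symmetrically, excluding both the CpuFp16SubGraph declaration in the header and its member definitions in the .cc when the flag is off, so the only place a non-FP16 build still mentions the FP16 subgraph is the guarded error branch in the scheduler above.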
@@ -288,15 +288,19 @@ if (SUPPORT_GPU)
 endif()
 if (ENABLE_FP16)
+    file(GLOB_RECURSE TEST_CASE_KERNEL_FP16_SRC
+        ${TEST_DIR}/ut/src/runtime/kernel/arm/fp16/*.cc
+    )
     set(TEST_SRC
         ${TEST_SRC}
-        ${TEST_DIR}/ut/src/runtime/kernel/arm/fp16/convolution_fp16_tests.cc)
+        ${TEST_CASE_KERNEL_FP16_SRC}
+    )
 endif ()
 add_executable(lite-test ${TEST_SRC})
 add_dependencies(lite-test fbs_src)
 target_link_libraries(lite-test dl mindspore::gtest)
-if (PLATFORM_ARM64)
+if (PLATFORM_ARM64 AND ENABLE_FP16)
     target_link_libraries(lite-test nnacl_fp16_mid nnacl_optimize_mid)
 endif()
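Note: collecting the sources with file(GLOB_RECURSE ...) means every test under ut/src/runtime/kernel/arm/fp16 is built when ENABLE_FP16 is on, not just convolution_fp16_tests.cc, and lite-test now links nnacl_fp16_mid only when that library actually exists, i.e. on arm64 builds with FP16 enabled.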
@@ -20,6 +20,9 @@
 #include "common/common_test.h"
 #include "mindspore/lite/src/common/file_utils.h"
 #include "mindspore/lite/nnacl/pack.h"
+#ifdef ENABLE_FP16
+#include "mindspore/lite/nnacl/fp16/pack_fp16.h"
+#endif
 namespace mindspore {
 class TestPack : public mindspore::CommonTest {
@@ -71,7 +71,7 @@ void TestReduceFp16::Prepare(const std::vector<int> &input_shape, const std::vec
   desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat16, schema::PrimitiveType_Reduce};
   ctx_ = lite::InnerContext();
   ctx_.thread_num_ = thread_num;
-  ASSERT_EQ(lite::RET_OK, context->Init());
+  ASSERT_EQ(lite::RET_OK, ctx_.Init());
   creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc);
   ASSERT_NE(creator_, nullptr);
   kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc, nullptr);
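The last hunk fixes the test fixture itself: Prepare() was calling Init() on a stray context pointer while the kernel is created with the member ctx_, so ctx_ is the object that has to be initialized. A minimal sketch of that relationship, using hypothetical stand-in types rather than the real lite::InnerContext and fixture:

// Sketch only: models the init-before-use relationship from the test fix above.
#include <cassert>

struct InnerContext {
  int thread_num_ = 1;
  bool inited_ = false;
  int Init() {
    inited_ = true;
    return 0;  // 0 stands in for lite::RET_OK
  }
};

struct ReduceFp16Test {
  InnerContext ctx_;
  void Prepare(int thread_num) {
    ctx_ = InnerContext();
    ctx_.thread_num_ = thread_num;
    int ret = ctx_.Init();  // initialize the same object the kernel will receive
    assert(ret == 0);
    (void)ret;
    // A kernel creator would now be handed &ctx_; if some other context had been
    // initialized instead, ctx_.inited_ would still be false at this point.
    assert(ctx_.inited_);
  }
};

int main() {
  ReduceFp16Test test;
  test.Prepare(2);
  return 0;
}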