| @@ -136,6 +136,8 @@ if(PLATFORM_ARM64) | |||
| COMPONENT ${RUNTIME_COMPONENT_NAME}) | |||
| install(DIRECTORY ${TOP_DIR}/include/api/ DESTINATION ${RUNTIME_INC_DIR}/api | |||
| COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ascend* ops*" EXCLUDE) | |||
| install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME} | |||
| COMPONENT ${CODEGEN_COMPONENT_NAME}) | |||
| if(ENABLE_TOOLS) | |||
| install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME}) | |||
| endif() | |||
| @@ -157,6 +159,8 @@ elseif(PLATFORM_ARM32) | |||
| COMPONENT ${RUNTIME_COMPONENT_NAME}) | |||
| install(DIRECTORY ${TOP_DIR}/include/api/ DESTINATION ${RUNTIME_INC_DIR}/api | |||
| COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ascend*" EXCLUDE) | |||
| install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME} | |||
| COMPONENT ${CODEGEN_COMPONENT_NAME}) | |||
| if(ENABLE_TOOLS) | |||
| install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME}) | |||
| endif() | |||
| @@ -231,6 +235,8 @@ else() | |||
| install(FILES ${glog_LIBPATH}/libglog.so.0.4.0 | |||
| DESTINATION ${CONVERTER_PKG_NAME}/third_party/glog/lib RENAME libglog.so.0 | |||
| COMPONENT ${CONVERTER_COMPONENT_NAME}) | |||
| install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/operator_library DESTINATION ${CODEGEN_PKG_NAME} | |||
| COMPONENT ${CODEGEN_COMPONENT_NAME}) | |||
| install(TARGETS codegen RUNTIME DESTINATION ${CODEGEN_PKG_NAME}/ | |||
| COMPONENT ${CODEGEN_COMPONENT_NAME}) | |||
| endif() | |||
| @@ -249,7 +255,7 @@ else() | |||
| endif() | |||
| set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) | |||
| if(PLATFORM_ARM64 OR PLATFORM_ARM32) | |||
| set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME}) | |||
| set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME} ${CODEGEN_COMPONENT_NAME}) | |||
| else() | |||
| set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME} ${CONVERTER_COMPONENT_NAME} ${CODEGEN_COMPONENT_NAME}) | |||
| endif() | |||
| @@ -34,7 +34,7 @@ | |||
#define LOG_HDR_FILE_REL_PATH "mindspore/core/utils/log_adapter.h"
// Get start index of file relative path in __FILE__.
//
// Returns the number of characters by which this translation unit's __FILE__
// string is longer than LOG_HDR_FILE_REL_PATH, or 0 when __FILE__ is not
// longer. Both operands are sizeof results, so the value is returned as
// size_t to avoid narrowing it to int.
static constexpr size_t GetRelPathPos() noexcept {
  return sizeof(__FILE__) > sizeof(LOG_HDR_FILE_REL_PATH) ? sizeof(__FILE__) - sizeof(LOG_HDR_FILE_REL_PATH) : 0;
}
| @@ -89,8 +89,10 @@ if(SUPPORT_TRAIN) | |||
| else() | |||
| if(PLATFORM_ARM64) | |||
| set(RUNTIME_COMPONENT_NAME inference-android-aarch64) | |||
| set(CODEGEN_COMPONENT_NAME codegen-android-aarch64) | |||
| elseif(PLATFORM_ARM32) | |||
| set(RUNTIME_COMPONENT_NAME inference-android-aarch32) | |||
| set(CODEGEN_COMPONENT_NAME codegen-android-aarch32) | |||
| elseif(WIN32) | |||
| if("${X86_64_SIMD}" STREQUAL "off") | |||
| set(RUNTIME_COMPONENT_NAME inference-win-x64) | |||
| @@ -218,7 +220,6 @@ if(ENABLE_CONVERTER) | |||
| include(${TOP_DIR}/cmake/external_libs/eigen.cmake) | |||
| include(${TOP_DIR}/cmake/external_libs/protobuf.cmake) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder) | |||
| endif() | |||
| if(ENABLE_MINDRT) | |||
| @@ -272,6 +273,7 @@ endif() | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder) | |||
| if(ENABLE_TOOLS) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark) | |||
| if(SUPPORT_TRAIN) | |||
| @@ -301,6 +301,30 @@ set(LITE_KERNEL_SRC | |||
| ${LITE_DIR}/nnacl/infer/splice_infer.c | |||
| ) | |||
| #### sse | |||
| if("${X86_64_SIMD}" STREQUAL "sse") | |||
| set(SSE_SRC | |||
| ${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c | |||
| ${LITE_DIR}/nnacl/intrinsics/sse/PackNHWCToNCHWFp32.c | |||
| ${LITE_DIR}/nnacl/intrinsics/sse/MatMul_Sse.c | |||
| ) | |||
| set_property(SOURCE ${SSE_SRC} PROPERTY LANGUAGE C) | |||
| endif() | |||
| #### avx | |||
| if("${X86_64_SIMD}" STREQUAL "avx") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -mavx -mavx2") | |||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2") | |||
| set(AVX_SRC | |||
| ${LITE_DIR}/nnacl/intrinsics/avx/common_utils.c | |||
| ${LITE_DIR}/nnacl/intrinsics/sse/sse_common.c | |||
| ${LITE_DIR}/nnacl/intrinsics/sse/MatMul_Sse.c | |||
| ${LITE_DIR}/nnacl/intrinsics/sse/PackNHWCToNCHWFp32.c | |||
| ${LITE_DIR}/nnacl/assembly/avx/MatmulAvx.S | |||
| ) | |||
| set_property(SOURCE ${AVX_SRC} PROPERTY LANGUAGE C) | |||
| endif() | |||
| list(APPEND FILE_SET ${CODER_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC} | |||
| ${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE}) | |||
| ${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE} ${SSE_SRC} ${AVX_SRC}) | |||
| @@ -25,10 +25,12 @@ include(${MICRO_DIR}/cmake/file_list.cmake) | |||
| include(${MICRO_DIR}/cmake/package_wrapper.cmake) | |||
| add_subdirectory(operator_library) | |||
# The codegen executable is only defined when the build does not target
# PLATFORM_ARM32 / PLATFORM_ARM64.
if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
    add_executable(codegen main.cc ${FILE_SET})
    # fbs_src / fbs_inner_src must be built first
    # (presumably the generated flatbuffer schema headers - confirm).
    add_dependencies(codegen fbs_src fbs_inner_src)
    target_link_libraries(codegen PRIVATE ${SECUREC_LIBRARY} mindspore::glog)
    if(NOT WIN32 AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release")
        # Strip the freshly linked binary. The toolchain's strip tool and the
        # target's real output path are used instead of a bare `strip` and a
        # path variable maintained elsewhere.
        add_custom_command(TARGET codegen POST_BUILD
                COMMAND ${CMAKE_STRIP} $<TARGET_FILE:codegen>
                VERBATIM)
    endif()
endif()
| @@ -92,19 +92,17 @@ class MemoryAllocator { | |||
| * including tensor, workspace | |||
| */ | |||
| template <typename T> | |||
| std::string GetRuntimeAddr(T t, bool is_const = false) { | |||
| std::string GetRuntimeAddr(T t, bool immutable = false) { | |||
| if (!t) { | |||
| return ""; | |||
| } | |||
| std::string type_info = is_const ? "const " : ""; | |||
| std::string type_name; | |||
| if (std::type_index(typeid(T)) == std::type_index(typeid(Tensor *))) { | |||
| type_name = GetTensorDataType(reinterpret_cast<Tensor *>(t)->data_type()) + "*"; | |||
| } else { | |||
| type_name = GetVariableTypeName<T>(); | |||
| } | |||
| type_info = wrap(type_info + type_name); | |||
| std::string type_info = wrap(type_name); | |||
| void *variable = reinterpret_cast<void *>(t); | |||
| auto item = inputs_addr_.find(variable); | |||
| if (item != inputs_addr_.end()) { | |||
| @@ -133,6 +131,9 @@ class MemoryAllocator { | |||
| [&variable](const std::pair<Tensor *, std::string> &a) { return variable == a.first; }); | |||
| if (iter != origin_weights_addr_.end()) { | |||
| saved_weights_addr_.insert(std::make_pair(iter->second, reinterpret_cast<Tensor *>(variable))); | |||
| if (immutable) { | |||
| malloc_weights_addr_.insert({reinterpret_cast<Tensor *>(variable), iter->second}); | |||
| } | |||
| return iter->second; | |||
| } | |||
| MS_LOG(ERROR) << "uninitialized memory"; | |||
| @@ -134,7 +134,7 @@ void CodeBenchmarkInference(std::ofstream &ofs, const std::string &module_name) | |||
| << " uint64_t timeAvg = 0;\n" | |||
| << " int loop_count = atoi(argv[3]);\n" | |||
| << " printf(\"======Inference Start======\\n\");\n" | |||
| << " printf(\"cycles: %d\", loop_count);\n" | |||
| << " printf(\"cycles: %d\\n\", loop_count);\n" | |||
| << " for (int i = 0; i < loop_count; i++) {\n" | |||
| << " uint64_t runBegin = GetTimeUs();\n" | |||
| << " " << module_name << "_Inference();\n" | |||
| @@ -48,7 +48,7 @@ void CodeCMakeNetLibrary(std::ofstream &ofs, const std::string &module_name, con | |||
| } | |||
| ofs << "file(GLOB NET_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.c)\n" | |||
| << "add_library(${PROJ_NAME} STATIC ${NET_SRC})\n"; | |||
| << "add_library(net STATIC ${NET_SRC})\n"; | |||
| } | |||
| } // namespace mindspore::lite::micro | |||
| @@ -19,9 +19,8 @@ | |||
| const char *bench_cmake_lists_txt = | |||
| "cmake_minimum_required(VERSION 3.14)\n" | |||
| "project(${PROJ_NAME})\n" | |||
| "project(benchmark)\n" | |||
| "\n" | |||
| "message(\"project name: ${PROJ_NAME}\")\n" | |||
| "message(\"project name: ${MODEL_LIB_PATH}\")\n" | |||
| "message(\"architecture cmake file path: ${ARCH_CMAKE_PATH}\")\n" | |||
| "\n" | |||
| @@ -54,14 +53,13 @@ const char *bench_cmake_lists_txt = | |||
| "endif ()\n" | |||
| "link_directories(${MODEL_LIB_PATH})\n" | |||
| "include(benchmark.cmake)\n" | |||
| "add_executable(${PROJ_NAME}_bench ${SRC_FILES})\n" | |||
| "target_link_libraries(${PROJ_NAME}_bench ${MODEL_LIB_NAME} -lm -pthread)\n"; | |||
| "add_executable(benchmark ${SRC_FILES})\n" | |||
| "target_link_libraries(benchmark ${MODEL_LIB_NAME} -lm -pthread)\n"; | |||
| const char *src_cmake_lists_txt = | |||
| "cmake_minimum_required(VERSION 3.14)\n" | |||
| "project(${PROJ_NAME})\n" | |||
| "project(net)\n" | |||
| "\n" | |||
| "message(\"project name: ${PROJ_NAME}\")\n" | |||
| "message(\"architecture cmake file path: ${ARCH_CMAKE_PATH}\")\n" | |||
| "message(\"operator lib path: ${OP_LIB}\")\n" | |||
| "message(\"operator header path: ${OP_HEADER_PATH}\")\n" | |||
| @@ -83,10 +81,11 @@ const char *src_cmake_lists_txt = | |||
| "else()\n" | |||
| " set(CMAKE_C_FLAGS \"-fPIC -fPIE -O3 -Werror -fstack-protector-strong -fomit-frame-pointer ${CMAKE_C_FLAGS}\")\n" | |||
| " set(CMAKE_C_FLAGS_Release \"${CMAKE_C_FLAGS_Release} -O3 -ffunction-sections -Werror -fdata-sections\")\n" | |||
| " string(REPLACE \"-g\" \"\" CMAKE_C_FLAGS \"${CMAKE_C_FLAGS}\")\n" | |||
| "endif()\n" | |||
| "\n" | |||
| "function(create_library)\n" | |||
| " add_custom_command(TARGET ${PROJ_NAME}\n" | |||
| " add_custom_command(TARGET net\n" | |||
| " POST_BUILD\n" | |||
| " COMMAND rm -rf tmp\n" | |||
| " COMMAND mkdir tmp\n" | |||
| @@ -97,9 +96,9 @@ const char *src_cmake_lists_txt = | |||
| " COMMENT \"unzip raw static library ${library_name}\"\n" | |||
| " )\n" | |||
| " foreach (object_file ${OP_SRC})\n" | |||
| " add_custom_command(TARGET ${PROJ_NAME} POST_BUILD COMMAND mv ./tmp/${object_file} .)\n" | |||
| " add_custom_command(TARGET net POST_BUILD COMMAND mv ./tmp/${object_file} .)\n" | |||
| " endforeach ()\n" | |||
| " add_custom_command(TARGET ${PROJ_NAME}\n" | |||
| " add_custom_command(TARGET net\n" | |||
| " POST_BUILD\n" | |||
| " COMMAND ar cr ${library_name} *.o\n" | |||
| " COMMAND ranlib ${library_name}\n" | |||
| @@ -109,7 +108,7 @@ const char *src_cmake_lists_txt = | |||
| " COMMENT \"generate specified static library ${library_name}\"\n" | |||
| " )\n" | |||
| "endfunction(create_library)\n" | |||
| "string(CONCAT library_name \"lib\" ${PROJ_NAME} \".a\")\n" | |||
| "string(CONCAT library_name \"lib\" net \".a\")\n" | |||
| "create_library()\n"; | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_ | |||
| @@ -36,7 +36,7 @@ void CodeCreateThreadPool(std::ofstream &ofs, const std::string &module_name) { | |||
| " MICRO_ERROR(\"set global thread pool failed\");\n" | |||
| " return RET_ERROR;\n" | |||
| " }\n" | |||
| " MICRO_INFO(\"config: ThreadNum: %d, BindMode: %d\", thread_num, bind_mode);\n"; | |||
| " printf(\"config: ThreadNum: %d, BindMode: %d\\n\", thread_num, bind_mode);\n"; | |||
| } | |||
| void CodeDestroyThreadPool(std::ofstream &ofs) { ofs << " DestroyThreadPool(thread_pool);\n"; } | |||
| @@ -17,9 +17,9 @@ | |||
| #include "coder/generator/component/weight_component.h" | |||
| #include <memory> | |||
| #include <utility> | |||
| #include <algorithm> | |||
| #include "coder/generator/component/const_blocks/license.h" | |||
| #include "coder/utils/coder_utils.h" | |||
| #include "coder/opcoders/parallel.h" | |||
| namespace mindspore::lite::micro { | |||
| void CodeWeightFileHeader(std::ofstream &ofs, const std::unique_ptr<CoderContext> &ctx) { | |||
| @@ -89,7 +89,7 @@ void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, cons | |||
| << " if (weight_buffer == NULL) {\n" | |||
| << " return RET_ERROR;\n" | |||
| << " }\n"; | |||
| ofs << " int " << gThreadNum << " = 1;\n\n"; | |||
| ofs << " struct ModelParameter {\n" | |||
| << " void *addr;\n" | |||
| << " size_t size;\n" | |||
| @@ -82,9 +82,9 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() { | |||
| MS_CHECK_PTR(params_->decoded_boxes_); | |||
| params_->nms_candidate_ = allocator_->Malloc(kNumberTypeUInt8, num_boxes_ * sizeof(uint8_t), kWorkspace); | |||
| MS_CHECK_PTR(params_->nms_candidate_); | |||
| params_->selected_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace); | |||
| params_->selected_ = allocator_->Malloc(kNumberTypeInt32, num_boxes_ * sizeof(int), kWorkspace); | |||
| MS_CHECK_PTR(params_->selected_); | |||
| params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace); | |||
| params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt32, num_boxes_ * sizeof(int), kWorkspace); | |||
| MS_CHECK_PTR(params_->single_class_indexes_); | |||
| if (params_->use_regular_nms_) { | |||
| @@ -92,13 +92,13 @@ int DetectionPostProcessBaseCoder::AllocateBuffer() { | |||
| allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace); | |||
| MS_CHECK_PTR(params_->scores_); | |||
| params_->indexes_ = | |||
| allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); | |||
| allocator_->Malloc(kNumberTypeInt32, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); | |||
| MS_CHECK_PTR(params_->indexes_); | |||
| params_->all_class_scores_ = | |||
| allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace); | |||
| MS_CHECK_PTR(params_->all_class_scores_); | |||
| params_->all_class_indexes_ = | |||
| allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); | |||
| allocator_->Malloc(kNumberTypeInt32, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); | |||
| MS_CHECK_PTR(params_->all_class_indexes_); | |||
| } else { | |||
| params_->scores_ = allocator_->Malloc(kNumberTypeFloat, num_boxes_ * sizeof(float), kWorkspace); | |||
| @@ -36,7 +36,7 @@ int BiasAddFP32Coder::DoCode(CoderContext *ctx) { | |||
| return RET_ERROR; | |||
| } | |||
| size_t data_size = input_tensor_->ElementsNum(); | |||
| std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex)); | |||
| std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex), true); | |||
| Collect(ctx, | |||
| {"nnacl/arithmetic.h", "nnacl/nnacl_utils.h", "nnacl/nnacl_common.h", "nnacl/base/arithmetic_base.h", | |||
| "nnacl/fp32/add_fp32.h", "nnacl/fp32/arithmetic_fp32.h"}, | |||
| @@ -183,13 +183,15 @@ int Conv2DINT8Coder::Resize() { | |||
| int Conv2DINT8Coder::DoCode(CoderContext *const context) { | |||
| std::vector<std::string> asm_files; | |||
| if (target_ == kARM32A) { | |||
| asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8Neon32.S"}; | |||
| asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S"}; | |||
| } else if (target_ == kARM64) { | |||
| asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8Neon64.S"}; | |||
| asm_files = {"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "MatmulInt8.S", "MatmulDpInt8.S"}; | |||
| } | |||
| Collect(context, {"nnacl/int8/conv_int8.h", "nnacl/common_func.h", "wrapper/int8/convolution_int8_wrapper.h"}, | |||
| Collect(context, | |||
| {"nnacl/int8/conv_int8.h", "nnacl/common_func.h", "wrapper/int8/convolution_int8_wrapper.h", | |||
| "wrapper/base/common_wrapper.h", "wrapper/base/optimize_handler_wrapper.h"}, | |||
| {"common_func.c", "pack_int8.c", "conv_int8.c", "winograd_transform.c", "matmul_int8.c", "fixed_point.c", | |||
| "convolution_int8_wrapper.c", "conv_init_int8_wrapper.c", "thread_pool.c"}, | |||
| "convolution_int8_wrapper.c", "conv_init_int8_wrapper.c", "common_wrapper.c", "optimize_handler_wrapper.c"}, | |||
| asm_files); | |||
| // call the op function | |||
| nnacl::NNaclInt8Serializer code; | |||
| @@ -202,7 +204,6 @@ int Conv2DINT8Coder::DoCode(CoderContext *const context) { | |||
| code.CodeBaseStruct("ConvolutionInt8Args", kRunArgs, input_tensor_, packed_input_, matmul_packed_input_, | |||
| packed_weight_, bias_data_, output_tensor_, filter_zp_ptr_, input_sum_, | |||
| "(ConvParameter *)&conv_param", matmul_func_, support_optimize_); | |||
| code.CodeFunction("CheckSupportOptimize", kRunArgsAddr); | |||
| if (support_parallel_) { | |||
| code.CodeFunction(kParallelLaunch, gThreadPool, "ConvolutionInt8Run", kRunArgsAddr, gThreadNum); | |||
| } else { | |||
| @@ -44,10 +44,8 @@ class Conv2DINT8Coder final : public Conv2DBaseCoder { | |||
| } | |||
| private: | |||
| int InitWeightBias(CoderContext *ctx); | |||
| void CheckSupportOptimize(); | |||
| int InitWeightBias(CoderContext *ctx); | |||
| int InitTmpBuffer(CoderContext *ctx); | |||
| int Resize(); | |||
| @@ -70,7 +68,7 @@ class Conv2DINT8Coder final : public Conv2DBaseCoder { | |||
| int32_t *input_sum_{nullptr}; | |||
| int8_t *matmul_packed_input_{nullptr}; | |||
| std::string matmul_func_; | |||
| std::string matmul_func_{"NULL"}; | |||
| std::function<int(nnacl::NNaclInt8Serializer &, const std::string &, const std::string &)> pack_weight_init_{nullptr}; | |||
| }; | |||
| @@ -168,9 +168,13 @@ class Serializer { | |||
| * "int pointer_gen[4] = {1 ,3, 2, 42};\n | |||
| * const Foo foo_gen = {{1, 2, 3}, pointer_gen, 4};\n" | |||
| */ | |||
| template <typename... PARAMETERS> | |||
| template <bool immutable = true, typename... PARAMETERS> | |||
| void CodeBaseStruct(const std::string &type, const std::string &name, PARAMETERS... parameters) { | |||
| code << "const " << type << " " << name << " = {"; | |||
| if constexpr (immutable) { | |||
| code << "const " << type << " " << name << " = {"; | |||
| } else { | |||
| code << type << " " << name << " = {"; | |||
| } | |||
| GenCode(parameters...); | |||
| code << "};\n"; | |||
| } | |||
| @@ -22,7 +22,6 @@ endif() | |||
| set(MICRO_CMAKE_PATH ${MICRO_DIR}/cmake) | |||
| set(OPERATOR_LIBRARY_PATH ${CMAKE_BINARY_DIR}/operator_library) | |||
| set(HEADER_PATH "${OPERATOR_LIBRARY_PATH}/include") | |||
| set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/x86") | |||
| message("===========>start to pack operators' head file") | |||
| file(REMOVE_RECURSE ${OPERATOR_LIBRARY_PATH}) | |||
| @@ -36,14 +35,31 @@ file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/assembly) | |||
| file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp16) | |||
| file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp16_grad) | |||
| file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/fp32_grad) | |||
| file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/intrinsics) | |||
| file(REMOVE_RECURSE ${HEADER_PATH}/nnacl/optimize) | |||
| if(PLATFORM_ARM64) | |||
| set(MICRO_BUILD_ARM64 ON) | |||
| endif() | |||
| if(PLATFORM_ARM32) | |||
| set(MICRO_BUILD_ARM32A ON) | |||
| endif() | |||
| include(${MICRO_CMAKE_PATH}/package_android.cmake) | |||
| include(${MICRO_CMAKE_PATH}/package_nnacl.cmake) | |||
| include(${MICRO_CMAKE_PATH}/package_cmsis.cmake) | |||
| include(${MICRO_CMAKE_PATH}/package_wrapper.cmake) | |||
| list(APPEND OP_FILES ${NNACL_OPS} ${WRAPPER_SRC} ${RUNTIME_SRC}) | |||
| if(PLATFORM_ARM64) | |||
| set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/arm64") | |||
| elseif(PLATFORM_ARM32) | |||
| set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/arm32a") | |||
| else() | |||
| set(LIB_PATH "${OPERATOR_LIBRARY_PATH}/lib/x86") | |||
| list(APPEND OP_FILES ${CMSIS_OPS}) | |||
| endif() | |||
| # generate static library | |||
| add_library(ops STATIC ${NNACL_OPS} ${CMSIS_OPS} ${WRAPPER_SRC} ${RUNTIME_SRC}) | |||
| add_library(ops STATIC ${OP_FILES}) | |||
| install(TARGETS ops ARCHIVE DESTINATION ${LIB_PATH}) | |||
| @@ -0,0 +1,36 @@ | |||
| /* | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "wrapper/base/common_wrapper.h" | |||
| #ifdef __ANDROID__ | |||
| #include <sys/auxv.h> | |||
| #include <asm/hwcap.h> | |||
| #endif | |||
/*
 * Reports whether the running CPU advertises the HWCAP_ASIMDDP capability bit.
 *
 * Returns true only when the file is built with ENABLE_ARM64 for Android and
 * the AT_HWCAP auxiliary-vector word has HWCAP_ASIMDDP set. Every other build
 * configuration returns false.
 */
bool GetSupportOptFlag() {
#if defined(ENABLE_ARM64) && defined(__ANDROID__)
  /*
   * <sys/auxv.h> and <asm/hwcap.h> are only included under __ANDROID__, so the
   * probe is compiled under the same condition; otherwise an ENABLE_ARM64
   * build without __ANDROID__ would reference undeclared names.
   * getauxval() returns unsigned long, so the value is kept at full width.
   */
  const unsigned long hwcap = getauxval(AT_HWCAP);
  return (hwcap & HWCAP_ASIMDDP) != 0;
#else
  return false;
#endif
}
| @@ -0,0 +1,24 @@ | |||
| /* | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_ | |||
| #include "nnacl/op_base.h" | |||
| bool GetSupportOptFlag(); | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_COMMON_WRAPPER_H_ | |||
| @@ -0,0 +1,49 @@ | |||
| /* | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "wrapper/base/optimize_handler_wrapper.h" | |||
/* Kernels defined outside this file (presumably the ARM64 assembly sources - confirm). */
extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
                                  const int *input_sum, const int *bias);
extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4,
                               const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
                               int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride,
                               size_t peroc);
extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4,
                            const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier,
                            int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp);

#ifdef ENABLE_ARM64
/* Forwards all arguments, unchanged and in order, to MatMulOptR4Int8Neon64. */
void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
                                   const int *input_sum, const int *bias) {
  /* Plain call: a void function must not `return` an expression in ISO C. */
  MatMulOptR4Int8Neon64(a, b, dst, row4, col4, deep16, input_sum, bias);
}

/*
 * Adapts the handler's parameter order to MatmulInt8DpNeon64.
 * The kernel receives UP_ROUND(row, C8NUM) / UP_ROUND(col, C8NUM) as its
 * row8 / col8 arguments and the original row / col as its trailing
 * row / col arguments; mini / maxi map to act_min / act_max.
 */
void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
                                  size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
                                  int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
                                  int32_t maxi, size_t per_channel) {
  MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi,
                     output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
}

/*
 * Adapts the handler's parameter order to MatmulInt8DpOpt.
 * row / col are passed through as the kernel's row8 / col8 arguments without
 * rounding, and filter_zp is forwarded as the final argument.
 */
void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
                                   size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
                                   int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
                                   int32_t maxi, size_t per_channel, int32_t *filter_zp) {
  MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift,
                  right_shift, stride, per_channel, filter_zp);
}
#endif
| @@ -0,0 +1,41 @@ | |||
| /* | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_ | |||
| #include "nnacl/op_base.h" | |||
| #ifdef ENABLE_ARM64 | |||
| void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias, | |||
| size_t ksize, size_t ic4, size_t output_channel, size_t offset, | |||
| const int32_t *input_sum, size_t act_min, size_t act_max, size_t out_zp, | |||
| int32_t *out_multiplier, int32_t *shift_before, int32_t *shift_after, | |||
| size_t asymmetric, size_t per_channel, size_t per_channel_offset); | |||
| void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, | |||
| const int *input_sum, const int *bias); | |||
| void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, | |||
| size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, | |||
| int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, | |||
| int32_t maxi, size_t per_channel); | |||
| void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, | |||
| size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, | |||
| int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, | |||
| int32_t maxi, size_t per_channel, int32_t *filter_zp); | |||
| #endif | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_ | |||
| @@ -22,21 +22,12 @@ void InitMatrixA(const float *src_ptr, float *dst_ptr, const MatMulParameter *pa | |||
| } | |||
| for (int i = 0; i < params_->batch; i++) { | |||
| const float *src = src_ptr + i * params_->deep_ * params_->row_; | |||
| #ifdef ENABLE_ARM32 | |||
| float *dst = dst_ptr + i * params_->deep_ * params_->row_4_; | |||
| if (params_->a_transpose_) { | |||
| RowMajor2Row4Major(src, dst, params_->deep_, params_->row_); | |||
| } else { | |||
| RowMajor2Col4Major(src, dst, params_->row_, params_->deep_); | |||
| } | |||
| #else | |||
| float *dst = dst_ptr + i * params_->deep_ * params_->row_12_; | |||
| float *dst = dst_ptr + i * params_->deep_ * params_->row_align_; | |||
| if (params_->a_transpose_) { | |||
| RowMajor2Row12Major(src, dst, params_->deep_, params_->row_); | |||
| } else { | |||
| RowMajor2Col12Major(src, dst, params_->row_, params_->deep_); | |||
| } | |||
| #endif | |||
| } | |||
| } | |||
| @@ -55,11 +46,19 @@ void InitMatrixB(const float *src_ptr, float *dst_ptr, const MatMulParameter *pa | |||
| } | |||
| for (int i = 0; i < params_->batch; i++) { | |||
| const float *src = src_ptr + i * params_->deep_ * params_->col_; | |||
| float *dst = dst_ptr + i * params_->deep_ * params_->col_8_; | |||
| float *dst = dst_ptr + i * params_->deep_ * params_->col_align_; | |||
| #ifdef ENABLE_ARM32 | |||
| if (params_->b_transpose_) { | |||
| RowMajor2Col4Major(src, dst, params_->col_, params_->deep_); | |||
| } else { | |||
| RowMajor2Row4Major(src, dst, params_->deep_, params_->col_); | |||
| } | |||
| #else | |||
| if (params_->b_transpose_) { | |||
| RowMajor2Col8Major(src, dst, params_->col_, params_->deep_); | |||
| } else { | |||
| RowMajor2Row8Major(src, dst, params_->deep_, params_->col_); | |||
| } | |||
| #endif | |||
| } | |||
| } | |||
| @@ -16,24 +16,6 @@ | |||
| #include "wrapper/int8/convolution_int8_wrapper.h" | |||
| void CheckSupportOptimize(const ConvolutionInt8Args *args) { | |||
| int tile_num = 8; | |||
| #ifdef ENABLE_ARM32 | |||
| tile_num = 4; | |||
| args->is_optimize_ = false; | |||
| #endif | |||
| #ifdef ENABLE_ARM64 | |||
| if (mindspore::lite::IsSupportSDot()) { | |||
| matmul_func_ = MatMulRInt8_optimize_handler; | |||
| args->is_optimize_ = true; | |||
| } else { | |||
| tile_num = 4; | |||
| args->is_optimize_ = false; | |||
| } | |||
| #endif | |||
| args->conv_param_->tile_num_ = tile_num; | |||
| } | |||
| int ConvolutionInt8Run(void *cdata, int task_id) { | |||
| ConvolutionInt8Args *args = (ConvolutionInt8Args *)cdata; | |||
| ConvInt8(args->input_data_, args->packed_input_, args->matmul_input_, args->packed_weight_, args->bias_data_, | |||
| @@ -36,8 +36,6 @@ typedef struct { | |||
| bool is_optimize_; | |||
| } ConvolutionInt8Args; | |||
| void CheckSupportOptimize(const ConvolutionInt8Args *args); | |||
| int ConvolutionInt8Run(void *cdata, int task_id); | |||
| #endif // MINDSPORE_LITE_MICRO_INT8_CONVOLUTION_WRAPPER_INT8_WRAPPER_H_ | |||