| @@ -29,6 +29,7 @@ jobs: | |||||
| uses: actions/checkout@v2 | uses: actions/checkout@v2 | ||||
| - name: Checkout submodules | - name: Checkout submodules | ||||
| run: | | run: | | ||||
| apt update&&apt install ninja-build | |||||
| ./third_party/prepare.sh | ./third_party/prepare.sh | ||||
| ./third_party/install-mkl.sh | ./third_party/install-mkl.sh | ||||
| - name: Build MegEngine | - name: Build MegEngine | ||||
| @@ -57,6 +58,7 @@ jobs: | |||||
| uses: actions/checkout@v2 | uses: actions/checkout@v2 | ||||
| - name: Checkout submodules | - name: Checkout submodules | ||||
| run: | | run: | | ||||
| apt update&&apt install ninja-build | |||||
| ./third_party/prepare.sh | ./third_party/prepare.sh | ||||
| ./third_party/install-mkl.sh | ./third_party/install-mkl.sh | ||||
| - name: Build MegEngine | - name: Build MegEngine | ||||
| @@ -27,7 +27,8 @@ function build() { | |||||
| -DMGE_WITH_DISTRIBUTED=${DMGE_WITH_DISTRIBUTED} \ | -DMGE_WITH_DISTRIBUTED=${DMGE_WITH_DISTRIBUTED} \ | ||||
| -DMGE_WITH_CUDA=${DMGE_WITH_CUDA} \ | -DMGE_WITH_CUDA=${DMGE_WITH_CUDA} \ | ||||
| -DMGE_WITH_TEST=ON \ | -DMGE_WITH_TEST=ON \ | ||||
| -DCMAKE_BUILD_TYPE=RelWithDebInfo | |||||
| -DCMAKE_BUILD_TYPE=RelWithDebInfo \ | |||||
| -DMGE_WITH_CUSTOM_OP=ON | |||||
| make -j$(($(nproc) * 2)) -I ${build_dir} | make -j$(($(nproc) * 2)) -I ${build_dir} | ||||
| make develop | make develop | ||||
| popd >/dev/null | popd >/dev/null | ||||
| @@ -1,59 +1,56 @@ | |||||
| # Copyright 2015 Google Inc. All rights reserved. | # Copyright 2015 Google Inc. All rights reserved. | ||||
| # | # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this | |||||
| # file except in compliance with the License. You may obtain a copy of the License at | |||||
| # | # | ||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | # | ||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # Unless required by applicable law or agreed to in writing, software distributed under | |||||
| # the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF | |||||
| # ANY KIND, either express or implied. See the License for the specific language | |||||
| # governing permissions and limitations under the License. | |||||
| # General function to create FlatBuffer build rules for the given list of | |||||
| # schemas. | |||||
| # General function to create FlatBuffer build rules for the given list of schemas. | |||||
| # | # | ||||
| # flatbuffers_schemas: A list of flatbuffer schema files to process. | # flatbuffers_schemas: A list of flatbuffer schema files to process. | ||||
| # | # | ||||
| # schema_include_dirs: A list of schema file include directories, which will be | |||||
| # passed to flatc via the -I parameter. | |||||
| # schema_include_dirs: A list of schema file include directories, which will be passed | |||||
| # to flatc via the -I parameter. | |||||
| # | # | ||||
| # custom_target_name: The generated files will be added as dependencies for a | |||||
| # new custom target with this name. You should add that target as a dependency | |||||
| # for your main target to ensure these files are built. You can also retrieve | |||||
| # various properties from this target, such as GENERATED_INCLUDES_DIR, | |||||
| # BINARY_SCHEMAS_DIR, and COPY_TEXT_SCHEMAS_DIR. | |||||
| # custom_target_name: The generated files will be added as dependencies for a new custom | |||||
| # target with this name. You should add that target as a dependency for your main target | |||||
| # to ensure these files are built. You can also retrieve various properties from this | |||||
| # target, such as GENERATED_INCLUDES_DIR, BINARY_SCHEMAS_DIR, and COPY_TEXT_SCHEMAS_DIR. | |||||
| # | # | ||||
| # additional_dependencies: A list of additional dependencies that you'd like | |||||
| # all generated files to depend on. Pass in a blank string if you have none. | |||||
| # additional_dependencies: A list of additional dependencies that you'd like all | |||||
| # generated files to depend on. Pass in a blank string if you have none. | |||||
| # | # | ||||
| # generated_includes_dir: Where to generate the C++ header files for these | |||||
| # schemas. The generated includes directory will automatically be added to | |||||
| # CMake's include directories, and will be where generated header files are | |||||
| # placed. This parameter is optional; pass in empty string if you don't want to | |||||
| # generate include files for these schemas. | |||||
| # generated_includes_dir: Where to generate the C++ header files for these schemas. The | |||||
| # generated includes directory will automatically be added to CMake's include | |||||
| # directories, and will be where generated header files are placed. This parameter is | |||||
| # optional; pass in empty string if you don't want to generate include files for these | |||||
| # schemas. | |||||
| # | # | ||||
| # binary_schemas_dir: If you specify an optional binary schema directory, binary | |||||
| # schemas will be generated for these schemas as well, and placed into the given | |||||
| # directory. | |||||
| # binary_schemas_dir: If you specify an optional binary schema directory, binary schemas | |||||
| # will be generated for these schemas as well, and placed into the given directory. | |||||
| # | # | ||||
| # copy_text_schemas_dir: If you want all text schemas (including schemas from | |||||
| # all schema include directories) copied into a directory (for example, if you | |||||
| # need them within your project to build JSON files), you can specify that | |||||
| # folder here. All text schemas will be copied to that folder. | |||||
| # copy_text_schemas_dir: If you want all text schemas (including schemas from all schema | |||||
| # include directories) copied into a directory (for example, if you need them within | |||||
| # your project to build JSON files), you can specify that folder here. All text schemas | |||||
| # will be copied to that folder. | |||||
| # | # | ||||
| # IMPORTANT: Make sure you quote all list arguments you pass to this function! | |||||
| # Otherwise CMake will only pass in the first element. | |||||
| # Example: build_flatbuffers("${fb_files}" "${include_dirs}" target_name ...) | |||||
| function(build_flatbuffers flatbuffers_schemas | |||||
| schema_include_dirs | |||||
| custom_target_name | |||||
| additional_dependencies | |||||
| generated_includes_dir | |||||
| binary_schemas_dir | |||||
| copy_text_schemas_dir) | |||||
| # IMPORTANT: Make sure you quote all list arguments you pass to this function! Otherwise | |||||
| # CMake will only pass in the first element. Example: build_flatbuffers("${fb_files}" | |||||
| # "${include_dirs}" target_name ...) | |||||
| function( | |||||
| build_flatbuffers | |||||
| flatbuffers_schemas | |||||
| schema_include_dirs | |||||
| custom_target_name | |||||
| additional_dependencies | |||||
| generated_includes_dir | |||||
| binary_schemas_dir | |||||
| copy_text_schemas_dir) | |||||
| # Test if including from FindFlatBuffers | # Test if including from FindFlatBuffers | ||||
| if(FLATBUFFERS_FLATC_EXECUTABLE) | if(FLATBUFFERS_FLATC_EXECUTABLE) | ||||
| @@ -65,10 +62,7 @@ function(build_flatbuffers flatbuffers_schemas | |||||
| endif() | endif() | ||||
| set(FLATC_SCHEMA_ARGS --gen-mutable) | set(FLATC_SCHEMA_ARGS --gen-mutable) | ||||
| if(FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS) | if(FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS) | ||||
| set(FLATC_SCHEMA_ARGS | |||||
| ${FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS} | |||||
| ${FLATC_SCHEMA_ARGS} | |||||
| ) | |||||
| set(FLATC_SCHEMA_ARGS ${FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS} ${FLATC_SCHEMA_ARGS}) | |||||
| endif() | endif() | ||||
| set(working_dir "${CMAKE_CURRENT_SOURCE_DIR}") | set(working_dir "${CMAKE_CURRENT_SOURCE_DIR}") | ||||
| @@ -77,12 +71,12 @@ function(build_flatbuffers flatbuffers_schemas | |||||
| # Generate the include files parameters. | # Generate the include files parameters. | ||||
| set(include_params "") | set(include_params "") | ||||
| set(all_generated_files "") | set(all_generated_files "") | ||||
| foreach (include_dir ${schema_include_dirs}) | |||||
| foreach(include_dir ${schema_include_dirs}) | |||||
| set(include_params -I ${include_dir} ${include_params}) | set(include_params -I ${include_dir} ${include_params}) | ||||
| if (NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
| if(NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
| # Copy text schemas from dependent folders. | # Copy text schemas from dependent folders. | ||||
| file(GLOB_RECURSE dependent_schemas ${include_dir}/${schema_glob}) | file(GLOB_RECURSE dependent_schemas ${include_dir}/${schema_glob}) | ||||
| foreach (dependent_schema ${dependent_schemas}) | |||||
| foreach(dependent_schema ${dependent_schemas}) | |||||
| file(COPY ${dependent_schema} DESTINATION ${copy_text_schemas_dir}) | file(COPY ${dependent_schema} DESTINATION ${copy_text_schemas_dir}) | ||||
| endforeach() | endforeach() | ||||
| endif() | endif() | ||||
| @@ -91,62 +85,54 @@ function(build_flatbuffers flatbuffers_schemas | |||||
| foreach(schema ${flatbuffers_schemas}) | foreach(schema ${flatbuffers_schemas}) | ||||
| get_filename_component(filename ${schema} NAME_WE) | get_filename_component(filename ${schema} NAME_WE) | ||||
| # For each schema, do the things we requested. | # For each schema, do the things we requested. | ||||
| if (NOT ${generated_includes_dir} STREQUAL "") | |||||
| if(NOT ${generated_includes_dir} STREQUAL "") | |||||
| set(generated_include ${generated_includes_dir}/${filename}_generated.h) | set(generated_include ${generated_includes_dir}/${filename}_generated.h) | ||||
| add_custom_command( | add_custom_command( | ||||
| OUTPUT ${generated_include} | OUTPUT ${generated_include} | ||||
| COMMAND ${FLATC} ${FLATC_SCHEMA_ARGS} | |||||
| -o ${generated_includes_dir} | |||||
| ${include_params} | |||||
| -c ${schema} | |||||
| COMMAND ${FLATC} ${FLATC_SCHEMA_ARGS} -o ${generated_includes_dir} | |||||
| ${include_params} -c ${schema} | |||||
| DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | ||||
| WORKING_DIRECTORY "${working_dir}") | WORKING_DIRECTORY "${working_dir}") | ||||
| list(APPEND all_generated_files ${generated_include}) | list(APPEND all_generated_files ${generated_include}) | ||||
| endif() | endif() | ||||
| if (NOT ${binary_schemas_dir} STREQUAL "") | |||||
| if(NOT ${binary_schemas_dir} STREQUAL "") | |||||
| set(binary_schema ${binary_schemas_dir}/${filename}.bfbs) | set(binary_schema ${binary_schemas_dir}/${filename}.bfbs) | ||||
| add_custom_command( | add_custom_command( | ||||
| OUTPUT ${binary_schema} | OUTPUT ${binary_schema} | ||||
| COMMAND ${FLATC} -b --schema | |||||
| -o ${binary_schemas_dir} | |||||
| ${include_params} | |||||
| ${schema} | |||||
| COMMAND ${FLATC} -b --schema -o ${binary_schemas_dir} ${include_params} | |||||
| ${schema} | |||||
| DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | ||||
| WORKING_DIRECTORY "${working_dir}") | WORKING_DIRECTORY "${working_dir}") | ||||
| list(APPEND all_generated_files ${binary_schema}) | list(APPEND all_generated_files ${binary_schema}) | ||||
| endif() | endif() | ||||
| if (NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
| if(NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
| file(COPY ${schema} DESTINATION ${copy_text_schemas_dir}) | file(COPY ${schema} DESTINATION ${copy_text_schemas_dir}) | ||||
| endif() | endif() | ||||
| endforeach() | endforeach() | ||||
| # Create a custom target that depends on all the generated files. | |||||
| # This is the target that you can depend on to trigger all these | |||||
| # to be built. | |||||
| add_custom_target(${custom_target_name} | |||||
| DEPENDS ${all_generated_files} ${additional_dependencies}) | |||||
| # Create a custom target that depends on all the generated files. This is the target | |||||
| # that you can depend on to trigger all these to be built. | |||||
| add_custom_target(${custom_target_name} DEPENDS ${all_generated_files} | |||||
| ${additional_dependencies}) | |||||
| # Register the include directory we are using. | # Register the include directory we are using. | ||||
| if (NOT ${generated_includes_dir} STREQUAL "") | |||||
| if(NOT ${generated_includes_dir} STREQUAL "") | |||||
| include_directories(${generated_includes_dir}) | include_directories(${generated_includes_dir}) | ||||
| set_property(TARGET ${custom_target_name} | |||||
| PROPERTY GENERATED_INCLUDES_DIR | |||||
| ${generated_includes_dir}) | |||||
| set_property(TARGET ${custom_target_name} PROPERTY GENERATED_INCLUDES_DIR | |||||
| ${generated_includes_dir}) | |||||
| endif() | endif() | ||||
| # Register the binary schemas dir we are using. | # Register the binary schemas dir we are using. | ||||
| if (NOT ${binary_schemas_dir} STREQUAL "") | |||||
| set_property(TARGET ${custom_target_name} | |||||
| PROPERTY BINARY_SCHEMAS_DIR | |||||
| ${binary_schemas_dir}) | |||||
| if(NOT ${binary_schemas_dir} STREQUAL "") | |||||
| set_property(TARGET ${custom_target_name} PROPERTY BINARY_SCHEMAS_DIR | |||||
| ${binary_schemas_dir}) | |||||
| endif() | endif() | ||||
| # Register the text schema copy dir we are using. | # Register the text schema copy dir we are using. | ||||
| if (NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
| set_property(TARGET ${custom_target_name} | |||||
| PROPERTY COPY_TEXT_SCHEMAS_DIR | |||||
| ${copy_text_schemas_dir}) | |||||
| if(NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
| set_property(TARGET ${custom_target_name} PROPERTY COPY_TEXT_SCHEMAS_DIR | |||||
| ${copy_text_schemas_dir}) | |||||
| endif() | endif() | ||||
| endfunction() | endfunction() | ||||
| @@ -1,49 +1,45 @@ | |||||
| # Parses the version set in src/core/include/megbrain/version.h | |||||
| # Exports the following variables: | |||||
| # MGB_VER_MAJOR: Major version | |||||
| # MGB_VER_MINOR: Minor version | |||||
| # MGB_VER_PATCH: Patch version | |||||
| # MGB_IS_DEV: Is development version | |||||
| # MGB_VER_STRING: Version string | |||||
| # Parses the version set in src/core/include/megbrain/version.h Exports the following | |||||
| # variables: MGB_VER_MAJOR: Major version MGB_VER_MINOR: Minor version MGB_VER_PATCH: | |||||
| # Patch version MGB_IS_DEV: Is development version MGB_VER_STRING: Version string | |||||
| option(MGB_FORCE_DEV_VERSION "Force -dev tag in version stamp" OFF) | option(MGB_FORCE_DEV_VERSION "Force -dev tag in version stamp" OFF) | ||||
| file (READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include/megbrain/version.h" content) | |||||
| file(READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include/megbrain/version.h" content) | |||||
| string (REGEX MATCH "MGB_MAJOR +([0-9]+)" _ ${content}) | |||||
| set (MGB_VER_MAJOR ${CMAKE_MATCH_1}) | |||||
| string(REGEX MATCH "MGB_MAJOR +([0-9]+)" _ ${content}) | |||||
| set(MGB_VER_MAJOR ${CMAKE_MATCH_1}) | |||||
| string (REGEX MATCH "MGB_MINOR +([0-9]+)" _ ${content}) | |||||
| set (MGB_VER_MINOR ${CMAKE_MATCH_1}) | |||||
| string(REGEX MATCH "MGB_MINOR +([0-9]+)" _ ${content}) | |||||
| set(MGB_VER_MINOR ${CMAKE_MATCH_1}) | |||||
| string (REGEX MATCH "MGB_PATCH *([0-9]+)" _ ${content}) | |||||
| set (MGB_VER_PATCH ${CMAKE_MATCH_1}) | |||||
| string(REGEX MATCH "MGB_PATCH *([0-9]+)" _ ${content}) | |||||
| set(MGB_VER_PATCH ${CMAKE_MATCH_1}) | |||||
| string (REGEX MATCH "MGE_MAJOR +([0-9]+)" _ ${content}) | |||||
| set (MGE_VER_MAJOR ${CMAKE_MATCH_1}) | |||||
| string(REGEX MATCH "MGE_MAJOR +([0-9]+)" _ ${content}) | |||||
| set(MGE_VER_MAJOR ${CMAKE_MATCH_1}) | |||||
| string (REGEX MATCH "MGE_MINOR +([0-9]+)" _ ${content}) | |||||
| set (MGE_VER_MINOR ${CMAKE_MATCH_1}) | |||||
| string(REGEX MATCH "MGE_MINOR +([0-9]+)" _ ${content}) | |||||
| set(MGE_VER_MINOR ${CMAKE_MATCH_1}) | |||||
| string (REGEX MATCH "MGE_PATCH *([0-9]+)" _ ${content}) | |||||
| set (MGE_VER_PATCH ${CMAKE_MATCH_1}) | |||||
| string(REGEX MATCH "MGE_PATCH *([0-9]+)" _ ${content}) | |||||
| set(MGE_VER_PATCH ${CMAKE_MATCH_1}) | |||||
| string (REGEX MATCH "MGE_EXTRA_NAME *\"(.*)\"" _ ${content}) | |||||
| set (MGE_EXTRA_NAME ${CMAKE_MATCH_1}) | |||||
| string(REGEX MATCH "MGE_EXTRA_NAME *\"(.*)\"" _ ${content}) | |||||
| set(MGE_EXTRA_NAME ${CMAKE_MATCH_1}) | |||||
| if (MGB_FORCE_DEV_VERSION) | |||||
| set (MGB_IS_DEV 1) | |||||
| if(MGB_FORCE_DEV_VERSION) | |||||
| set(MGB_IS_DEV 1) | |||||
| else() | else() | ||||
| string (REGEX MATCH "MGB_IS_DEV +([01])" _ ${content}) | |||||
| set (MGB_IS_DEV ${CMAKE_MATCH_1}) | |||||
| string(REGEX MATCH "MGB_IS_DEV +([01])" _ ${content}) | |||||
| set(MGB_IS_DEV ${CMAKE_MATCH_1}) | |||||
| endif() | endif() | ||||
| if (DEFINED MGB_VER_MAJOR) | |||||
| set (MGB_VER_STRING "${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}") | |||||
| if(DEFINED MGB_VER_MAJOR) | |||||
| set(MGB_VER_STRING "${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}") | |||||
| else() | else() | ||||
| set (MGB_VER_STRING "${MGE_VER_MAJOR}.${MGE_VER_MINOR}.${MGE_VER_PATCH}") | |||||
| set(MGB_VER_STRING "${MGE_VER_MAJOR}.${MGE_VER_MINOR}.${MGE_VER_PATCH}") | |||||
| endif(DEFINED MGB_VER_MAJOR) | endif(DEFINED MGB_VER_MAJOR) | ||||
| if (MGB_IS_DEV) | |||||
| set (MGB_VER_STRING "${MGB_VER_STRING}-dev") | |||||
| if(MGB_IS_DEV) | |||||
| set(MGB_VER_STRING "${MGB_VER_STRING}-dev") | |||||
| endif() | endif() | ||||
| message(STATUS "Building MegBrain ${MGB_VER_STRING}") | message(STATUS "Building MegBrain ${MGB_VER_STRING}") | ||||
| @@ -2,31 +2,40 @@ | |||||
| include(ExternalProject) | include(ExternalProject) | ||||
| find_package(LLVM 6.0 REQUIRED CONFIG) | find_package(LLVM 6.0 REQUIRED CONFIG) | ||||
| STRING(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_PACKAGE_VERSION}) | |||||
| string(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_PACKAGE_VERSION}) | |||||
| list(GET LLVM_VERSION_LIST 0 LLVM_VERSION_MAJOR) | list(GET LLVM_VERSION_LIST 0 LLVM_VERSION_MAJOR) | ||||
| list(GET LLVM_VERSION_LIST 1 LLVM_VERSION_MINOR) | list(GET LLVM_VERSION_LIST 1 LLVM_VERSION_MINOR) | ||||
| set(HALIDE_DIR "${PROJECT_SOURCE_DIR}/third_party/Halide" CACHE STRING "halide directory") | |||||
| set(HALIDE_DIR | |||||
| "${PROJECT_SOURCE_DIR}/third_party/Halide" | |||||
| CACHE STRING "halide directory") | |||||
| set(HALIDE_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/Halide) | set(HALIDE_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/Halide) | ||||
| set(HALIDE_LIB ${HALIDE_BUILD_DIR}/lib/libHalide.a) | set(HALIDE_LIB ${HALIDE_BUILD_DIR}/lib/libHalide.a) | ||||
| ExternalProject_add( | |||||
| halide | |||||
| SOURCE_DIR ${HALIDE_DIR} | |||||
| PREFIX ${HALIDE_BUILD_DIR} | |||||
| CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${HALIDE_BUILD_DIR} -DWITH_APPS=OFF -DWITH_TESTS=OFF -DWITH_TUTORIALS=OFF -DHALIDE_SHARED_LIBRARY=OFF -DHALIDE_REQUIRE_LLVM_VERSION=${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DTARGET_MIPS=OFF -DTARGET_POWERPC=OFF | |||||
| BUILD_BYPRODUCTS ${HALIDE_LIB} | |||||
| ) | |||||
| ExternalProject_Add( | |||||
| halide | |||||
| SOURCE_DIR ${HALIDE_DIR} | |||||
| PREFIX ${HALIDE_BUILD_DIR} | |||||
| CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||||
| -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||||
| -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||||
| -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} | |||||
| -DCMAKE_INSTALL_PREFIX=${HALIDE_BUILD_DIR} | |||||
| -DWITH_APPS=OFF | |||||
| -DWITH_TESTS=OFF | |||||
| -DWITH_TUTORIALS=OFF | |||||
| -DHALIDE_SHARED_LIBRARY=OFF | |||||
| -DHALIDE_REQUIRE_LLVM_VERSION=${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR} | |||||
| -DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
| -DTARGET_MIPS=OFF | |||||
| -DTARGET_POWERPC=OFF | |||||
| BUILD_BYPRODUCTS ${HALIDE_LIB}) | |||||
| set(HALIDE_INC ${HALIDE_BUILD_DIR}/include) | set(HALIDE_INC ${HALIDE_BUILD_DIR}/include) | ||||
| file(MAKE_DIRECTORY ${HALIDE_INC}) | file(MAKE_DIRECTORY ${HALIDE_INC}) | ||||
| add_library(libhalide STATIC IMPORTED GLOBAL) | add_library(libhalide STATIC IMPORTED GLOBAL) | ||||
| add_dependencies(libhalide halide) | add_dependencies(libhalide halide) | ||||
| set_target_properties( | |||||
| libhalide PROPERTIES | |||||
| IMPORTED_LOCATION ${HALIDE_LIB} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INC} | |||||
| ) | |||||
| set_target_properties(libhalide PROPERTIES IMPORTED_LOCATION ${HALIDE_LIB} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INC}) | |||||
| set(LLVM_COMPONENTS mcjit;bitwriter;linker;passes;X86;ARM;AArch64;Hexagon;NVPTX;AMDGPU) | set(LLVM_COMPONENTS mcjit;bitwriter;linker;passes;X86;ARM;AArch64;Hexagon;NVPTX;AMDGPU) | ||||
| llvm_map_components_to_libnames(HALIDE_LLVM_LIBS ${LLVM_COMPONENTS}) | llvm_map_components_to_libnames(HALIDE_LLVM_LIBS ${LLVM_COMPONENTS}) | ||||
| @@ -1,25 +1,31 @@ | |||||
| if (MGE_USE_SYSTEM_LIB) | |||||
| find_package(dnnl) | |||||
| if (dnnl_FOUND) | |||||
| message(STATUS "Using system provided MKL-DNN.") | |||||
| set (MGE_USE_SYSTEM_MKLDNN ON) | |||||
| return() | |||||
| endif() | |||||
| if(MGE_USE_SYSTEM_LIB) | |||||
| find_package(dnnl) | |||||
| if(dnnl_FOUND) | |||||
| message(STATUS "Using system provided MKL-DNN.") | |||||
| set(MGE_USE_SYSTEM_MKLDNN ON) | |||||
| return() | |||||
| endif() | |||||
| endif() | endif() | ||||
| option(DNNL_BUILD_TESTS "" OFF) | option(DNNL_BUILD_TESTS "" OFF) | ||||
| option(DNNL_BUILD_EXAMPLES "" OFF) | option(DNNL_BUILD_EXAMPLES "" OFF) | ||||
| # we do not want to use OMP now, so config to CPU mode | |||||
| # if set to OMP, some dnnl algo will be more fast | |||||
| set(DNNL_CPU_RUNTIME "SEQ" CACHE STRING "config dnnl to DNNL_RUNTIME_SEQ") | |||||
| # we do not want to use OMP now, so config to CPU mode if set to OMP, some dnnl algo | |||||
| # will be more fast | |||||
| set(DNNL_CPU_RUNTIME | |||||
| "SEQ" | |||||
| CACHE STRING "config dnnl to DNNL_RUNTIME_SEQ") | |||||
| if(MGE_BLAS STREQUAL "MKL") | if(MGE_BLAS STREQUAL "MKL") | ||||
| option(_DNNL_USE_MKL "" ON) | |||||
| set(MKLROOT ${MKL_ROOT_DIR} CACHE STRING "MKL ROOT FOR DNNL") | |||||
| set(MKLLIB libmkl) | |||||
| option(_DNNL_USE_MKL "" ON) | |||||
| set(MKLROOT | |||||
| ${MKL_ROOT_DIR} | |||||
| CACHE STRING "MKL ROOT FOR DNNL") | |||||
| set(MKLLIB libmkl) | |||||
| else() | else() | ||||
| option(_DNNL_USE_MKL "" OFF) | |||||
| option(_DNNL_USE_MKL "" OFF) | |||||
| endif() | endif() | ||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-parameter -Wno-extra") | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-parameter -Wno-extra") | ||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-extra") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-extra") | ||||
| set(DNNL_LIBRARY_TYPE STATIC CACHE STRING "config dnnl to STATIC") | |||||
| set(DNNL_LIBRARY_TYPE | |||||
| STATIC | |||||
| CACHE STRING "config dnnl to STATIC") | |||||
| add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/intel-mkl-dnn) | add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/intel-mkl-dnn) | ||||
| @@ -1,30 +1,28 @@ | |||||
| # - Find the NumPy libraries | |||||
| # This module finds if NumPy is installed, and sets the following variables | |||||
| # indicating where it is. | |||||
| # * Find the NumPy libraries This module finds if NumPy is installed, and sets the | |||||
| # following variables indicating where it is. | |||||
| # | # | ||||
| # TODO: Update to provide the libraries and paths for linking npymath lib. | # TODO: Update to provide the libraries and paths for linking npymath lib. | ||||
| # | # | ||||
| # NUMPY_FOUND - was NumPy found | |||||
| # NUMPY_VERSION - the version of NumPy found as a string | |||||
| # NUMPY_VERSION_MAJOR - the major version number of NumPy | |||||
| # NUMPY_VERSION_MINOR - the minor version number of NumPy | |||||
| # NUMPY_VERSION_PATCH - the patch version number of NumPy | |||||
| # NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601 | |||||
| # NUMPY_INCLUDE_DIR - path to the NumPy include files | |||||
| # NUMPY_FOUND - was NumPy found NUMPY_VERSION - the version of | |||||
| # NumPy found as a string NUMPY_VERSION_MAJOR - the major version number of NumPy | |||||
| # NUMPY_VERSION_MINOR - the minor version number of NumPy NUMPY_VERSION_PATCH - | |||||
| # the patch version number of NumPy NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is | |||||
| # 10601 NUMPY_INCLUDE_DIR - path to the NumPy include files | |||||
| unset(NUMPY_VERSION) | unset(NUMPY_VERSION) | ||||
| unset(NUMPY_INCLUDE_DIR) | unset(NUMPY_INCLUDE_DIR) | ||||
| if(PYTHONINTERP_FOUND) | if(PYTHONINTERP_FOUND) | ||||
| execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" | |||||
| "import numpy as n; print(n.__version__); print(n.get_include());" | |||||
| execute_process( | |||||
| COMMAND "${PYTHON_EXECUTABLE}" "-c" | |||||
| "import numpy as n; print(n.__version__); print(n.get_include());" | |||||
| RESULT_VARIABLE __result | RESULT_VARIABLE __result | ||||
| OUTPUT_VARIABLE __output | OUTPUT_VARIABLE __output | ||||
| OUTPUT_STRIP_TRAILING_WHITESPACE) | OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
| if(__result MATCHES 0) | if(__result MATCHES 0) | ||||
| string(REGEX REPLACE ";" "\\\\;" __values ${__output}) | string(REGEX REPLACE ";" "\\\\;" __values ${__output}) | ||||
| string(REGEX REPLACE "\r?\n" ";" __values ${__values}) | |||||
| string(REGEX REPLACE "\r?\n" ";" __values ${__values}) | |||||
| list(GET __values 0 NUMPY_VERSION) | list(GET __values 0 NUMPY_VERSION) | ||||
| list(GET __values 1 NUMPY_INCLUDE_DIR) | list(GET __values 1 NUMPY_INCLUDE_DIR) | ||||
| @@ -33,13 +31,18 @@ if(PYTHONINTERP_FOUND) | |||||
| set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1}) | set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1}) | ||||
| set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2}) | set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2}) | ||||
| set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3}) | set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3}) | ||||
| math(EXPR NUMPY_VERSION_DECIMAL | |||||
| "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}") | |||||
| string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR}) | |||||
| math( | |||||
| EXPR | |||||
| NUMPY_VERSION_DECIMAL | |||||
| "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}" | |||||
| ) | |||||
| string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR}) | |||||
| else() | else() | ||||
| unset(NUMPY_VERSION) | |||||
| unset(NUMPY_INCLUDE_DIR) | |||||
| message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n") | |||||
| unset(NUMPY_VERSION) | |||||
| unset(NUMPY_INCLUDE_DIR) | |||||
| message( | |||||
| STATUS | |||||
| "Requested NumPy version and include path, but got instead:\n${__output}\n") | |||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| else() | else() | ||||
| @@ -47,8 +50,10 @@ else() | |||||
| endif() | endif() | ||||
| include(FindPackageHandleStandardArgs) | include(FindPackageHandleStandardArgs) | ||||
| find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION | |||||
| VERSION_VAR NUMPY_VERSION) | |||||
| find_package_handle_standard_args( | |||||
| NumPy | |||||
| REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION | |||||
| VERSION_VAR NUMPY_VERSION) | |||||
| if(NUMPY_FOUND) | if(NUMPY_FOUND) | ||||
| message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})") | message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})") | ||||
| @@ -1,48 +1,50 @@ | |||||
| if (MGE_USE_SYSTEM_LIB) | |||||
| find_package(OpenBLAS) | |||||
| set (MGE_USE_SYSTEM_OPENBLAS ON) | |||||
| message(STATUS "Using system provided OpenBLAS ${OpenBLAS_VERSION}") | |||||
| add_library(libopenblas IMPORTED GLOBAL) | |||||
| set_target_properties( | |||||
| libopenblas PROPERTIES | |||||
| IMPORTED_LOCATION ${OpenBLAS_LIBRARIES} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${OpenBLAS_INCLUDE_DIRS} | |||||
| ) | |||||
| return() | |||||
| if(MGE_USE_SYSTEM_LIB) | |||||
| find_package(OpenBLAS) | |||||
| set(MGE_USE_SYSTEM_OPENBLAS ON) | |||||
| message(STATUS "Using system provided OpenBLAS ${OpenBLAS_VERSION}") | |||||
| add_library(libopenblas IMPORTED GLOBAL) | |||||
| set_target_properties( | |||||
| libopenblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_LIBRARIES} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${OpenBLAS_INCLUDE_DIRS}) | |||||
| return() | |||||
| endif() | endif() | ||||
| include(ExternalProject) | include(ExternalProject) | ||||
| include(GNUInstallDirs) | include(GNUInstallDirs) | ||||
| set(OPENBLAS_DIR "${PROJECT_SOURCE_DIR}/third_party/OpenBLAS" CACHE STRING "OpenBLAS directory") | |||||
| set(OPENBLAS_DIR | |||||
| "${PROJECT_SOURCE_DIR}/third_party/OpenBLAS" | |||||
| CACHE STRING "OpenBLAS directory") | |||||
| set(OPENBLAS_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/OpenBLAS) | set(OPENBLAS_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/OpenBLAS) | ||||
| set(OPENBLAS_INC ${OPENBLAS_BUILD_DIR}/include) | set(OPENBLAS_INC ${OPENBLAS_BUILD_DIR}/include) | ||||
| set(OPENBLAS_LIB ${OPENBLAS_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a) | set(OPENBLAS_LIB ${OPENBLAS_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a) | ||||
| if(${CMAKE_GENERATOR} STREQUAL "Ninja") | if(${CMAKE_GENERATOR} STREQUAL "Ninja") | ||||
| set(MAKE_COMMAND make) | |||||
| set(MAKE_COMMAND make) | |||||
| else() | else() | ||||
| set(MAKE_COMMAND "$(MAKE)") | |||||
| set(MAKE_COMMAND "$(MAKE)") | |||||
| endif() | endif() | ||||
| ExternalProject_add( | |||||
| openblas | |||||
| SOURCE_DIR ${OPENBLAS_DIR} | |||||
| PREFIX ${OPENBLAS_BUILD_DIR} | |||||
| CMAKE_GENERATOR "Unix Makefiles" | |||||
| CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${OPENBLAS_BUILD_DIR} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
| BUILD_COMMAND ${MAKE_COMMAND} | |||||
| BUILD_BYPRODUCTS ${OPENBLAS_LIB} ${OPENBLAS_PROTOC_EXECUTABLE} | |||||
| ) | |||||
| ExternalProject_Add( | |||||
| openblas | |||||
| SOURCE_DIR ${OPENBLAS_DIR} | |||||
| PREFIX ${OPENBLAS_BUILD_DIR} | |||||
| CMAKE_GENERATOR "Unix Makefiles" | |||||
| CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||||
| -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||||
| -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||||
| -DCMAKE_INSTALL_PREFIX=${OPENBLAS_BUILD_DIR} | |||||
| -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} | |||||
| -DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
| BUILD_COMMAND ${MAKE_COMMAND} | |||||
| BUILD_BYPRODUCTS ${OPENBLAS_LIB} ${OPENBLAS_PROTOC_EXECUTABLE}) | |||||
| file(MAKE_DIRECTORY ${OPENBLAS_INC}) | file(MAKE_DIRECTORY ${OPENBLAS_INC}) | ||||
| add_library(libopenblas STATIC IMPORTED GLOBAL) | add_library(libopenblas STATIC IMPORTED GLOBAL) | ||||
| add_dependencies(libopenblas openblas) | add_dependencies(libopenblas openblas) | ||||
| set_target_properties( | set_target_properties( | ||||
| libopenblas PROPERTIES | |||||
| IMPORTED_LOCATION ${OPENBLAS_LIB} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${OPENBLAS_BUILD_DIR}/include | |||||
| ) | |||||
| libopenblas PROPERTIES IMPORTED_LOCATION ${OPENBLAS_LIB} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${OPENBLAS_BUILD_DIR}/include) | |||||
| @@ -1,31 +1,31 @@ | |||||
| find_library(ACLRT_LIBRARY | |||||
| NAMES libascendcl.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{ACLRT_HOME}/lib64/stub" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES stub | |||||
| DOC "ACL library." ) | |||||
| find_library( | |||||
| ACLRT_LIBRARY | |||||
| NAMES libascendcl.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{ACLRT_HOME}/lib64/stub" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES stub | |||||
| DOC "ACL library.") | |||||
| if(ACLRT_LIBRARY STREQUAL "ACLRT_LIBRARY-NOTFOUND") | if(ACLRT_LIBRARY STREQUAL "ACLRT_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find ACLRT Library") | |||||
| message(FATAL_ERROR "Can not find ACLRT Library") | |||||
| endif() | endif() | ||||
| get_filename_component(__found_aclrt_root "${ACLRT_LIBRARY}/../../../" REALPATH) | get_filename_component(__found_aclrt_root "${ACLRT_LIBRARY}/../../../" REALPATH) | ||||
| find_path(ACLRT_INCLUDE_DIR | |||||
| NAMES acl/acl.h | |||||
| HINTS "$ENV{ACLRT_HOME}/include" ${__found_aclrt_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to ACLRT include directory." ) | |||||
| find_path( | |||||
| ACLRT_INCLUDE_DIR | |||||
| NAMES acl/acl.h | |||||
| HINTS "$ENV{ACLRT_HOME}/include" ${__found_aclrt_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to ACLRT include directory.") | |||||
| if(ACLRT_INCLUDE_DIR STREQUAL "ACLRT_INCLUDE_DIR-NOTFOUND") | if(ACLRT_INCLUDE_DIR STREQUAL "ACLRT_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find ACLRT Library") | |||||
| message(FATAL_ERROR "Can not find ACLRT Library") | |||||
| endif() | endif() | ||||
| add_library(libascendcl SHARED IMPORTED) | add_library(libascendcl SHARED IMPORTED) | ||||
| set_target_properties(libascendcl PROPERTIES | |||||
| IMPORTED_LOCATION ${ACLRT_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${ACLRT_INCLUDE_DIR} | |||||
| ) | |||||
| set_target_properties( | |||||
| libascendcl PROPERTIES IMPORTED_LOCATION ${ACLRT_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${ACLRT_INCLUDE_DIR}) | |||||
| message(STATUS "Found ACLRT: ${__found_aclrt_root}") | message(STATUS "Found ACLRT: ${__found_aclrt_root}") | ||||
| @@ -1,44 +1,57 @@ | |||||
| find_library(CNDEV_LIBRARY | |||||
| NAMES libcndev.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNDEV library." ) | |||||
| find_library( | |||||
| CNDEV_LIBRARY | |||||
| NAMES libcndev.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNDEV library.") | |||||
| if(CNDEV_LIBRARY STREQUAL "CNDEV_LIBRARY-NOTFOUND") | if(CNDEV_LIBRARY STREQUAL "CNDEV_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNDEV Library") | |||||
| message(FATAL_ERROR "Can not find CNDEV Library") | |||||
| endif() | endif() | ||||
| get_filename_component(__found_cndev_root ${CNDEV_LIBRARY}/../.. REALPATH) | get_filename_component(__found_cndev_root ${CNDEV_LIBRARY}/../.. REALPATH) | ||||
| find_path(CNDEV_INCLUDE_DIR | |||||
| NAMES cndev.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cndev_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNDEV include directory." ) | |||||
| find_path( | |||||
| CNDEV_INCLUDE_DIR | |||||
| NAMES cndev.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cndev_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNDEV include directory.") | |||||
| if(CNDEV_INCLUDE_DIR STREQUAL "CNDEV_INCLUDE_DIR-NOTFOUND") | if(CNDEV_INCLUDE_DIR STREQUAL "CNDEV_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNDEV Library") | |||||
| message(FATAL_ERROR "Can not find CNDEV Library") | |||||
| endif() | endif() | ||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_1 REGEX "^#define CNDEV_VERSION_1 [0-9]+.*$") | |||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_2 REGEX "^#define CNDEV_VERSION_2 [0-9]+.*$") | |||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_3 REGEX "^#define CNDEV_VERSION_3 [0-9]+.*$") | |||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_4 REGEX "^#define CNDEV_VERSION_4 [0-9]+.*$") | |||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_5 REGEX "^#define CNDEV_VERSION_5 [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_1 ([0-9]+).*$" "\\1" CNDEV_VERSION_1 "${CNDEV_1}") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_2 ([0-9]+).*$" "\\1" CNDEV_VERSION_2 "${CNDEV_2}") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_3 ([0-9]+).*$" "\\1" CNDEV_VERSION_3 "${CNDEV_3}") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_4 ([0-9]+).*$" "\\1" CNDEV_VERSION_4 "${CNDEV_4}") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_5 ([0-9]+).*$" "\\1" CNDEV_VERSION_5 "${CNDEV_5}") | |||||
| set(CNDEV_VERSION_STRING "${CNDEV_VERSION_1}.${CNDEV_VERSION_2}.${CNDEV_VERSION_3}.${CNDEV_VERSION_4}.${CNDEV_VERSION_5}") | |||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_1 | |||||
| REGEX "^#define CNDEV_VERSION_1 [0-9]+.*$") | |||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_2 | |||||
| REGEX "^#define CNDEV_VERSION_2 [0-9]+.*$") | |||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_3 | |||||
| REGEX "^#define CNDEV_VERSION_3 [0-9]+.*$") | |||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_4 | |||||
| REGEX "^#define CNDEV_VERSION_4 [0-9]+.*$") | |||||
| file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_5 | |||||
| REGEX "^#define CNDEV_VERSION_5 [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_1 ([0-9]+).*$" "\\1" CNDEV_VERSION_1 | |||||
| "${CNDEV_1}") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_2 ([0-9]+).*$" "\\1" CNDEV_VERSION_2 | |||||
| "${CNDEV_2}") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_3 ([0-9]+).*$" "\\1" CNDEV_VERSION_3 | |||||
| "${CNDEV_3}") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_4 ([0-9]+).*$" "\\1" CNDEV_VERSION_4 | |||||
| "${CNDEV_4}") | |||||
| string(REGEX REPLACE "^#define CNDEV_VERSION_5 ([0-9]+).*$" "\\1" CNDEV_VERSION_5 | |||||
| "${CNDEV_5}") | |||||
| set(CNDEV_VERSION_STRING | |||||
| "${CNDEV_VERSION_1}.${CNDEV_VERSION_2}.${CNDEV_VERSION_3}.${CNDEV_VERSION_4}.${CNDEV_VERSION_5}" | |||||
| ) | |||||
| add_library(libcndev SHARED IMPORTED) | add_library(libcndev SHARED IMPORTED) | ||||
| set_target_properties(libcndev PROPERTIES | |||||
| IMPORTED_LOCATION ${CNDEV_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${CNDEV_INCLUDE_DIR} | |||||
| ) | |||||
| message(STATUS "Found CNDEV: ${__found_cndev_root} (found version: ${CNDEV_VERSION_STRING})") | |||||
| set_target_properties( | |||||
| libcndev PROPERTIES IMPORTED_LOCATION ${CNDEV_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
| ${CNDEV_INCLUDE_DIR}) | |||||
| message( | |||||
| STATUS "Found CNDEV: ${__found_cndev_root} (found version: ${CNDEV_VERSION_STRING})") | |||||
| @@ -1,40 +1,49 @@ | |||||
| find_library(CNLIGHT_LIBRARY | |||||
| NAMES libcnlight.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNLIGHT library." ) | |||||
| find_library( | |||||
| CNLIGHT_LIBRARY | |||||
| NAMES libcnlight.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNLIGHT library.") | |||||
| if(CNLIGHT_LIBRARY STREQUAL "CNLIGHT_LIBRARY-NOTFOUND") | if(CNLIGHT_LIBRARY STREQUAL "CNLIGHT_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNLIGHT Library") | |||||
| message(FATAL_ERROR "Can not find CNLIGHT Library") | |||||
| endif() | endif() | ||||
| get_filename_component(__found_cnlight_root "${CNLIGHT_LIBRARY}/../.." REALPATH) | get_filename_component(__found_cnlight_root "${CNLIGHT_LIBRARY}/../.." REALPATH) | ||||
| find_path(CNLIGHT_INCLUDE_DIR | |||||
| NAMES cnlight.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnlight_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNLIGHT include directory." ) | |||||
| find_path( | |||||
| CNLIGHT_INCLUDE_DIR | |||||
| NAMES cnlight.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnlight_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNLIGHT include directory.") | |||||
| if(CNLIGHT_INCLUDE_DIR STREQUAL "CNLIGHT_INCLUDE_DIR-NOTFOUND") | if(CNLIGHT_INCLUDE_DIR STREQUAL "CNLIGHT_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNLIGHT Library") | |||||
| message(FATAL_ERROR "Can not find CNLIGHT Library") | |||||
| endif() | endif() | ||||
| file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MAJOR REGEX "^#define CNLIGHT_MAJOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MINOR REGEX "^#define CNLIGHT_MINOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_PATCH REGEX "^#define CNLIGHT_PATCH_VERSION [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNLIGHT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MAJOR "${CNLIGHT_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNLIGHT_MINOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MINOR "${CNLIGHT_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNLIGHT_PATCH_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_PATCH "${CNLIGHT_PATCH}") | |||||
| set(CNLIGHT_VERSION_STRING "${CNLIGHT_VERSION_MAJOR}.${CNLIGHT_VERSION_MINOR}.${CNLIGHT_VERSION_PATCH}") | |||||
| file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MAJOR | |||||
| REGEX "^#define CNLIGHT_MAJOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MINOR | |||||
| REGEX "^#define CNLIGHT_MINOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_PATCH | |||||
| REGEX "^#define CNLIGHT_PATCH_VERSION [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNLIGHT_MAJOR_VERSION ([0-9]+).*$" "\\1" | |||||
| CNLIGHT_VERSION_MAJOR "${CNLIGHT_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNLIGHT_MINOR_VERSION ([0-9]+).*$" "\\1" | |||||
| CNLIGHT_VERSION_MINOR "${CNLIGHT_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNLIGHT_PATCH_VERSION ([0-9]+).*$" "\\1" | |||||
| CNLIGHT_VERSION_PATCH "${CNLIGHT_PATCH}") | |||||
| set(CNLIGHT_VERSION_STRING | |||||
| "${CNLIGHT_VERSION_MAJOR}.${CNLIGHT_VERSION_MINOR}.${CNLIGHT_VERSION_PATCH}") | |||||
| add_library(libcnlight SHARED IMPORTED) | add_library(libcnlight SHARED IMPORTED) | ||||
| set_target_properties(libcnlight PROPERTIES | |||||
| IMPORTED_LOCATION ${CNLIGHT_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${CNLIGHT_INCLUDE_DIR} | |||||
| ) | |||||
| message(STATUS "Found CNLIGHT: ${__found_cnlight_root} (found version: ${CNLIGHT_VERSION_STRING})") | |||||
| set_target_properties( | |||||
| libcnlight PROPERTIES IMPORTED_LOCATION ${CNLIGHT_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${CNLIGHT_INCLUDE_DIR}) | |||||
| message( | |||||
| STATUS | |||||
| "Found CNLIGHT: ${__found_cnlight_root} (found version: ${CNLIGHT_VERSION_STRING})") | |||||
| @@ -1,40 +1,48 @@ | |||||
| find_library(CNML_LIBRARY | |||||
| NAMES libcnml.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNML library." ) | |||||
| find_library( | |||||
| CNML_LIBRARY | |||||
| NAMES libcnml.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNML library.") | |||||
| if(CNML_LIBRARY STREQUAL "CNML_LIBRARY-NOTFOUND") | if(CNML_LIBRARY STREQUAL "CNML_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNML Library") | |||||
| message(FATAL_ERROR "Can not find CNML Library") | |||||
| endif() | endif() | ||||
| get_filename_component(__found_cnml_root "${CNML_LIBRARY}/../.." REALPATH) | get_filename_component(__found_cnml_root "${CNML_LIBRARY}/../.." REALPATH) | ||||
| find_path(CNML_INCLUDE_DIR | |||||
| NAMES cnml.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnml_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNML include directory." ) | |||||
| find_path( | |||||
| CNML_INCLUDE_DIR | |||||
| NAMES cnml.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnml_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNML include directory.") | |||||
| if(CNML_INCLUDE_DIR STREQUAL "CNML_INCLUDE_DIR-NOTFOUND") | if(CNML_INCLUDE_DIR STREQUAL "CNML_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNML Library") | |||||
| message(FATAL_ERROR "Can not find CNML Library") | |||||
| endif() | endif() | ||||
| file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MAJOR REGEX "^#define CNML_MAJOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MINOR REGEX "^#define CNML_MINOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_PATCH REGEX "^#define CNML_PATCH_VERSION [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNML_MAJOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MAJOR "${CNML_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNML_MINOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MINOR "${CNML_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNML_PATCH_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_PATCH "${CNML_PATCH}") | |||||
| set(CNML_VERSION_STRING "${CNML_VERSION_MAJOR}.${CNML_VERSION_MINOR}.${CNML_VERSION_PATCH}") | |||||
| file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MAJOR | |||||
| REGEX "^#define CNML_MAJOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MINOR | |||||
| REGEX "^#define CNML_MINOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_PATCH | |||||
| REGEX "^#define CNML_PATCH_VERSION [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNML_MAJOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MAJOR | |||||
| "${CNML_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNML_MINOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MINOR | |||||
| "${CNML_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNML_PATCH_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_PATCH | |||||
| "${CNML_PATCH}") | |||||
| set(CNML_VERSION_STRING | |||||
| "${CNML_VERSION_MAJOR}.${CNML_VERSION_MINOR}.${CNML_VERSION_PATCH}") | |||||
| add_library(libcnml SHARED IMPORTED) | add_library(libcnml SHARED IMPORTED) | ||||
| set_target_properties(libcnml PROPERTIES | |||||
| IMPORTED_LOCATION ${CNML_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${CNML_INCLUDE_DIR} | |||||
| ) | |||||
| message(STATUS "Found CNML: ${__found_cnml_root} (found version: ${CNML_VERSION_STRING})") | |||||
| set_target_properties( | |||||
| libcnml PROPERTIES IMPORTED_LOCATION ${CNML_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
| ${CNML_INCLUDE_DIR}) | |||||
| message( | |||||
| STATUS "Found CNML: ${__found_cnml_root} (found version: ${CNML_VERSION_STRING})") | |||||
| @@ -1,80 +1,100 @@ | |||||
| find_library(CNNL_LIBRARY | |||||
| NAMES libcnnl.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNNL library." ) | |||||
| find_library( | |||||
| CNNL_LIBRARY | |||||
| NAMES libcnnl.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNNL library.") | |||||
| if(CNNL_LIBRARY STREQUAL "CNNL_LIBRARY-NOTFOUND") | if(CNNL_LIBRARY STREQUAL "CNNL_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNNL Library") | |||||
| message(FATAL_ERROR "Can not find CNNL Library") | |||||
| endif() | endif() | ||||
| get_filename_component(__found_cnnl_root "${CNNL_LIBRARY}/../.." REALPATH) | get_filename_component(__found_cnnl_root "${CNNL_LIBRARY}/../.." REALPATH) | ||||
| find_path(CNNL_INCLUDE_DIR | |||||
| NAMES cnnl.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNNL include directory." ) | |||||
| find_path( | |||||
| CNNL_INCLUDE_DIR | |||||
| NAMES cnnl.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNNL include directory.") | |||||
| if(CNNL_INCLUDE_DIR STREQUAL "CNNL_INCLUDE_DIR-NOTFOUND") | if(CNNL_INCLUDE_DIR STREQUAL "CNNL_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNNL Library") | |||||
| message(FATAL_ERROR "Can not find CNNL Library") | |||||
| endif() | endif() | ||||
| file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MAJOR REGEX "^#define CNNL_MAJOR [0-9]+.*$") | |||||
| file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MINOR REGEX "^#define CNNL_MINOR [0-9]+.*$") | |||||
| file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_PATCH REGEX "^#define CNNL_PATCHLEVEL [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNNL_MAJOR ([0-9]+).*$" "\\1" CNNL_VERSION_MAJOR "${CNNL_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNNL_MINOR ([0-9]+).*$" "\\1" CNNL_VERSION_MINOR "${CNNL_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNNL_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_VERSION_PATCH "${CNNL_PATCH}") | |||||
| set(CNNL_VERSION_STRING "${CNNL_VERSION_MAJOR}.${CNNL_VERSION_MINOR}.${CNNL_VERSION_PATCH}") | |||||
| file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MAJOR | |||||
| REGEX "^#define CNNL_MAJOR [0-9]+.*$") | |||||
| file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MINOR | |||||
| REGEX "^#define CNNL_MINOR [0-9]+.*$") | |||||
| file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_PATCH | |||||
| REGEX "^#define CNNL_PATCHLEVEL [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNNL_MAJOR ([0-9]+).*$" "\\1" CNNL_VERSION_MAJOR | |||||
| "${CNNL_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNNL_MINOR ([0-9]+).*$" "\\1" CNNL_VERSION_MINOR | |||||
| "${CNNL_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNNL_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_VERSION_PATCH | |||||
| "${CNNL_PATCH}") | |||||
| set(CNNL_VERSION_STRING | |||||
| "${CNNL_VERSION_MAJOR}.${CNNL_VERSION_MINOR}.${CNNL_VERSION_PATCH}") | |||||
| add_library(libcnnl SHARED IMPORTED) | add_library(libcnnl SHARED IMPORTED) | ||||
| set_target_properties(libcnnl PROPERTIES | |||||
| IMPORTED_LOCATION ${CNNL_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${CNNL_INCLUDE_DIR} | |||||
| ) | |||||
| set_target_properties( | |||||
| libcnnl PROPERTIES IMPORTED_LOCATION ${CNNL_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
| ${CNNL_INCLUDE_DIR}) | |||||
| message(STATUS "Found CNNL: ${__found_cnnl_root} (found version: ${CNNL_VERSION_STRING})") | |||||
| message( | |||||
| STATUS "Found CNNL: ${__found_cnnl_root} (found version: ${CNNL_VERSION_STRING})") | |||||
| find_library(CNNL_EXTRA_LIBRARY | |||||
| NAMES libcnnl_extra.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNNL_EXTRA library." ) | |||||
| find_library( | |||||
| CNNL_EXTRA_LIBRARY | |||||
| NAMES libcnnl_extra.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNNL_EXTRA library.") | |||||
| if(CNNL_EXTRA_LIBRARY STREQUAL "CNNL_EXTRA_LIBRARY-NOTFOUND") | if(CNNL_EXTRA_LIBRARY STREQUAL "CNNL_EXTRA_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||||
| message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||||
| endif() | endif() | ||||
| get_filename_component(__found_cnnl_extra_root "${CNNL_EXTRA_LIBRARY}/../.." REALPATH) | get_filename_component(__found_cnnl_extra_root "${CNNL_EXTRA_LIBRARY}/../.." REALPATH) | ||||
| find_path(CNNL_EXTRA_INCLUDE_DIR | |||||
| NAMES cnnl_extra.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_extra_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNNL_EXTRA include directory." ) | |||||
| find_path( | |||||
| CNNL_EXTRA_INCLUDE_DIR | |||||
| NAMES cnnl_extra.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_extra_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNNL_EXTRA include directory.") | |||||
| if(CNNL_EXTRA_INCLUDE_DIR STREQUAL "CNNL_EXTRA_INCLUDE_DIR-NOTFOUND") | if(CNNL_EXTRA_INCLUDE_DIR STREQUAL "CNNL_EXTRA_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||||
| message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||||
| endif() | endif() | ||||
| file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MAJOR REGEX "^#define CNNL_EXTRA_MAJOR [0-9]+.*$") | |||||
| file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MINOR REGEX "^#define CNNL_EXTRA_MINOR [0-9]+.*$") | |||||
| file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_PATCH REGEX "^#define CNNL_EXTRA_PATCHLEVEL [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNNL_EXTRA_MAJOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MAJOR "${CNNL_EXTRA_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNNL_EXTRA_MINOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MINOR "${CNNL_EXTRA_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNNL_EXTRA_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_PATCH "${CNNL_EXTRA_PATCH}") | |||||
| set(CNNL_EXTRA_VERSION_STRING "${CNNL_EXTRA_VERSION_MAJOR}.${CNNL_EXTRA_VERSION_MINOR}.${CNNL_EXTRA_VERSION_PATCH}") | |||||
| file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MAJOR | |||||
| REGEX "^#define CNNL_EXTRA_MAJOR [0-9]+.*$") | |||||
| file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MINOR | |||||
| REGEX "^#define CNNL_EXTRA_MINOR [0-9]+.*$") | |||||
| file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_PATCH | |||||
| REGEX "^#define CNNL_EXTRA_PATCHLEVEL [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNNL_EXTRA_MAJOR ([0-9]+).*$" "\\1" | |||||
| CNNL_EXTRA_VERSION_MAJOR "${CNNL_EXTRA_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNNL_EXTRA_MINOR ([0-9]+).*$" "\\1" | |||||
| CNNL_EXTRA_VERSION_MINOR "${CNNL_EXTRA_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNNL_EXTRA_PATCHLEVEL ([0-9]+).*$" "\\1" | |||||
| CNNL_EXTRA_VERSION_PATCH "${CNNL_EXTRA_PATCH}") | |||||
| set(CNNL_EXTRA_VERSION_STRING | |||||
| "${CNNL_EXTRA_VERSION_MAJOR}.${CNNL_EXTRA_VERSION_MINOR}.${CNNL_EXTRA_VERSION_PATCH}" | |||||
| ) | |||||
| add_library(libcnnl_extra SHARED IMPORTED) | add_library(libcnnl_extra SHARED IMPORTED) | ||||
| set_target_properties(libcnnl_extra PROPERTIES | |||||
| IMPORTED_LOCATION ${CNNL_EXTRA_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${CNNL_EXTRA_INCLUDE_DIR} | |||||
| ) | |||||
| message(STATUS "Found CNNL_EXTRA: ${__found_cnnl_extra_root} (found version: ${CNNL_EXTRA_VERSION_STRING})") | |||||
| set_target_properties( | |||||
| libcnnl_extra PROPERTIES IMPORTED_LOCATION ${CNNL_EXTRA_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${CNNL_EXTRA_INCLUDE_DIR}) | |||||
| message( | |||||
| STATUS | |||||
| "Found CNNL_EXTRA: ${__found_cnnl_extra_root} (found version: ${CNNL_EXTRA_VERSION_STRING})" | |||||
| ) | |||||
| @@ -1,40 +1,48 @@ | |||||
| find_library(CNRT_LIBRARY | |||||
| NAMES libcnrt.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNRT library." ) | |||||
| find_library( | |||||
| CNRT_LIBRARY | |||||
| NAMES libcnrt.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CNRT library.") | |||||
| if(CNRT_LIBRARY STREQUAL "CNRT_LIBRARY-NOTFOUND") | if(CNRT_LIBRARY STREQUAL "CNRT_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNRT Library") | |||||
| message(FATAL_ERROR "Can not find CNRT Library") | |||||
| endif() | endif() | ||||
| get_filename_component(__found_cnrt_root ${CNRT_LIBRARY}/../../ REALPATH) | get_filename_component(__found_cnrt_root ${CNRT_LIBRARY}/../../ REALPATH) | ||||
| find_path(CNRT_INCLUDE_DIR | |||||
| NAMES cnrt.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnrt_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNRT include directory." ) | |||||
| find_path( | |||||
| CNRT_INCLUDE_DIR | |||||
| NAMES cnrt.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnrt_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CNRT include directory.") | |||||
| if(CNRT_INCLUDE_DIR STREQUAL "CNRT_INCLUDE_DIR-NOTFOUND") | if(CNRT_INCLUDE_DIR STREQUAL "CNRT_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CNRT Library") | |||||
| message(FATAL_ERROR "Can not find CNRT Library") | |||||
| endif() | endif() | ||||
| file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MAJOR REGEX "^#define CNRT_MAJOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MINOR REGEX "^#define CNRT_MINOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_PATCH REGEX "^#define CNRT_PATCH_VERSION [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNRT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MAJOR "${CNRT_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNRT_MINOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MINOR "${CNRT_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNRT_PATCH_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_PATCH "${CNRT_PATCH}") | |||||
| set(CNRT_VERSION_STRING "${CNRT_VERSION_MAJOR}.${CNRT_VERSION_MINOR}.${CNRT_VERSION_PATCH}") | |||||
| file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MAJOR | |||||
| REGEX "^#define CNRT_MAJOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MINOR | |||||
| REGEX "^#define CNRT_MINOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_PATCH | |||||
| REGEX "^#define CNRT_PATCH_VERSION [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define CNRT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MAJOR | |||||
| "${CNRT_MAJOR}") | |||||
| string(REGEX REPLACE "^#define CNRT_MINOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MINOR | |||||
| "${CNRT_MINOR}") | |||||
| string(REGEX REPLACE "^#define CNRT_PATCH_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_PATCH | |||||
| "${CNRT_PATCH}") | |||||
| set(CNRT_VERSION_STRING | |||||
| "${CNRT_VERSION_MAJOR}.${CNRT_VERSION_MINOR}.${CNRT_VERSION_PATCH}") | |||||
| add_library(libcnrt SHARED IMPORTED) | add_library(libcnrt SHARED IMPORTED) | ||||
| set_target_properties(libcnrt PROPERTIES | |||||
| IMPORTED_LOCATION ${CNRT_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${CNRT_INCLUDE_DIR} | |||||
| ) | |||||
| message(STATUS "Found CNRT: ${__found_cnrt_root} (found version: ${CNRT_VERSION_STRING})") | |||||
| set_target_properties( | |||||
| libcnrt PROPERTIES IMPORTED_LOCATION ${CNRT_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
| ${CNRT_INCLUDE_DIR}) | |||||
| message( | |||||
| STATUS "Found CNRT: ${__found_cnrt_root} (found version: ${CNRT_VERSION_STRING})") | |||||
| @@ -1,2 +1,5 @@ | |||||
| file(GLOB_RECURSE CPP_REDIS_SRCS ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/sources/*.cpp ${PROJECT_SOURCE_DIR}/third_party/tacopie/sources/*.cpp) | |||||
| set(CPP_REDIS_INCLUDES ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/includes ${PROJECT_SOURCE_DIR}/third_party/tacopie/includes) | |||||
| file(GLOB_RECURSE CPP_REDIS_SRCS | |||||
| ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/sources/*.cpp | |||||
| ${PROJECT_SOURCE_DIR}/third_party/tacopie/sources/*.cpp) | |||||
| set(CPP_REDIS_INCLUDES ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/includes | |||||
| ${PROJECT_SOURCE_DIR}/third_party/tacopie/includes) | |||||
| @@ -1,20 +1,20 @@ | |||||
| if (MGE_USE_SYSTEM_LIB) | |||||
| find_package(Cpuinfo) | |||||
| message(STATUS "Using system provided cpuinfo ${cpuinfo_VERSION}") | |||||
| add_library(libcpuinfo IMPORTED GLOBAL) | |||||
| set_target_properties( | |||||
| libcpuinfo PROPERTIES | |||||
| IMPORTED_LOCATION ${cpuinfo_LIBRARIES} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${cpuinfo_INCLUDE_DIRS} | |||||
| ) | |||||
| return() | |||||
| if(MGE_USE_SYSTEM_LIB) | |||||
| find_package(Cpuinfo) | |||||
| message(STATUS "Using system provided cpuinfo ${cpuinfo_VERSION}") | |||||
| add_library(libcpuinfo IMPORTED GLOBAL) | |||||
| set_target_properties( | |||||
| libcpuinfo PROPERTIES IMPORTED_LOCATION ${cpuinfo_LIBRARIES} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${cpuinfo_INCLUDE_DIRS}) | |||||
| return() | |||||
| endif() | endif() | ||||
| SET(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "Type of cpuinfo library (shared, static, or default) to build") | |||||
| OPTION(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF) | |||||
| OPTION(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF) | |||||
| OPTION(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF) | |||||
| OPTION(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF) | |||||
| set(CPUINFO_LIBRARY_TYPE | |||||
| "static" | |||||
| CACHE STRING "Type of cpuinfo library (shared, static, or default) to build") | |||||
| option(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF) | |||||
| option(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF) | |||||
| option(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF) | |||||
| option(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF) | |||||
| include_directories("${PROJECT_SOURCE_DIR}/third_party/cpuinfo/include") | include_directories("${PROJECT_SOURCE_DIR}/third_party/cpuinfo/include") | ||||
| add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cpuinfo ${CMAKE_CURRENT_BINARY_DIR}/cpuinfo EXCLUDE_FROM_ALL) | |||||
| add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cpuinfo | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/cpuinfo EXCLUDE_FROM_ALL) | |||||
| @@ -1,73 +1,83 @@ | |||||
| find_package(PkgConfig) | find_package(PkgConfig) | ||||
| if(${PkgConfig_FOUND}) | if(${PkgConfig_FOUND}) | ||||
| pkg_check_modules(PC_CUDNN QUIET CUDNN) | |||||
| pkg_check_modules(PC_CUDNN QUIET CUDNN) | |||||
| endif() | endif() | ||||
| if("${CUDNN_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{CUDNN_ROOT_DIR}" STREQUAL "") | |||||
| set(CUDNN_ROOT_DIR $ENV{CUDNN_ROOT_DIR}) | |||||
| if("${CUDNN_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{CUDNN_ROOT_DIR}" STREQUAL "") | |||||
| set(CUDNN_ROOT_DIR $ENV{CUDNN_ROOT_DIR}) | |||||
| endif() | endif() | ||||
| if(MGE_CUDA_USE_STATIC AND NOT MGE_WITH_CUDNN_SHARED) | if(MGE_CUDA_USE_STATIC AND NOT MGE_WITH_CUDNN_SHARED) | ||||
| find_library(CUDNN_LIBRARY | |||||
| NAMES libcudnn_static.a cudnn.lib | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CUDNN library." ) | |||||
| find_library( | |||||
| CUDNN_LIBRARY | |||||
| NAMES libcudnn_static.a cudnn.lib | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} | |||||
| ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CUDNN library.") | |||||
| else() | else() | ||||
| find_library(CUDNN_LIBRARY | |||||
| NAMES libcudnn.so libcudnn.dylib cudnn64.dll | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CUDNN library." ) | |||||
| find_library( | |||||
| CUDNN_LIBRARY | |||||
| NAMES libcudnn.so libcudnn.dylib cudnn64.dll | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} | |||||
| ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "CUDNN library.") | |||||
| endif() | endif() | ||||
| if(CUDNN_LIBRARY STREQUAL "CUDNN_LIBRARY-NOTFOUND") | if(CUDNN_LIBRARY STREQUAL "CUDNN_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CuDNN Library, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env") | |||||
| message( | |||||
| FATAL_ERROR | |||||
| "Can not find CuDNN Library, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env" | |||||
| ) | |||||
| endif() | endif() | ||||
| get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY}/../.. REALPATH) | get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY}/../.. REALPATH) | ||||
| find_path(CUDNN_INCLUDE_DIR | |||||
| NAMES cudnn.h | |||||
| HINTS $ENV{PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CUDNN include directory." ) | |||||
| find_path( | |||||
| CUDNN_INCLUDE_DIR | |||||
| NAMES cudnn.h | |||||
| HINTS $ENV{PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} | |||||
| ${__found_cudnn_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to CUDNN include directory.") | |||||
| if(CUDNN_INCLUDE_DIR STREQUAL "CUDNN_INCLUDE_DIR-NOTFOUND") | if(CUDNN_INCLUDE_DIR STREQUAL "CUDNN_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find CuDNN INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env") | |||||
| message( | |||||
| FATAL_ERROR | |||||
| "Can not find CuDNN INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env" | |||||
| ) | |||||
| endif() | endif() | ||||
| if(EXISTS ${CUDNN_INCLUDE_DIR}/cudnn_version.h) | if(EXISTS ${CUDNN_INCLUDE_DIR}/cudnn_version.h) | ||||
| file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS) | |||||
| file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS) | |||||
| else() | else() | ||||
| file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) | |||||
| file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) | |||||
| endif() | endif() | ||||
| string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" | |||||
| CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") | |||||
| string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" | |||||
| CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") | |||||
| string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" | |||||
| CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") | |||||
| string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" | |||||
| CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") | |||||
| string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" | |||||
| CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") | |||||
| string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" | |||||
| CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}") | |||||
| string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" CUDNN_MAJOR_VERSION | |||||
| "${CUDNN_VERSION_FILE_CONTENTS}") | |||||
| string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" CUDNN_MAJOR_VERSION | |||||
| "${CUDNN_MAJOR_VERSION}") | |||||
| string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" CUDNN_MINOR_VERSION | |||||
| "${CUDNN_VERSION_FILE_CONTENTS}") | |||||
| string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" CUDNN_MINOR_VERSION | |||||
| "${CUDNN_MINOR_VERSION}") | |||||
| string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" CUDNN_PATCH_VERSION | |||||
| "${CUDNN_VERSION_FILE_CONTENTS}") | |||||
| string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" CUDNN_PATCH_VERSION | |||||
| "${CUDNN_PATCH_VERSION}") | |||||
| set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCH_VERSION}) | set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCH_VERSION}) | ||||
| if(MGE_CUDA_USE_STATIC) | if(MGE_CUDA_USE_STATIC) | ||||
| add_library(libcudnn STATIC IMPORTED) | |||||
| add_library(libcudnn STATIC IMPORTED) | |||||
| else() | else() | ||||
| add_library(libcudnn SHARED IMPORTED) | |||||
| add_library(libcudnn SHARED IMPORTED) | |||||
| endif() | endif() | ||||
| set_target_properties(libcudnn PROPERTIES | |||||
| IMPORTED_LOCATION ${CUDNN_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${CUDNN_INCLUDE_DIR}) | |||||
| set_target_properties( | |||||
| libcudnn PROPERTIES IMPORTED_LOCATION ${CUDNN_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
| ${CUDNN_INCLUDE_DIR}) | |||||
| message(STATUS "Found CuDNN: ${__found_cudnn_root} (found version: ${CUDNN_VERSION})") | message(STATUS "Found CuDNN: ${__found_cudnn_root} (found version: ${CUDNN_VERSION})") | ||||
| @@ -1,27 +1,47 @@ | |||||
| if (MGE_USE_SYSTEM_LIB) | |||||
| find_package(Flatbuffers REQUIRED) | |||||
| message(STATUS "Using system provided Flatbuffers ${Flatbuffers_VERSION}") | |||||
| include(cmake/BuildFlatBuffers.cmake) | |||||
| return() | |||||
| if(MGE_USE_SYSTEM_LIB) | |||||
| find_package(Flatbuffers REQUIRED) | |||||
| message(STATUS "Using system provided Flatbuffers ${Flatbuffers_VERSION}") | |||||
| include(cmake/BuildFlatBuffers.cmake) | |||||
| return() | |||||
| endif() | endif() | ||||
| if(MSVC OR WIN32) | if(MSVC OR WIN32) | ||||
| message(DEBUG "add flags flatc for clang-cl build") | |||||
| set(FLATC_FLAGS "") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=unknown-argument -Wno-error=c++98-compat -Wno-error=reserved-id-macro") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=sign-conversion -Wno-error=exceptions -Wno-error=argument-outside-range") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=delete-non-virtual-dtor -Wno-error=ignored-attributes -Wno-error=format") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=sign-compare -Wno-error=unused-private-field -Wno-error=braced-scalar-init") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=return-type-c-linkage -Wno-error=invalid-noreturn -Wno-error=c++98-compat-pedantic") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=extra-semi-stmt -Wno-error=missing-prototypes -Wno-error=documentation-unknown-command") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=missing-variable-declarations -Wno-error=nonportable-system-include-path") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=exit-time-destructors -Wno-error=unused-macros -Wno-error=global-constructors") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=switch-enum -Wno-error=missing-noreturn -Wno-error=float-equal") | |||||
| if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0") | |||||
| set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=suggest-override -Wno-error=suggest-destructor-override") | |||||
| endif() | |||||
| message(DEBUG "add flags flatc for clang-cl build") | |||||
| set(FLATC_FLAGS "") | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=unknown-argument -Wno-error=c++98-compat -Wno-error=reserved-id-macro" | |||||
| ) | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=sign-conversion -Wno-error=exceptions -Wno-error=argument-outside-range" | |||||
| ) | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=delete-non-virtual-dtor -Wno-error=ignored-attributes -Wno-error=format" | |||||
| ) | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=sign-compare -Wno-error=unused-private-field -Wno-error=braced-scalar-init" | |||||
| ) | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=return-type-c-linkage -Wno-error=invalid-noreturn -Wno-error=c++98-compat-pedantic" | |||||
| ) | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=extra-semi-stmt -Wno-error=missing-prototypes -Wno-error=documentation-unknown-command" | |||||
| ) | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=missing-variable-declarations -Wno-error=nonportable-system-include-path" | |||||
| ) | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=exit-time-destructors -Wno-error=unused-macros -Wno-error=global-constructors" | |||||
| ) | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=switch-enum -Wno-error=missing-noreturn -Wno-error=float-equal" | |||||
| ) | |||||
| if(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0") | |||||
| set(FLATC_FLAGS | |||||
| "${FLATC_FLAGS} -Wno-error=suggest-override -Wno-error=suggest-destructor-override" | |||||
| ) | |||||
| endif() | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLATC_FLAGS}") | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATC_FLAGS}") | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLATC_FLAGS}") | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATC_FLAGS}") | |||||
| endif() | endif() | ||||
| option(FLATBUFFERS_BUILD_TESTS "" OFF) | option(FLATBUFFERS_BUILD_TESTS "" OFF) | ||||
| @@ -1 +1,2 @@ | |||||
| add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gflags ${CMAKE_CURRENT_BINARY_DIR}/gflags) | |||||
| add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gflags | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/gflags) | |||||
| @@ -1,2 +1,2 @@ | |||||
| add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gtest ${CMAKE_CURRENT_BINARY_DIR}/gtest EXCLUDE_FROM_ALL) | |||||
| add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gtest | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/gtest EXCLUDE_FROM_ALL) | |||||
| @@ -1,88 +1,136 @@ | |||||
| # - Find the llvm/mlir libraries | |||||
| # This module finds if llvm/mlir is installed, or build llvm/mlir from source. | |||||
| # This module sets the following variables. | |||||
| # * Find the llvm/mlir libraries This module finds if llvm/mlir is installed, or build | |||||
| # llvm/mlir from source. This module sets the following variables. | |||||
| # | # | ||||
| # MLIR_LLVM_INCLUDE_DIR - path to the LLVM/MLIR include files | |||||
| # MLIR_LLVM_LIBS - path to the LLVM/MLIR libraries | |||||
| # MLIR_LLVM_INCLUDE_DIR - path to the LLVM/MLIR include files MLIR_LLVM_LIBS - path | |||||
| # to the LLVM/MLIR libraries | |||||
| # | # | ||||
| # This module define the following functions. | # This module define the following functions. | ||||
| # | # | ||||
| # external_tablegen_library - created interface library which depends on tablegen outputs | |||||
| # external_tablegen_library - created interface library which depends on tablegen | |||||
| # outputs | |||||
| include(CMakeParseArguments) | include(CMakeParseArguments) | ||||
| function(external_tablegen_library) | function(external_tablegen_library) | ||||
| cmake_parse_arguments( | |||||
| _RULE | |||||
| "TESTONLY" | |||||
| "NAME;TBLGEN" | |||||
| "SRCS;INCLUDES;OUTS" | |||||
| ${ARGN} | |||||
| ) | |||||
| cmake_parse_arguments(_RULE "TESTONLY" "NAME;TBLGEN" "SRCS;INCLUDES;OUTS" ${ARGN}) | |||||
| if(_RULE_TESTONLY AND NOT MGE_WITH_TEST) | |||||
| return() | |||||
| endif() | |||||
| if(_RULE_TESTONLY AND NOT MGE_WITH_TEST) | |||||
| return() | |||||
| endif() | |||||
| set(_NAME ${_RULE_NAME}) | |||||
| set(_NAME ${_RULE_NAME}) | |||||
| set(LLVM_TARGET_DEFINITIONS ${_RULE_SRCS}) | |||||
| set(_INCLUDE_DIRS ${_RULE_INCLUDES}) | |||||
| list(TRANSFORM _INCLUDE_DIRS PREPEND "-I") | |||||
| set(_OUTPUTS) | |||||
| while(_RULE_OUTS) | |||||
| list(GET _RULE_OUTS 0 _COMMAND) | |||||
| list(REMOVE_AT _RULE_OUTS 0) | |||||
| list(GET _RULE_OUTS 0 _FILE) | |||||
| list(REMOVE_AT _RULE_OUTS 0) | |||||
| tablegen(${_RULE_TBLGEN} ${_FILE} ${_COMMAND} ${_INCLUDE_DIRS}) | |||||
| list(APPEND _OUTPUTS ${CMAKE_CURRENT_BINARY_DIR}/${_FILE}) | |||||
| endwhile() | |||||
| add_custom_target(${_NAME}_target DEPENDS ${_OUTPUTS}) | |||||
| set(LLVM_TARGET_DEFINITIONS ${_RULE_SRCS}) | |||||
| set(_INCLUDE_DIRS ${_RULE_INCLUDES}) | |||||
| list(TRANSFORM _INCLUDE_DIRS PREPEND "-I") | |||||
| set(_OUTPUTS) | |||||
| while(_RULE_OUTS) | |||||
| list(GET _RULE_OUTS 0 _COMMAND) | |||||
| list(REMOVE_AT _RULE_OUTS 0) | |||||
| list(GET _RULE_OUTS 0 _FILE) | |||||
| list(REMOVE_AT _RULE_OUTS 0) | |||||
| tablegen(${_RULE_TBLGEN} ${_FILE} ${_COMMAND} ${_INCLUDE_DIRS}) | |||||
| list(APPEND _OUTPUTS ${CMAKE_CURRENT_BINARY_DIR}/${_FILE}) | |||||
| endwhile() | |||||
| add_custom_target(${_NAME}_target DEPENDS ${_OUTPUTS}) | |||||
| add_library(${_NAME} INTERFACE) | |||||
| add_dependencies(${_NAME} ${_NAME}_target) | |||||
| add_library(${_NAME} INTERFACE) | |||||
| add_dependencies(${_NAME} ${_NAME}_target) | |||||
| target_include_directories(${_NAME} INTERFACE | |||||
| "$<BUILD_INTERFACE:${_RULE_INCLUDES}>") | |||||
| target_include_directories(${_NAME} INTERFACE "$<BUILD_INTERFACE:${_RULE_INCLUDES}>") | |||||
| install(TARGETS ${_NAME} EXPORT ${MGE_EXPORT_TARGETS}) | |||||
| install(TARGETS ${_NAME} EXPORT ${MGE_EXPORT_TARGETS}) | |||||
| endfunction() | endfunction() | ||||
| set(LLVM_LIBS LLVMCore LLVMSupport LLVMX86CodeGen LLVMOrcJIT LLVMNVPTXCodeGen LLVMNVPTXDesc LLVMNVPTXInfo) | |||||
| set(MLIR_CORE_LIBS MLIRAnalysis MLIRExecutionEngine MLIRIR MLIRParser MLIRPass MLIRSideEffectInterfaces MLIRTransforms) | |||||
| set(MLIR_DIALECT_LIBS MLIRAsync MLIRAVX512 MLIRGPU MLIRLLVMAVX512 MLIRNVVMIR MLIROpenACC MLIRPDL MLIRPDLInterp MLIRQuant MLIRROCDLIR MLIRSDBM MLIRShape MLIRSPIRV MLIRStandardOpsTransforms MLIRTosa) | |||||
| set(MLIR_CONVERSION_LIBS MLIRAffineToStandard MLIRAVX512ToLLVM MLIRGPUToGPURuntimeTransforms MLIRGPUToNVVMTransforms MLIRSCFToStandard) | |||||
| set(LLVM_LIBS | |||||
| LLVMCore | |||||
| LLVMSupport | |||||
| LLVMX86CodeGen | |||||
| LLVMOrcJIT | |||||
| LLVMNVPTXCodeGen | |||||
| LLVMNVPTXDesc | |||||
| LLVMNVPTXInfo) | |||||
| set(MLIR_CORE_LIBS | |||||
| MLIRAnalysis | |||||
| MLIRExecutionEngine | |||||
| MLIRIR | |||||
| MLIRParser | |||||
| MLIRPass | |||||
| MLIRSideEffectInterfaces | |||||
| MLIRTransforms) | |||||
| set(MLIR_DIALECT_LIBS | |||||
| MLIRAsync | |||||
| MLIRAVX512 | |||||
| MLIRGPU | |||||
| MLIRLLVMAVX512 | |||||
| MLIRNVVMIR | |||||
| MLIROpenACC | |||||
| MLIRPDL | |||||
| MLIRPDLInterp | |||||
| MLIRQuant | |||||
| MLIRROCDLIR | |||||
| MLIRSDBM | |||||
| MLIRShape | |||||
| MLIRSPIRV | |||||
| MLIRStandardOpsTransforms | |||||
| MLIRTosa) | |||||
| set(MLIR_CONVERSION_LIBS | |||||
| MLIRAffineToStandard MLIRAVX512ToLLVM MLIRGPUToGPURuntimeTransforms | |||||
| MLIRGPUToNVVMTransforms MLIRSCFToStandard) | |||||
| set(MLIR_TRANSLATION_LIBS MLIRTargetLLVMIR MLIRTargetNVVMIR) | set(MLIR_TRANSLATION_LIBS MLIRTargetLLVMIR MLIRTargetNVVMIR) | ||||
| set(MLIR_LIBS ${MLIR_CORE_LIBS} ${MLIR_DIALECT_LIBS} ${MLIR_CONVERSION_LIBS} ${MLIR_TRANSLATION_LIBS}) | |||||
| set(MLIR_LIBS ${MLIR_CORE_LIBS} ${MLIR_DIALECT_LIBS} ${MLIR_CONVERSION_LIBS} | |||||
| ${MLIR_TRANSLATION_LIBS}) | |||||
| set(MLIR_LLVM_LIBS ${LLVM_LIBS} ${MLIR_LIBS}) | set(MLIR_LLVM_LIBS ${LLVM_LIBS} ${MLIR_LIBS}) | ||||
| function(add_mge_mlir_src_dep llvm_monorepo_path) | function(add_mge_mlir_src_dep llvm_monorepo_path) | ||||
| set(_CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}") | |||||
| string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) | |||||
| if(NOT uppercase_CMAKE_BUILD_TYPE MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$") | |||||
| set(CMAKE_BUILD_TYPE "Debug") | |||||
| endif() | |||||
| set(_CMAKE_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) | |||||
| set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE) | |||||
| set(_CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}") | |||||
| string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) | |||||
| if(NOT uppercase_CMAKE_BUILD_TYPE MATCHES | |||||
| "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$") | |||||
| set(CMAKE_BUILD_TYPE "Debug") | |||||
| endif() | |||||
| set(_CMAKE_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) | |||||
| set(BUILD_SHARED_LIBS | |||||
| OFF | |||||
| CACHE BOOL "" FORCE) | |||||
| add_subdirectory("${llvm_monorepo_path}/llvm" ${LLVM_BUILD_DIR} EXCLUDE_FROM_ALL) | |||||
| add_subdirectory("${llvm_monorepo_path}/llvm" ${LLVM_BUILD_DIR} EXCLUDE_FROM_ALL) | |||||
| # Reset CMAKE_BUILD_TYPE to its previous setting | |||||
| set(CMAKE_BUILD_TYPE "${_CMAKE_BUILD_TYPE}" CACHE STRING "Build type" FORCE) | |||||
| # Reset BUILD_SHARED_LIBS to its previous setting | |||||
| set(BUILD_SHARED_LIBS ${_CMAKE_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libraries" FORCE) | |||||
| # Reset CMAKE_BUILD_TYPE to its previous setting | |||||
| set(CMAKE_BUILD_TYPE | |||||
| "${_CMAKE_BUILD_TYPE}" | |||||
| CACHE STRING "Build type" FORCE) | |||||
| # Reset BUILD_SHARED_LIBS to its previous setting | |||||
| set(BUILD_SHARED_LIBS | |||||
| ${_CMAKE_BUILD_SHARED_LIBS} | |||||
| CACHE BOOL "Build shared libraries" FORCE) | |||||
| endfunction() | endfunction() | ||||
| # llvm build options | # llvm build options | ||||
| set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "" FORCE) | |||||
| set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "" FORCE) | |||||
| set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "" FORCE) | |||||
| set(LLVM_ENABLE_BINDINGS OFF CACHE BOOL "" FORCE) | |||||
| set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "" FORCE) | |||||
| set(LLVM_ENABLE_RTTI ${MGE_ENABLE_RTTI} CACHE BOOL "" FORCE) | |||||
| set(LLVM_TARGETS_TO_BUILD "X86;NVPTX;AArch64;ARM" CACHE STRING "" FORCE) | |||||
| set(LLVM_ENABLE_PROJECTS "mlir" CACHE STRING "" FORCE) | |||||
| set(LLVM_INCLUDE_EXAMPLES | |||||
| OFF | |||||
| CACHE BOOL "" FORCE) | |||||
| set(LLVM_INCLUDE_TESTS | |||||
| OFF | |||||
| CACHE BOOL "" FORCE) | |||||
| set(LLVM_INCLUDE_DOCS | |||||
| OFF | |||||
| CACHE BOOL "" FORCE) | |||||
| set(LLVM_ENABLE_BINDINGS | |||||
| OFF | |||||
| CACHE BOOL "" FORCE) | |||||
| set(LLVM_INCLUDE_BENCHMARKS | |||||
| OFF | |||||
| CACHE BOOL "" FORCE) | |||||
| set(LLVM_ENABLE_RTTI | |||||
| ${MGE_ENABLE_RTTI} | |||||
| CACHE BOOL "" FORCE) | |||||
| set(LLVM_TARGETS_TO_BUILD | |||||
| "X86;NVPTX;AArch64;ARM" | |||||
| CACHE STRING "" FORCE) | |||||
| set(LLVM_ENABLE_PROJECTS | |||||
| "mlir" | |||||
| CACHE STRING "" FORCE) | |||||
| set(LLVM_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm) | set(LLVM_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm) | ||||
| add_mge_mlir_src_dep("third_party/llvm-project") | add_mge_mlir_src_dep("third_party/llvm-project") | ||||
| @@ -91,6 +139,5 @@ set(MLIR_LLVM_INCLUDE_DIR | |||||
| ${PROJECT_SOURCE_DIR}/third_party/llvm-project/llvm/include | ${PROJECT_SOURCE_DIR}/third_party/llvm-project/llvm/include | ||||
| ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/include | ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/include | ||||
| ${PROJECT_SOURCE_DIR}/third_party/llvm-project/mlir/include | ${PROJECT_SOURCE_DIR}/third_party/llvm-project/mlir/include | ||||
| ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/tools/mlir/include | |||||
| ) | |||||
| ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/tools/mlir/include) | |||||
| set(MLIR_TABLEGEN_EXE mlir-tblgen) | set(MLIR_TABLEGEN_EXE mlir-tblgen) | ||||
| @@ -1,54 +1,64 @@ | |||||
| find_library(MAGICMIND_LIBRARY | |||||
| NAMES libmagicmind.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "MAGICMIND library." ) | |||||
| find_library( | |||||
| MAGICMIND_LIBRARY | |||||
| NAMES libmagicmind.so | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "MAGICMIND library.") | |||||
| if(MAGICMIND_LIBRARY STREQUAL "MAGICMIND_LIBRARY-NOTFOUND") | if(MAGICMIND_LIBRARY STREQUAL "MAGICMIND_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find MAGICMIND Library") | |||||
| message(FATAL_ERROR "Can not find MAGICMIND Library") | |||||
| endif() | endif() | ||||
| get_filename_component(__found_magicmind_root "${MAGICMIND_LIBRARY}/../../" REALPATH) | get_filename_component(__found_magicmind_root "${MAGICMIND_LIBRARY}/../../" REALPATH) | ||||
| find_path(MAGICMIND_INCLUDE_DIR | |||||
| NAMES common.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_magicmind_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to MAGICMIND include directory." ) | |||||
| find_path( | |||||
| MAGICMIND_INCLUDE_DIR | |||||
| NAMES common.h | |||||
| HINTS "$ENV{NEUWARE_HOME}/include" ${__found_magicmind_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to MAGICMIND include directory.") | |||||
| if(MAGICMIND_INCLUDE_DIR STREQUAL "MAGICMIND_INCLUDE_DIR-NOTFOUND") | if(MAGICMIND_INCLUDE_DIR STREQUAL "MAGICMIND_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find MAGICMIND Library") | |||||
| message(FATAL_ERROR "Can not find MAGICMIND Library") | |||||
| endif() | endif() | ||||
| file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MAJOR REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MINOR REGEX "^#define MM_MINOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_PATCH REGEX "^#define MM_PATCH_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MAJOR | |||||
| REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MINOR | |||||
| REGEX "^#define MM_MINOR_VERSION [0-9]+.*$") | |||||
| file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_PATCH | |||||
| REGEX "^#define MM_PATCH_VERSION [0-9]+.*$") | |||||
| string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}") | |||||
| string(REGEX REPLACE "^#define MM_MINOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MINOR "${MAGICMIND_MINOR}") | |||||
| string(REGEX REPLACE "^#define MM_PATCH_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_PATCH "${MAGICMIND_PATCH}") | |||||
| set(MAGICMIND_VERSION_STRING "${MAGICMIND_VERSION_MAJOR}.${MAGICMIND_VERSION_MINOR}.${MAGICMIND_VERSION_PATCH}") | |||||
| string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1" | |||||
| MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}") | |||||
| string(REGEX REPLACE "^#define MM_MINOR_VERSION ([0-9]+).*$" "\\1" | |||||
| MAGICMIND_VERSION_MINOR "${MAGICMIND_MINOR}") | |||||
| string(REGEX REPLACE "^#define MM_PATCH_VERSION ([0-9]+).*$" "\\1" | |||||
| MAGICMIND_VERSION_PATCH "${MAGICMIND_PATCH}") | |||||
| set(MAGICMIND_VERSION_STRING | |||||
| "${MAGICMIND_VERSION_MAJOR}.${MAGICMIND_VERSION_MINOR}.${MAGICMIND_VERSION_PATCH}") | |||||
| add_library(libmagicmind SHARED IMPORTED) | add_library(libmagicmind SHARED IMPORTED) | ||||
| set_target_properties(libmagicmind PROPERTIES | |||||
| IMPORTED_LOCATION ${MAGICMIND_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${MAGICMIND_INCLUDE_DIR} | |||||
| ) | |||||
| set_target_properties( | |||||
| libmagicmind PROPERTIES IMPORTED_LOCATION ${MAGICMIND_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${MAGICMIND_INCLUDE_DIR}) | |||||
| message(STATUS "Found MAGICMIND: ${__found_magicmind_root} (found version: ${MAGICMIND_VERSION_STRING})") | |||||
| message( | |||||
| STATUS | |||||
| "Found MAGICMIND: ${__found_magicmind_root} (found version: ${MAGICMIND_VERSION_STRING})" | |||||
| ) | |||||
| find_library(MAGICMIND_RUNTIME_LIBRARY | |||||
| NAMES libmagicmind_runtime.so | |||||
| PATHS "${__found_magicmind_root}/lib64" | |||||
| ) | |||||
| find_library( | |||||
| MAGICMIND_RUNTIME_LIBRARY | |||||
| NAMES libmagicmind_runtime.so | |||||
| PATHS "${__found_magicmind_root}/lib64") | |||||
| if(MAGICMIND_RUNTIME_LIBRARY STREQUAL "MAGICMIND_RUNTIME_LIBRARY-NOTFOUND") | if(MAGICMIND_RUNTIME_LIBRARY STREQUAL "MAGICMIND_RUNTIME_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find MAGICMIND_RUNTIME Library") | |||||
| message(FATAL_ERROR "Can not find MAGICMIND_RUNTIME Library") | |||||
| else() | else() | ||||
| message(STATUS "Found MAGICMIND_RUNTIME: ${MAGICMIND_RUNTIME_LIBRARY}") | |||||
| message(STATUS "Found MAGICMIND_RUNTIME: ${MAGICMIND_RUNTIME_LIBRARY}") | |||||
| endif() | endif() | ||||
| add_library(libmagicmind_runtime SHARED IMPORTED) | add_library(libmagicmind_runtime SHARED IMPORTED) | ||||
| set_target_properties(libmagicmind_runtime PROPERTIES | |||||
| IMPORTED_LOCATION ${MAGICMIND_RUNTIME_LIBRARY} | |||||
| ) | |||||
| set_target_properties(libmagicmind_runtime PROPERTIES IMPORTED_LOCATION | |||||
| ${MAGICMIND_RUNTIME_LIBRARY}) | |||||
| @@ -1,77 +1,83 @@ | |||||
| find_path(MKL_ROOT_DIR | |||||
| include/mkl_cblas.h | |||||
| PATHS | |||||
| ${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH} | |||||
| ${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}/Library | |||||
| ${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32/Library | |||||
| ${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32 | |||||
| $ENV{MKLDIR} | |||||
| /opt/intel/mkl/*/ | |||||
| /opt/intel/cmkl/*/ | |||||
| /Library/Frameworks/Intel_MKL.framework/Versions/Current/lib/universal | |||||
| ) | |||||
| find_path( | |||||
| MKL_ROOT_DIR include/mkl_cblas.h | |||||
| PATHS ${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH} | |||||
| ${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}/Library | |||||
| ${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32/Library | |||||
| ${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32 | |||||
| $ENV{MKLDIR} | |||||
| /opt/intel/mkl/*/ | |||||
| /opt/intel/cmkl/*/ | |||||
| /Library/Frameworks/Intel_MKL.framework/Versions/Current/lib/universal) | |||||
| if(${MKL_ROOT_DIR} STREQUAL "MKL_ROOT_DIR-NOTFOUND") | if(${MKL_ROOT_DIR} STREQUAL "MKL_ROOT_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find MKL") | |||||
| message(FATAL_ERROR "Can not find MKL") | |||||
| endif() | endif() | ||||
| message(STATUS "Build with MKL in ${MKL_ROOT_DIR}") | message(STATUS "Build with MKL in ${MKL_ROOT_DIR}") | ||||
| find_path(MKL_INCLUDE_DIR | |||||
| mkl_cblas.h | |||||
| PATHS | |||||
| ${MKL_ROOT_DIR}/include | |||||
| ${INCLUDE_INSTALL_DIR} | |||||
| ) | |||||
| find_path(MKL_INCLUDE_DIR mkl_cblas.h PATHS ${MKL_ROOT_DIR}/include | |||||
| ${INCLUDE_INSTALL_DIR}) | |||||
| option(MGE_MKL_USE_STATIC "Build MegEngine with static MKL" ON) | option(MGE_MKL_USE_STATIC "Build MegEngine with static MKL" ON) | ||||
| if(MGE_MKL_USE_STATIC) | if(MGE_MKL_USE_STATIC) | ||||
| find_library(MKL_CORE_LIBRARY | |||||
| NAMES libmkl_core.a mkl_core.lib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| find_library( | |||||
| MKL_CORE_LIBRARY | |||||
| NAMES libmkl_core.a mkl_core.lib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| find_library(MKL_SEQUENTIAL_LIBRARY | |||||
| NAMES libmkl_sequential.a mkl_sequential.lib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| find_library( | |||||
| MKL_SEQUENTIAL_LIBRARY | |||||
| NAMES libmkl_sequential.a mkl_sequential.lib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| if(${MGE_ARCH} STREQUAL "x86_64") | |||||
| find_library(MKL_IPL_LIBRARY | |||||
| NAMES libmkl_intel_ilp64.a mkl_intel_ilp64.lib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| elseif(${MGE_ARCH} STREQUAL "i386") | |||||
| find_library(MKL_IPL_LIBRARY | |||||
| NAMES libmkl_intel_32.a mkl_intel_32.lib mkl_intel_c.lib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| endif() | |||||
| if(${MGE_ARCH} STREQUAL "x86_64") | |||||
| find_library( | |||||
| MKL_IPL_LIBRARY | |||||
| NAMES libmkl_intel_ilp64.a mkl_intel_ilp64.lib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| elseif(${MGE_ARCH} STREQUAL "i386") | |||||
| find_library( | |||||
| MKL_IPL_LIBRARY | |||||
| NAMES libmkl_intel_32.a mkl_intel_32.lib mkl_intel_c.lib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| endif() | |||||
| add_library(libmkl INTERFACE IMPORTED) | |||||
| if(UNIX AND NOT APPLE) | |||||
| target_link_libraries(libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY} -Wl,--end-group) | |||||
| else() | |||||
| target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) | |||||
| endif() | |||||
| target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||||
| add_library(libmkl INTERFACE IMPORTED) | |||||
| if(UNIX AND NOT APPLE) | |||||
| target_link_libraries( | |||||
| libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} | |||||
| ${MKL_IPL_LIBRARY} -Wl,--end-group) | |||||
| else() | |||||
| target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} | |||||
| ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) | |||||
| endif() | |||||
| target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||||
| else() | else() | ||||
| find_library(MKL_CORE_LIBRARY | |||||
| NAMES libmkl_core.so libmkl_core.dylib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| find_library( | |||||
| MKL_CORE_LIBRARY | |||||
| NAMES libmkl_core.so libmkl_core.dylib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| find_library(MKL_SEQUENTIAL_LIBRARY | |||||
| NAMES libmkl_sequential.so libmkl_sequential.dylib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| find_library( | |||||
| MKL_SEQUENTIAL_LIBRARY | |||||
| NAMES libmkl_sequential.so libmkl_sequential.dylib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| if(${MGE_ARCH} STREQUAL "x86_64") | |||||
| find_library(MKL_IPL_LIBRARY | |||||
| NAMES libmkl_intel_ilp64.so libmkl_intel_ilp64.dylib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| elseif(${MGE_ARCH} STREQUAL "x86_32") | |||||
| find_library(MKL_IPL_LIBRARY | |||||
| NAMES libmkl_intel_32.so libmkl_intel_32.dylib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| endif() | |||||
| target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) | |||||
| target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||||
| if(${MGE_ARCH} STREQUAL "x86_64") | |||||
| find_library( | |||||
| MKL_IPL_LIBRARY | |||||
| NAMES libmkl_intel_ilp64.so libmkl_intel_ilp64.dylib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| elseif(${MGE_ARCH} STREQUAL "x86_32") | |||||
| find_library( | |||||
| MKL_IPL_LIBRARY | |||||
| NAMES libmkl_intel_32.so libmkl_intel_32.dylib | |||||
| PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
| endif() | |||||
| target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} | |||||
| ${MKL_IPL_LIBRARY}) | |||||
| target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||||
| endif() | endif() | ||||
| if(${MGE_ARCH} STREQUAL "x86_64") | if(${MGE_ARCH} STREQUAL "x86_64") | ||||
| target_compile_definitions(libmkl INTERFACE -DMKL_ILP64) | |||||
| target_compile_definitions(libmkl INTERFACE -DMKL_ILP64) | |||||
| endif() | endif() | ||||
| @@ -1,70 +1,83 @@ | |||||
| function(PROTOBUF_GENERATE_CPP_WITH_ROOT SRCS HDRS ROOT_DIR) | function(PROTOBUF_GENERATE_CPP_WITH_ROOT SRCS HDRS ROOT_DIR) | ||||
| if(NOT ARGN) | |||||
| message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP_WITH_ROOT() called without any proto files") | |||||
| return() | |||||
| endif() | |||||
| if(NOT ARGN) | |||||
| message( | |||||
| SEND_ERROR | |||||
| "Error: PROTOBUF_GENERATE_CPP_WITH_ROOT() called without any proto files") | |||||
| return() | |||||
| endif() | |||||
| set(${SRCS}) | |||||
| set(${HDRS}) | |||||
| foreach(FIL ${ARGN}) | |||||
| set(ABS_FIL ${ROOT_DIR}/${FIL}) | |||||
| get_filename_component(FIL_WE ${FIL} NAME_WE) | |||||
| get_filename_component(FIL_DIR ${ABS_FIL} PATH) | |||||
| file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR}) | |||||
| set(${SRCS}) | |||||
| set(${HDRS}) | |||||
| foreach(FIL ${ARGN}) | |||||
| set(ABS_FIL ${ROOT_DIR}/${FIL}) | |||||
| get_filename_component(FIL_WE ${FIL} NAME_WE) | |||||
| get_filename_component(FIL_DIR ${ABS_FIL} PATH) | |||||
| file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR}) | |||||
| list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") | |||||
| list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") | |||||
| list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") | |||||
| list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") | |||||
| add_custom_command( | |||||
| OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc" | |||||
| "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h" | |||||
| COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} | |||||
| ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${FIL_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS} | |||||
| DEPENDS ${ABS_FIL} libprotobuf | |||||
| COMMENT "Running C++ protocol buffer compiler on ${FIL}" | |||||
| VERBATIM) | |||||
| endforeach() | |||||
| add_custom_command( | |||||
| OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc" | |||||
| "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h" | |||||
| COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} | |||||
| -I ${FIL_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS} | |||||
| DEPENDS ${ABS_FIL} libprotobuf | |||||
| COMMENT "Running C++ protocol buffer compiler on ${FIL}" | |||||
| VERBATIM) | |||||
| endforeach() | |||||
| set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) | |||||
| set(${SRCS} ${${SRCS}} PARENT_SCOPE) | |||||
| set(${HDRS} ${${HDRS}} PARENT_SCOPE) | |||||
| set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) | |||||
| set(${SRCS} | |||||
| ${${SRCS}} | |||||
| PARENT_SCOPE) | |||||
| set(${HDRS} | |||||
| ${${HDRS}} | |||||
| PARENT_SCOPE) | |||||
| endfunction() | endfunction() | ||||
| if(MGE_USE_SYSTEM_LIB) | if(MGE_USE_SYSTEM_LIB) | ||||
| find_package(Protobuf) | |||||
| if(Protobuf_FOUND) | |||||
| add_library(libprotobuf INTERFACE) | |||||
| target_link_libraries(libprotobuf INTERFACE ${Protobuf_LIBRARIES}) | |||||
| target_include_directories(libprotobuf INTERFACE ${Protobuf_INCLUDE_DIRS}) | |||||
| get_filename_component(Protobuf_ROOT ${Protobuf_INCLUDE_DIR} DIRECTORY) | |||||
| set(PROTOBUF_ROOT ${Protobuf_ROOT}) | |||||
| set(PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) | |||||
| set(PROTOBUF_INCLUDE_DIRS ${Protobuf_INCLUDE_DIRS}) | |||||
| return() | |||||
| endif() | |||||
| find_package(Protobuf) | |||||
| if(Protobuf_FOUND) | |||||
| add_library(libprotobuf INTERFACE) | |||||
| target_link_libraries(libprotobuf INTERFACE ${Protobuf_LIBRARIES}) | |||||
| target_include_directories(libprotobuf INTERFACE ${Protobuf_INCLUDE_DIRS}) | |||||
| get_filename_component(Protobuf_ROOT ${Protobuf_INCLUDE_DIR} DIRECTORY) | |||||
| set(PROTOBUF_ROOT ${Protobuf_ROOT}) | |||||
| set(PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) | |||||
| set(PROTOBUF_INCLUDE_DIRS ${Protobuf_INCLUDE_DIRS}) | |||||
| return() | |||||
| endif() | |||||
| endif() | endif() | ||||
| include(ExternalProject) | include(ExternalProject) | ||||
| include(GNUInstallDirs) | include(GNUInstallDirs) | ||||
| set(PROTOBUF_DIR "${PROJECT_SOURCE_DIR}/third_party/protobuf" CACHE STRING "protobuf directory") | |||||
| set(PROTOBUF_DIR | |||||
| "${PROJECT_SOURCE_DIR}/third_party/protobuf" | |||||
| CACHE STRING "protobuf directory") | |||||
| set(PROTOBUF_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/protobuf) | set(PROTOBUF_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/protobuf) | ||||
| if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") | if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") | ||||
| set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobufd.a) | |||||
| set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobufd.a) | |||||
| else() | else() | ||||
| set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a) | |||||
| set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a) | |||||
| endif() | endif() | ||||
| set(PROTOBUF_PROTOC_EXECUTABLE ${PROTOBUF_BUILD_DIR}/bin/protoc) | set(PROTOBUF_PROTOC_EXECUTABLE ${PROTOBUF_BUILD_DIR}/bin/protoc) | ||||
| ExternalProject_add( | |||||
| protobuf | |||||
| SOURCE_DIR ${PROTOBUF_DIR}/cmake | |||||
| PREFIX ${PROTOBUF_BUILD_DIR} | |||||
| CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_BUILD_DIR} -Dprotobuf_BUILD_EXAMPLES=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
| BUILD_BYPRODUCTS ${PROTOBUF_LIB} ${PROTOBUF_PROTOC_EXECUTABLE} | |||||
| ) | |||||
| ExternalProject_Add( | |||||
| protobuf | |||||
| SOURCE_DIR ${PROTOBUF_DIR}/cmake | |||||
| PREFIX ${PROTOBUF_BUILD_DIR} | |||||
| CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||||
| -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||||
| -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||||
| -DCMAKE_INSTALL_PREFIX=${PROTOBUF_BUILD_DIR} | |||||
| -Dprotobuf_BUILD_EXAMPLES=OFF | |||||
| -Dprotobuf_BUILD_TESTS=OFF | |||||
| -DBUILD_SHARED_LIBS=OFF | |||||
| -DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
| BUILD_BYPRODUCTS ${PROTOBUF_LIB} ${PROTOBUF_PROTOC_EXECUTABLE}) | |||||
| set(PROTOBUF_INC ${PROTOBUF_BUILD_DIR}/include) | set(PROTOBUF_INC ${PROTOBUF_BUILD_DIR}/include) | ||||
| file(MAKE_DIRECTORY ${PROTOBUF_INC}) | file(MAKE_DIRECTORY ${PROTOBUF_INC}) | ||||
| @@ -72,19 +85,14 @@ file(MAKE_DIRECTORY ${PROTOBUF_INC}) | |||||
| add_library(libprotobuf STATIC IMPORTED GLOBAL) | add_library(libprotobuf STATIC IMPORTED GLOBAL) | ||||
| add_dependencies(libprotobuf protobuf) | add_dependencies(libprotobuf protobuf) | ||||
| set_target_properties( | set_target_properties( | ||||
| libprotobuf PROPERTIES | |||||
| IMPORTED_LOCATION ${PROTOBUF_LIB} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${PROTOBUF_BUILD_DIR}/include | |||||
| ) | |||||
| libprotobuf PROPERTIES IMPORTED_LOCATION ${PROTOBUF_LIB} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${PROTOBUF_BUILD_DIR}/include) | |||||
| add_executable(protoc IMPORTED GLOBAL) | add_executable(protoc IMPORTED GLOBAL) | ||||
| add_dependencies(protoc protobuf) | add_dependencies(protoc protobuf) | ||||
| set_target_properties( | |||||
| protoc PROPERTIES | |||||
| IMPORTED_LOCATION ${PROTOBUF_BUILD_DIR}/bin/protoc | |||||
| ) | |||||
| set_target_properties(protoc PROPERTIES IMPORTED_LOCATION | |||||
| ${PROTOBUF_BUILD_DIR}/bin/protoc) | |||||
| set(PROTOBUF_ROOT ${PROTOBUF_BUILD_DIR}) | set(PROTOBUF_ROOT ${PROTOBUF_BUILD_DIR}) | ||||
| set(PROTOBUF_PROTOC_EXECUTABLE protoc) | set(PROTOBUF_PROTOC_EXECUTABLE protoc) | ||||
| set(PROTOBUF_INCLUDE_DIRS ${PROTOBUF_BUILD_DIR}/include) | set(PROTOBUF_INCLUDE_DIRS ${PROTOBUF_BUILD_DIR}/include) | ||||
| @@ -1,28 +1,34 @@ | |||||
| if(NOT DEFINED HIP_PATH) | if(NOT DEFINED HIP_PATH) | ||||
| if(NOT DEFINED ENV{HIP_PATH}) | |||||
| set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") | |||||
| else() | |||||
| set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") | |||||
| endif() | |||||
| if(NOT DEFINED ENV{HIP_PATH}) | |||||
| set(HIP_PATH | |||||
| "/opt/rocm/hip" | |||||
| CACHE PATH "Path to which HIP has been installed") | |||||
| else() | |||||
| set(HIP_PATH | |||||
| $ENV{HIP_PATH} | |||||
| CACHE PATH "Path to which HIP has been installed") | |||||
| endif() | |||||
| endif() | endif() | ||||
| set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) | set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) | ||||
| find_package(HIP QUIET) | find_package(HIP QUIET) | ||||
| if (HIP_FOUND) | |||||
| message(STATUS "Found HIP: " ${HIP_VERSION}) | |||||
| if(HIP_FOUND) | |||||
| message(STATUS "Found HIP: " ${HIP_VERSION}) | |||||
| else() | else() | ||||
| message(FATAL_ERROR "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.") | |||||
| message( | |||||
| FATAL_ERROR | |||||
| "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location." | |||||
| ) | |||||
| endif() | endif() | ||||
| if (${HIP_VERSION} VERSION_LESS 3.0) | |||||
| message(FATAL_ERROR "ROCM version needed 3. Please update ROCM.") | |||||
| if(${HIP_VERSION} VERSION_LESS 3.0) | |||||
| message(FATAL_ERROR "ROCM version needed 3. Please update ROCM.") | |||||
| endif() | endif() | ||||
| macro(hipconfig_get_option variable option) | macro(hipconfig_get_option variable option) | ||||
| if(NOT DEFINED ${variable}) | |||||
| execute_process( | |||||
| COMMAND ${HIP_HIPCONFIG_EXECUTABLE} ${option} | |||||
| OUTPUT_VARIABLE ${variable}) | |||||
| endif() | |||||
| if(NOT DEFINED ${variable}) | |||||
| execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} ${option} | |||||
| OUTPUT_VARIABLE ${variable}) | |||||
| endif() | |||||
| endmacro() | endmacro() | ||||
| hipconfig_get_option(HIP_COMPILER "--compiler") | hipconfig_get_option(HIP_COMPILER "--compiler") | ||||
| @@ -31,30 +37,33 @@ hipconfig_get_option(HIP_CPP_CONFIG "--cpp_config") | |||||
| separate_arguments(HIP_CPP_CONFIG) | separate_arguments(HIP_CPP_CONFIG) | ||||
| foreach(hip_config_item ${HIP_CPP_CONFIG}) | foreach(hip_config_item ${HIP_CPP_CONFIG}) | ||||
| foreach(macro_name "__HIP_PLATFORM_HCC__" "__HIP_ROCclr__") | |||||
| if(${hip_config_item} STREQUAL "-D${macro_name}=") | |||||
| set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name}\n") | |||||
| set(HIP_CPP_UNDEFINE "${HIP_CPP_UNDEFINE}\ | |||||
| foreach(macro_name "__HIP_PLATFORM_HCC__" "__HIP_ROCclr__") | |||||
| if(${hip_config_item} STREQUAL "-D${macro_name}=") | |||||
| set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name}\n") | |||||
| set(HIP_CPP_UNDEFINE | |||||
| "${HIP_CPP_UNDEFINE}\ | |||||
| #ifdef ${macro_name}\n#undef ${macro_name}\n\ | #ifdef ${macro_name}\n#undef ${macro_name}\n\ | ||||
| #else\n#error\n\ | #else\n#error\n\ | ||||
| #endif\n") | #endif\n") | ||||
| elseif(${hip_config_item} STREQUAL "-D${macro_name}") | |||||
| set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name} 1\n") | |||||
| set(HIP_CPP_UNDEFINE "${HIP_CPP_UNDEFINE}\ | |||||
| elseif(${hip_config_item} STREQUAL "-D${macro_name}") | |||||
| set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name} 1\n") | |||||
| set(HIP_CPP_UNDEFINE | |||||
| "${HIP_CPP_UNDEFINE}\ | |||||
| #ifdef ${macro_name}\n#undef ${macro_name}\n\ | #ifdef ${macro_name}\n#undef ${macro_name}\n\ | ||||
| #else\n#error\n\ | #else\n#error\n\ | ||||
| #endif\n") | #endif\n") | ||||
| endif() | |||||
| endforeach() | |||||
| endif() | |||||
| endforeach() | |||||
| endforeach() | endforeach() | ||||
| message(STATUS "Using HIP compiler ${HIP_COMPILER}") | message(STATUS "Using HIP compiler ${HIP_COMPILER}") | ||||
| if(${HIP_COMPILER} STREQUAL "hcc") | if(${HIP_COMPILER} STREQUAL "hcc") | ||||
| set(MGE_ROCM_LIBS hip_hcc) | |||||
| message(WARNING "hcc is not well supported, please modify link.txt to link with hipcc") | |||||
| elseif (${HIP_COMPILER} STREQUAL "clang") | |||||
| set(MGE_ROCM_LIBS amdhip64) | |||||
| set(MGE_ROCM_LIBS hip_hcc) | |||||
| message( | |||||
| WARNING "hcc is not well supported, please modify link.txt to link with hipcc") | |||||
| elseif(${HIP_COMPILER} STREQUAL "clang") | |||||
| set(MGE_ROCM_LIBS amdhip64) | |||||
| endif() | endif() | ||||
| list(APPEND MGE_ROCM_LIBS amdocl64 MIOpen rocblas rocrand) | list(APPEND MGE_ROCM_LIBS amdocl64 MIOpen rocblas rocrand) | ||||
| @@ -63,26 +72,28 @@ set(HIP_INCLUDE_DIR ${HIP_ROOT_DIR}/../include) | |||||
| set(HIP_LIBRARY_DIR ${HIP_ROOT_DIR}/../lib) | set(HIP_LIBRARY_DIR ${HIP_ROOT_DIR}/../lib) | ||||
| function(find_rocm_library name dirname include library) | function(find_rocm_library name dirname include library) | ||||
| find_path(${name}_LIBRARY_DIR | |||||
| NAMES ${library} | |||||
| HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||||
| PATH_SUFFIXES lib lib/x86_64 | |||||
| DOC "Path to ${name} library directory") | |||||
| find_path( | |||||
| ${name}_LIBRARY_DIR | |||||
| NAMES ${library} | |||||
| HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||||
| PATH_SUFFIXES lib lib/x86_64 | |||||
| DOC "Path to ${name} library directory") | |||||
| if(${${name}_LIBRARY_DIR} MATCHES "NOTFOUND$") | |||||
| message(FATAL_ERROR "Can not find ${name} library") | |||||
| endif() | |||||
| if(${${name}_LIBRARY_DIR} MATCHES "NOTFOUND$") | |||||
| message(FATAL_ERROR "Can not find ${name} library") | |||||
| endif() | |||||
| find_path(${name}_INCLUDE_DIR | |||||
| NAMES ${include} | |||||
| HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to ${name} include directory") | |||||
| find_path( | |||||
| ${name}_INCLUDE_DIR | |||||
| NAMES ${include} | |||||
| HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to ${name} include directory") | |||||
| if(${name}_INCLUDE_DIR MATCHES "NOTFOUND$") | |||||
| message(FATAL_ERROR "Can not find ${name} include") | |||||
| endif() | |||||
| message(DEBUG "Found lib ${${name}_LIBRARY_DIR}, include ${${name}_INCLUDE_DIR}") | |||||
| if(${name}_INCLUDE_DIR MATCHES "NOTFOUND$") | |||||
| message(FATAL_ERROR "Can not find ${name} include") | |||||
| endif() | |||||
| message(DEBUG "Found lib ${${name}_LIBRARY_DIR}, include ${${name}_INCLUDE_DIR}") | |||||
| endfunction() | endfunction() | ||||
| find_rocm_library(MIOPEN miopen miopen libMIOpen.so) | find_rocm_library(MIOPEN miopen miopen libMIOpen.so) | ||||
| @@ -1,166 +1,189 @@ | |||||
| if("${TRT_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{TRT_ROOT_DIR}" STREQUAL "") | |||||
| set(TRT_ROOT_DIR $ENV{TRT_ROOT_DIR}) | |||||
| if("${TRT_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{TRT_ROOT_DIR}" STREQUAL "") | |||||
| set(TRT_ROOT_DIR $ENV{TRT_ROOT_DIR}) | |||||
| endif() | endif() | ||||
| if(MGE_CUDA_USE_STATIC) | if(MGE_CUDA_USE_STATIC) | ||||
| find_library(TRT_LIBRARY | |||||
| NAMES libnvinfer_static.a nvinfer.lib | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "TRT library." ) | |||||
| find_library(TRT_PLUGIN_LIBRARY | |||||
| NAMES libnvinfer_plugin_static.a nvinfer_plugin.lib | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "TRT plugin library." ) | |||||
| find_library( | |||||
| TRT_LIBRARY | |||||
| NAMES libnvinfer_static.a nvinfer.lib | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "TRT library.") | |||||
| find_library( | |||||
| TRT_PLUGIN_LIBRARY | |||||
| NAMES libnvinfer_plugin_static.a nvinfer_plugin.lib | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "TRT plugin library.") | |||||
| else() | else() | ||||
| find_library(TRT_LIBRARY | |||||
| NAMES libnvinfer.so libnvinfer.dylib nvinfer.dll | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "TRT library." ) | |||||
| find_library(TRT_PLUGIN_LIBRARY | |||||
| NAMES libnvinfer_plugin.so libnvinfer_plugin.dylib nvinfer_plugin.dll | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "TRT plugin library." ) | |||||
| find_library( | |||||
| TRT_LIBRARY | |||||
| NAMES libnvinfer.so libnvinfer.dylib nvinfer.dll | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "TRT library.") | |||||
| find_library( | |||||
| TRT_PLUGIN_LIBRARY | |||||
| NAMES libnvinfer_plugin.so libnvinfer_plugin.dylib nvinfer_plugin.dll | |||||
| PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
| HINTS ${ALTER_LIBRARY_PATHS} | |||||
| PATH_SUFFIXES lib lib64 | |||||
| DOC "TRT plugin library.") | |||||
| endif() | endif() | ||||
| if(TRT_LIBRARY STREQUAL "TRT_LIBRARY-NOTFOUND") | if(TRT_LIBRARY STREQUAL "TRT_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find TensorRT Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||||
| message( | |||||
| FATAL_ERROR | |||||
| "Can not find TensorRT Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||||
| ) | |||||
| endif() | endif() | ||||
| if(TRT_PLUGIN_LIBRARY STREQUAL "TRT_PLUGIN_LIBRARY-NOTFOUND") | if(TRT_PLUGIN_LIBRARY STREQUAL "TRT_PLUGIN_LIBRARY-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find TensorRT Plugin Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||||
| message( | |||||
| FATAL_ERROR | |||||
| "Can not find TensorRT Plugin Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||||
| ) | |||||
| endif() | endif() | ||||
| get_filename_component(__found_trt_root ${TRT_LIBRARY}/../.. REALPATH) | get_filename_component(__found_trt_root ${TRT_LIBRARY}/../.. REALPATH) | ||||
| find_path(TRT_INCLUDE_DIR | |||||
| NAMES NvInfer.h | |||||
| HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to TRT include directory." ) | |||||
| find_path(TRT_PLUGIN_INCLUDE_DIR | |||||
| NAMES NvInferPlugin.h | |||||
| HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to TRT plugin include directory." ) | |||||
| find_path( | |||||
| TRT_INCLUDE_DIR | |||||
| NAMES NvInfer.h | |||||
| HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to TRT include directory.") | |||||
| find_path( | |||||
| TRT_PLUGIN_INCLUDE_DIR | |||||
| NAMES NvInferPlugin.h | |||||
| HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||||
| PATH_SUFFIXES include | |||||
| DOC "Path to TRT plugin include directory.") | |||||
| if(TRT_INCLUDE_DIR STREQUAL "TRT_INCLUDE_DIR-NOTFOUND") | if(TRT_INCLUDE_DIR STREQUAL "TRT_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find TensorRT INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||||
| message( | |||||
| FATAL_ERROR | |||||
| "Can not find TensorRT INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||||
| ) | |||||
| endif() | endif() | ||||
| if(TRT_PLUGIN_INCLUDE_DIR STREQUAL "TRT_PLUGIN_INCLUDE_DIR-NOTFOUND") | if(TRT_PLUGIN_INCLUDE_DIR STREQUAL "TRT_PLUGIN_INCLUDE_DIR-NOTFOUND") | ||||
| message(FATAL_ERROR "Can not find TensorRT Plugin INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||||
| message( | |||||
| FATAL_ERROR | |||||
| "Can not find TensorRT Plugin INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||||
| ) | |||||
| endif() | endif() | ||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR | |||||
| REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR | |||||
| REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH | |||||
| REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||||
| if (TensorRT_MAJOR STREQUAL "") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||||
| if(TensorRT_MAJOR STREQUAL "") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR | |||||
| REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR | |||||
| REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||||
| file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH | |||||
| REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||||
| endif() | endif() | ||||
| string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") | |||||
| string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") | |||||
| string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") | |||||
| set(TRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") | |||||
| string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" | |||||
| TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") | |||||
| string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" | |||||
| TensorRT_VERSION_MINOR "${TensorRT_MINOR}") | |||||
| string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" | |||||
| TensorRT_VERSION_PATCH "${TensorRT_PATCH}") | |||||
| set(TRT_VERSION_STRING | |||||
| "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") | |||||
| if(MGE_CUDA_USE_STATIC) | if(MGE_CUDA_USE_STATIC) | ||||
| add_library(libnvinfer STATIC IMPORTED) | |||||
| add_library(libnvinfer_plugin STATIC IMPORTED) | |||||
| add_library(libnvinfer STATIC IMPORTED) | |||||
| add_library(libnvinfer_plugin STATIC IMPORTED) | |||||
| else() | else() | ||||
| add_library(libnvinfer SHARED IMPORTED) | |||||
| add_library(libnvinfer_plugin SHARED IMPORTED) | |||||
| add_library(libnvinfer SHARED IMPORTED) | |||||
| add_library(libnvinfer_plugin SHARED IMPORTED) | |||||
| endif() | endif() | ||||
| set_target_properties(libnvinfer PROPERTIES | |||||
| IMPORTED_LOCATION ${TRT_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${TRT_INCLUDE_DIR} | |||||
| ) | |||||
| set_target_properties(libnvinfer_plugin PROPERTIES | |||||
| IMPORTED_LOCATION ${TRT_PLUGIN_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${TRT_PLUGIN_INCLUDE_DIR} | |||||
| ) | |||||
| set_target_properties( | |||||
| libnvinfer PROPERTIES IMPORTED_LOCATION ${TRT_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
| ${TRT_INCLUDE_DIR}) | |||||
| set_target_properties( | |||||
| libnvinfer_plugin PROPERTIES IMPORTED_LOCATION ${TRT_PLUGIN_LIBRARY} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${TRT_PLUGIN_INCLUDE_DIR}) | |||||
| message(STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})") | |||||
| message( | |||||
| STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})") | |||||
| if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7) | if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7) | ||||
| if(MGE_CUDA_USE_STATIC) | |||||
| find_library(LIBMYELIN_COMPILER | |||||
| NAMES libmyelin_compiler_static.a myelin_compiler_static.lib | |||||
| PATHS ${__found_trt_root}/lib | |||||
| ) | |||||
| if(LIBMYELIN_COMPILER STREQUAL "LIBMYELIN_COMPILER-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_COMPILER Library") | |||||
| else() | |||||
| message(STATUS "Found TensorRT myelin_compiler: ${LIBMYELIN_COMPILER}") | |||||
| endif() | |||||
| add_library(libmyelin_compiler STATIC IMPORTED) | |||||
| set_target_properties(libmyelin_compiler PROPERTIES | |||||
| IMPORTED_LOCATION ${LIBMYELIN_COMPILER} | |||||
| ) | |||||
| if(MGE_CUDA_USE_STATIC) | |||||
| find_library( | |||||
| LIBMYELIN_COMPILER | |||||
| NAMES libmyelin_compiler_static.a myelin_compiler_static.lib | |||||
| PATHS ${__found_trt_root}/lib) | |||||
| if(LIBMYELIN_COMPILER STREQUAL "LIBMYELIN_COMPILER-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_COMPILER Library") | |||||
| else() | |||||
| message(STATUS "Found TensorRT myelin_compiler: ${LIBMYELIN_COMPILER}") | |||||
| endif() | |||||
| add_library(libmyelin_compiler STATIC IMPORTED) | |||||
| set_target_properties(libmyelin_compiler PROPERTIES IMPORTED_LOCATION | |||||
| ${LIBMYELIN_COMPILER}) | |||||
| find_library(LIBMYELIN_EXECUTOR | |||||
| NAMES libmyelin_executor_static.a myelin_executor_static.lib | |||||
| PATHS ${__found_trt_root}/lib | |||||
| ) | |||||
| if(LIBMYELIN_EXECUTOR STREQUAL "LIBMYELIN_EXECUTOR-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_EXECUTOR Library") | |||||
| else() | |||||
| message(STATUS "Found TensorRT libmyelin_executor: ${LIBMYELIN_EXECUTOR}") | |||||
| endif() | |||||
| add_library(libmyelin_executor STATIC IMPORTED) | |||||
| set_target_properties(libmyelin_executor PROPERTIES | |||||
| IMPORTED_LOCATION ${LIBMYELIN_EXECUTOR} | |||||
| ) | |||||
| find_library( | |||||
| LIBMYELIN_EXECUTOR | |||||
| NAMES libmyelin_executor_static.a myelin_executor_static.lib | |||||
| PATHS ${__found_trt_root}/lib) | |||||
| if(LIBMYELIN_EXECUTOR STREQUAL "LIBMYELIN_EXECUTOR-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_EXECUTOR Library") | |||||
| else() | |||||
| message(STATUS "Found TensorRT libmyelin_executor: ${LIBMYELIN_EXECUTOR}") | |||||
| endif() | |||||
| add_library(libmyelin_executor STATIC IMPORTED) | |||||
| set_target_properties(libmyelin_executor PROPERTIES IMPORTED_LOCATION | |||||
| ${LIBMYELIN_EXECUTOR}) | |||||
| find_library(LIBMYELIN_PATTERN_RUNTIME | |||||
| NAMES libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib | |||||
| PATHS ${__found_trt_root}/lib | |||||
| ) | |||||
| if(LIBMYELIN_PATTERN_RUNTIME STREQUAL "LIBMYELIN_PATTERN_RUNTIME-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_RUNTIME Library") | |||||
| else() | |||||
| message(STATUS "Found TensorRT libmyelin_pattern_runtime: ${LIBMYELIN_PATTERN_RUNTIME}") | |||||
| endif() | |||||
| add_library(libmyelin_pattern_runtime STATIC IMPORTED) | |||||
| set_target_properties(libmyelin_pattern_runtime PROPERTIES | |||||
| IMPORTED_LOCATION ${LIBMYELIN_PATTERN_RUNTIME} | |||||
| ) | |||||
| find_library( | |||||
| LIBMYELIN_PATTERN_RUNTIME | |||||
| NAMES libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib | |||||
| PATHS ${__found_trt_root}/lib) | |||||
| if(LIBMYELIN_PATTERN_RUNTIME STREQUAL "LIBMYELIN_PATTERN_RUNTIME-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_RUNTIME Library") | |||||
| else() | |||||
| message( | |||||
| STATUS "Found TensorRT libmyelin_pattern_runtime: ${LIBMYELIN_PATTERN_RUNTIME}") | |||||
| endif() | |||||
| add_library(libmyelin_pattern_runtime STATIC IMPORTED) | |||||
| set_target_properties(libmyelin_pattern_runtime | |||||
| PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_PATTERN_RUNTIME}) | |||||
| find_library(LIBMYELIN_PATTERN_LIBRARY | |||||
| NAMES libmyelin_pattern_library_static.a myelin_pattern_library_static.lib | |||||
| PATHS ${__found_trt_root}/lib | |||||
| ) | |||||
| if(LIBMYELIN_PATTERN_LIBRARY STREQUAL "LIBMYELIN_PATTERN_LIBRARY-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_LIBRARY Library") | |||||
| else() | |||||
| message(STATUS "Found TensorRT libmyelin_pattern_library: ${LIBMYELIN_PATTERN_LIBRARY}") | |||||
| endif() | |||||
| add_library(libmyelin_pattern_library STATIC IMPORTED) | |||||
| set_target_properties(libmyelin_pattern_library PROPERTIES | |||||
| IMPORTED_LOCATION ${LIBMYELIN_PATTERN_LIBRARY} | |||||
| ) | |||||
| find_library( | |||||
| LIBMYELIN_PATTERN_LIBRARY | |||||
| NAMES libmyelin_pattern_library_static.a myelin_pattern_library_static.lib | |||||
| PATHS ${__found_trt_root}/lib) | |||||
| if(LIBMYELIN_PATTERN_LIBRARY STREQUAL "LIBMYELIN_PATTERN_LIBRARY-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_LIBRARY Library") | |||||
| else() | else() | ||||
| find_library(LIBMYELIN_SHARED | |||||
| NAMES libmyelin.so myelin.dll | |||||
| PATHS ${__found_trt_root}/lib | |||||
| ) | |||||
| message( | |||||
| STATUS "Found TensorRT libmyelin_pattern_library: ${LIBMYELIN_PATTERN_LIBRARY}") | |||||
| endif() | |||||
| add_library(libmyelin_pattern_library STATIC IMPORTED) | |||||
| set_target_properties(libmyelin_pattern_library | |||||
| PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_PATTERN_LIBRARY}) | |||||
| else() | |||||
| find_library( | |||||
| LIBMYELIN_SHARED | |||||
| NAMES libmyelin.so myelin.dll | |||||
| PATHS ${__found_trt_root}/lib) | |||||
| if(LIBMYELIN_SHARED STREQUAL "LIBMYELIN_SHARED-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_SHARED Library") | |||||
| else() | |||||
| message(STATUS "Found TensorRT libmyelin_shared: ${LIBMYELIN_SHARED}") | |||||
| endif() | |||||
| add_library(libmyelin SHARED IMPORTED) | |||||
| set_target_properties(libmyelin PROPERTIES | |||||
| IMPORTED_LOCATION ${LIBMYELIN_SHARED} | |||||
| ) | |||||
| if(LIBMYELIN_SHARED STREQUAL "LIBMYELIN_SHARED-NOTFOUND") | |||||
| message(FATAL_ERROR "Can not find LIBMYELIN_SHARED Library") | |||||
| else() | |||||
| message(STATUS "Found TensorRT libmyelin_shared: ${LIBMYELIN_SHARED}") | |||||
| endif() | endif() | ||||
| add_library(libmyelin SHARED IMPORTED) | |||||
| set_target_properties(libmyelin PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_SHARED}) | |||||
| endif() | |||||
| endif() | endif() | ||||
| @@ -1,17 +1,26 @@ | |||||
| include(ExternalProject) | include(ExternalProject) | ||||
| include(GNUInstallDirs) | include(GNUInstallDirs) | ||||
| set(ZMQ_DIR ${PROJECT_SOURCE_DIR}/third_party/libzmq CACHE STRING "ZMQ directory") | |||||
| set(ZMQ_DIR | |||||
| ${PROJECT_SOURCE_DIR}/third_party/libzmq | |||||
| CACHE STRING "ZMQ directory") | |||||
| set(ZMQ_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/libzmq) | set(ZMQ_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/libzmq) | ||||
| set(ZMQ_LIB ${ZMQ_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libzmq.a) | set(ZMQ_LIB ${ZMQ_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libzmq.a) | ||||
| ExternalProject_add( | |||||
| zmq | |||||
| SOURCE_DIR ${ZMQ_DIR} | |||||
| PREFIX ${ZMQ_BUILD_DIR} | |||||
| CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${ZMQ_BUILD_DIR} -DWITH_PERF_TOOL=OFF -DZMQ_BUILD_TESTS=OFF -DENABLE_CPACK=OFF -DENABLE_CURVE=OFF | |||||
| BUILD_BYPRODUCTS ${ZMQ_LIB} | |||||
| ) | |||||
| ExternalProject_Add( | |||||
| zmq | |||||
| SOURCE_DIR ${ZMQ_DIR} | |||||
| PREFIX ${ZMQ_BUILD_DIR} | |||||
| CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||||
| -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||||
| -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||||
| -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} | |||||
| -DCMAKE_INSTALL_PREFIX=${ZMQ_BUILD_DIR} | |||||
| -DWITH_PERF_TOOL=OFF | |||||
| -DZMQ_BUILD_TESTS=OFF | |||||
| -DENABLE_CPACK=OFF | |||||
| -DENABLE_CURVE=OFF | |||||
| BUILD_BYPRODUCTS ${ZMQ_LIB}) | |||||
| set(ZMQ_INC ${ZMQ_BUILD_DIR}/include) | set(ZMQ_INC ${ZMQ_BUILD_DIR}/include) | ||||
| include_directories(${ZMQ_INC}) | include_directories(${ZMQ_INC}) | ||||
| @@ -19,8 +28,5 @@ file(MAKE_DIRECTORY ${ZMQ_INC}) | |||||
| add_library(libzmq STATIC IMPORTED GLOBAL) | add_library(libzmq STATIC IMPORTED GLOBAL) | ||||
| add_dependencies(libzmq zmq) | add_dependencies(libzmq zmq) | ||||
| set_target_properties( | |||||
| libzmq PROPERTIES | |||||
| IMPORTED_LOCATION ${ZMQ_LIB} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${ZMQ_INC} | |||||
| ) | |||||
| set_target_properties(libzmq PROPERTIES IMPORTED_LOCATION ${ZMQ_LIB} | |||||
| INTERFACE_INCLUDE_DIRECTORIES ${ZMQ_INC}) | |||||
| @@ -4,66 +4,61 @@ set(OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_param_defs.py) | |||||
| set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/include/) | set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/include/) | ||||
| file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/megdnn) | file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/megdnn) | ||||
| add_custom_command( | add_custom_command( | ||||
| OUTPUT | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||||
| COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
| COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} | |||||
| tmp_unuse.log --write-cppjson ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||||
| DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||||
| VERBATIM | |||||
| ) | |||||
| list(APPEND OPR_PARAM_DEFS_OUTS | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
| OUTPUT ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||||
| COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} | |||||
| ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
| COMMAND | |||||
| ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} | |||||
| ${OPR_PARAM_DEFS_SRCS} tmp_unuse.log --write-cppjson | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | ||||
| ) | |||||
| DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||||
| VERBATIM) | |||||
| list(APPEND OPR_PARAM_DEFS_OUTS ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h) | |||||
| list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | ||||
| set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) | set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) | ||||
| file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/src/common) | file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/src/common) | ||||
| add_custom_command( | add_custom_command( | ||||
| OUTPUT | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
| COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} | |||||
| --enumv ${OPR_PARAM_DEFS_SRCS} | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
| DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||||
| VERBATIM | |||||
| ) | |||||
| OUTPUT ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
| COMMAND | |||||
| ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} --enumv | |||||
| ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
| DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||||
| VERBATIM) | |||||
| list(APPEND OPR_PARAM_DEFS_OUTS | list(APPEND OPR_PARAM_DEFS_OUTS | ||||
| ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
| ) | |||||
| ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh) | |||||
| list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | ||||
| install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h") | |||||
| install( | |||||
| DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn | |||||
| DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||||
| FILES_MATCHING | |||||
| PATTERN "*.h") | |||||
| add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS}) | add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS}) | ||||
| add_library(opr_param_defs INTERFACE) | add_library(opr_param_defs INTERFACE) | ||||
| target_include_directories(opr_param_defs | target_include_directories(opr_param_defs | ||||
| INTERFACE | |||||
| $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> | |||||
| ) | |||||
| foreach (INCPATH IN LISTS OPR_PARAM_DEFS_INC) | |||||
| target_include_directories(opr_param_defs | |||||
| INTERFACE $<BUILD_INTERFACE:${INCPATH}> | |||||
| ) | |||||
| INTERFACE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) | |||||
| foreach(INCPATH IN LISTS OPR_PARAM_DEFS_INC) | |||||
| target_include_directories(opr_param_defs INTERFACE $<BUILD_INTERFACE:${INCPATH}>) | |||||
| endforeach() | endforeach() | ||||
| add_dependencies(opr_param_defs _opr_param_defs) | add_dependencies(opr_param_defs _opr_param_defs) | ||||
| install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS}) | install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS}) | ||||
| if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
| add_library(cutlass INTERFACE) | |||||
| target_include_directories(cutlass | |||||
| INTERFACE | |||||
| $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>) | |||||
| add_library(cutlass INTERFACE) | |||||
| target_include_directories( | |||||
| cutlass | |||||
| INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>) | |||||
| endif() | endif() | ||||
| if(MGE_WITH_TEST) | if(MGE_WITH_TEST) | ||||
| add_subdirectory(test) | |||||
| add_subdirectory(test) | |||||
| endif() | endif() | ||||
| add_subdirectory(src) | add_subdirectory(src) | ||||
| @@ -1,6 +1,8 @@ | |||||
| add_library(atlas-stub STATIC src/libatlas-wrap.cpp) | add_library(atlas-stub STATIC src/libatlas-wrap.cpp) | ||||
| target_include_directories(atlas-stub PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||||
| target_include_directories( | |||||
| atlas-stub PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||||
| install(TARGETS atlas-stub EXPORT ${MGE_EXPORT_TARGETS}) | install(TARGETS atlas-stub EXPORT ${MGE_EXPORT_TARGETS}) | ||||
| add_library(acl-cblas STATIC src/libacl_cblas-wrap.cpp) | add_library(acl-cblas STATIC src/libacl_cblas-wrap.cpp) | ||||
| target_include_directories(acl-cblas PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||||
| target_include_directories( | |||||
| acl-cblas PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||||
| @@ -1,26 +1,27 @@ | |||||
| file (GLOB_RECURSE CUDA_STUB src/libcuda.cpp) | |||||
| file (GLOB_RECURSE NVRTC_STUB src/libnvrtc.cpp) | |||||
| file(GLOB_RECURSE CUDA_STUB src/libcuda.cpp) | |||||
| file(GLOB_RECURSE NVRTC_STUB src/libnvrtc.cpp) | |||||
| if(MGE_WITH_CUDA_STUB) | if(MGE_WITH_CUDA_STUB) | ||||
| list(APPEND STUB_SRC ${CUDA_STUB}) | |||||
| list(APPEND STUB_SRC ${CUDA_STUB}) | |||||
| endif() | endif() | ||||
| if(MGE_WITH_NVRTC_STUB) | if(MGE_WITH_NVRTC_STUB) | ||||
| list(APPEND STUB_SRC ${NVRTC_STUB}) | |||||
| list(APPEND STUB_SRC ${NVRTC_STUB}) | |||||
| endif() | endif() | ||||
| if(MSVC OR WIN32) | if(MSVC OR WIN32) | ||||
| add_library (cuda-stub STATIC ${STUB_SRC}) | |||||
| add_library(cuda-stub STATIC ${STUB_SRC}) | |||||
| else() | else() | ||||
| add_library (cuda-stub SHARED ${STUB_SRC}) | |||||
| add_library(cuda-stub SHARED ${STUB_SRC}) | |||||
| endif() | endif() | ||||
| set_target_properties(cuda-stub PROPERTIES OUTPUT_NAME cuda_stub) | set_target_properties(cuda-stub PROPERTIES OUTPUT_NAME cuda_stub) | ||||
| target_compile_definitions(cuda-stub PRIVATE __CUDA_API_VERSION_INTERNAL) | target_compile_definitions(cuda-stub PRIVATE __CUDA_API_VERSION_INTERNAL) | ||||
| if (MSVC OR WIN32) | |||||
| target_link_libraries(cuda-stub PRIVATE -Wl,--no-undefined) | |||||
| if(MSVC OR WIN32) | |||||
| target_link_libraries(cuda-stub PRIVATE -Wl,--no-undefined) | |||||
| else() | else() | ||||
| target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined) | |||||
| target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined) | |||||
| endif() | endif() | ||||
| target_include_directories(cuda-stub PRIVATE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>) | |||||
| install (TARGETS cuda-stub EXPORT ${MGE_EXPORT_TARGETS}) | |||||
| target_include_directories(cuda-stub | |||||
| PRIVATE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>) | |||||
| install(TARGETS cuda-stub EXPORT ${MGE_EXPORT_TARGETS}) | |||||
| @@ -12,6 +12,7 @@ | |||||
| #pragma once | #pragma once | ||||
| #include "megbrain_build_config.h" | #include "megbrain_build_config.h" | ||||
| #include "megdnn/oprs/base.h" | |||||
| #if MGB_ENABLE_GETENV | #if MGB_ENABLE_GETENV | ||||
| #define MGB_GETENV ::std::getenv | #define MGB_GETENV ::std::getenv | ||||
| @@ -36,6 +37,11 @@ bool has_available_algo(Opr* opr, Args&&... args) { | |||||
| return !all_algos.empty(); | return !all_algos.empty(); | ||||
| } | } | ||||
| template <class Opr, typename... Args> | |||||
| bool has_no_naive_heuristic_algo(Opr* opr, Args&&... args) { | |||||
| auto&& algo = opr->get_algorithm_info_heuristic(std::forward<Args>(args)...); | |||||
| return !static_cast<bool>(algo.attribute & detail::Algorithm::Attribute::NAIVE); | |||||
| } | |||||
| } // namespace megdnn | } // namespace megdnn | ||||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||
| @@ -1936,6 +1936,119 @@ protected: | |||||
| const TensorLayout& grad_s, size_t workspace_in_bytes); | const TensorLayout& grad_s, size_t workspace_in_bytes); | ||||
| }; | }; | ||||
| class LayerNormBase : public OperatorBase { | |||||
| DEF_OPR_IMPL_CTOR(LayerNormBase, OperatorBase); | |||||
| DEF_OPR_PARAM(LayerNorm); | |||||
| protected: | |||||
| void deduce_layout_fwd( | |||||
| const TensorLayout& data, const TensorLayout& weight, | |||||
| const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean, | |||||
| TensorLayout& rstd); | |||||
| void check_layout_fwd( | |||||
| const TensorLayout& data, const TensorLayout& weight, | |||||
| const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean, | |||||
| const TensorLayout& rstd); | |||||
| }; | |||||
| class LayerNormForward : public LayerNormBase { | |||||
| DEF_OPR_IMPL(LayerNormForward, LayerNormBase, 3, 3); | |||||
| public: | |||||
| virtual void exec( | |||||
| _megdnn_tensor_in data, _megdnn_tensor_in weight, _megdnn_tensor_in bias, | |||||
| _megdnn_tensor_out dst, _megdnn_tensor_out mean, _megdnn_tensor_out rstd, | |||||
| _megdnn_workspace workspace) = 0; | |||||
| void deduce_layout( | |||||
| const TensorLayout& data, const TensorLayout& weight, | |||||
| const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean, | |||||
| TensorLayout& rstd); | |||||
| virtual size_t get_workspace_in_bytes( | |||||
| const TensorLayout& data, const TensorLayout& weight, | |||||
| const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean, | |||||
| const TensorLayout& rstd) = 0; | |||||
| protected: | |||||
| void check_exec( | |||||
| const TensorLayout& data, const TensorLayout& weight, | |||||
| const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean, | |||||
| const TensorLayout& rstd, size_t workspace_in_bytes); | |||||
| }; | |||||
| using LayerNorm = LayerNormForward; | |||||
| class LayerNormBackward : public LayerNormBase { | |||||
| DEF_OPR_IMPL(LayerNormBackward, LayerNormBase, 5, 3); | |||||
| public: | |||||
| virtual void exec( | |||||
| _megdnn_tensor_in diff, _megdnn_tensor_in data, _megdnn_tensor_in weight, | |||||
| _megdnn_tensor_in mean, _megdnn_tensor_in rstd, _megdnn_tensor_out ddata, | |||||
| _megdnn_tensor_out dweight, _megdnn_tensor_out dbias, | |||||
| _megdnn_workspace workspace) = 0; | |||||
| void deduce_layout( | |||||
| const TensorLayout& diff, const TensorLayout& data, | |||||
| const TensorLayout& weight, const TensorLayout& mean, | |||||
| const TensorLayout& rstd, TensorLayout& ddata, TensorLayout& dweight, | |||||
| TensorLayout& dbias); | |||||
| virtual size_t get_workspace_in_bytes( | |||||
| const TensorLayout& diff, const TensorLayout& data, | |||||
| const TensorLayout& weight, const TensorLayout& mean, | |||||
| const TensorLayout& rstd, const TensorLayout& ddata, | |||||
| const TensorLayout& dweight, const TensorLayout& dbias) = 0; | |||||
| protected: | |||||
| void check_exec( | |||||
| const TensorLayout& diff, const TensorLayout& data, | |||||
| const TensorLayout& weight, const TensorLayout& mean, | |||||
| const TensorLayout& rstd, const TensorLayout& ddata, | |||||
| const TensorLayout& dweight, const TensorLayout& dbias, | |||||
| size_t workspace_in_bytes); | |||||
| }; | |||||
| class DropoutBase : public OperatorBase { | |||||
| DEF_OPR_IMPL_CTOR(DropoutBase, OperatorBase); | |||||
| DEF_OPR_PARAM(Dropout); | |||||
| }; | |||||
| class DropoutForward : public DropoutBase { | |||||
| DEF_OPR_IMPL(DropoutForward, DropoutBase, 1, 2); | |||||
| public: | |||||
| void deduce_layout(const TensorLayout& inp, TensorLayout& oup, TensorLayout& mask); | |||||
| virtual void exec( | |||||
| _megdnn_tensor_in inp, _megdnn_tensor_out oup, _megdnn_tensor_out mask, | |||||
| _megdnn_workspace workspace) = 0; | |||||
| virtual size_t get_workspace_in_bytes( | |||||
| const TensorLayout& inp, const TensorLayout& oup, | |||||
| const TensorLayout& mask) = 0; | |||||
| virtual size_t get_mask_size_in_bytes(const TensorLayout& inp) = 0; | |||||
| protected: | |||||
| void check_exec( | |||||
| const TensorLayout& inp, const TensorLayout& oup, const TensorLayout& mask, | |||||
| size_t workspace_in_bytes); | |||||
| }; | |||||
| using Dropout = DropoutForward; | |||||
| class DropoutBackward : public DropoutBase { | |||||
| DEF_OPR_IMPL(DropoutBackward, DropoutBase, 2, 1); | |||||
| public: | |||||
| void deduce_layout( | |||||
| const TensorLayout& doup, const TensorLayout& mask, TensorLayout& dinp); | |||||
| virtual void exec( | |||||
| _megdnn_tensor_in doup, _megdnn_tensor_in mask, _megdnn_tensor_out dinp, | |||||
| _megdnn_workspace workspace) = 0; | |||||
| virtual size_t get_workspace_in_bytes( | |||||
| const TensorLayout& doup, const TensorLayout& mask, | |||||
| const TensorLayout& dinp) = 0; | |||||
| protected: | |||||
| void check_exec( | |||||
| const TensorLayout& doup, const TensorLayout& mask, | |||||
| const TensorLayout& dinp, size_t workspace_in_bytes); | |||||
| }; | |||||
| } // namespace megdnn | } // namespace megdnn | ||||
| #include "megdnn/internal/opr_header_epilogue.h" | #include "megdnn/internal/opr_header_epilogue.h" | ||||
| @@ -1212,3 +1212,15 @@ PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'), | |||||
| member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES] | member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES] | ||||
| ) | ) | ||||
| ) | ) | ||||
| (pdef('LayerNorm') | |||||
| .add_fields('bool', 'affine', 'true') | |||||
| .add_fields('float32', 'eps', '1e-5f') | |||||
| .add_fields('uint64', 'normalized_dim', '1') | |||||
| .add_fields('uint64', 'normalized_size', '1') | |||||
| ) | |||||
| (pdef('Dropout') | |||||
| .add_fields('float32', 'drop_prob', '0') | |||||
| .add_fields('uint64', 'seed', '0') | |||||
| ) | |||||
| @@ -5,168 +5,190 @@ file(GLOB_RECURSE SOURCES common/*.cpp naive/*.cpp) | |||||
| list(APPEND SOURCES ${PROJECT_BINARY_DIR}/genfiles/megbrain_build_config.h) | list(APPEND SOURCES ${PROJECT_BINARY_DIR}/genfiles/megbrain_build_config.h) | ||||
| if(NOT ${MGE_ARCH} STREQUAL "naive") | if(NOT ${MGE_ARCH} STREQUAL "naive") | ||||
| file(GLOB_RECURSE SOURCES_ fallback/*.cpp) | |||||
| file(GLOB_RECURSE SOURCES_ fallback/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| if(${MGE_ARCH} STREQUAL "fallback") | |||||
| message(WARNING "build only with fallback") | |||||
| elseif(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | |||||
| file(GLOB_RECURSE SOURCES_ x86/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | list(APPEND SOURCES ${SOURCES_}) | ||||
| if(${MGE_ARCH} STREQUAL "fallback") | |||||
| message(WARNING "build only with fallback") | |||||
| elseif(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | |||||
| file(GLOB_RECURSE SOURCES_ x86/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| if(NOT MSVC) | |||||
| file(GLOB_RECURSE SOURCES_ x86/*.S) | |||||
| set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| endif() | |||||
| elseif(${MGE_ARCH} STREQUAL "armv7") | |||||
| file(GLOB_RECURSE SOURCES_ armv7/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE SOURCES_ armv7/*.S) | |||||
| set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| elseif(${MGE_ARCH} STREQUAL "aarch64") | |||||
| file(GLOB_RECURSE SOURCES_ aarch64/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE SOURCES_ aarch64/*.S) | |||||
| set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| if(NOT MSVC) | |||||
| file(GLOB_RECURSE SOURCES_ x86/*.S) | |||||
| set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| endif() | endif() | ||||
| elseif(${MGE_ARCH} STREQUAL "armv7") | |||||
| file(GLOB_RECURSE SOURCES_ armv7/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE SOURCES_ armv7/*.S) | |||||
| set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| elseif(${MGE_ARCH} STREQUAL "aarch64") | |||||
| file(GLOB_RECURSE SOURCES_ aarch64/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE SOURCES_ aarch64/*.S) | |||||
| set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| endif() | |||||
| endif() | endif() | ||||
| if(MGE_WITH_MIDOUT_PROFILE) | if(MGE_WITH_MIDOUT_PROFILE) | ||||
| list(APPEND SOURCES ${PROJECT_SOURCE_DIR}/third_party/midout/src/midout.cpp) | |||||
| list(APPEND SOURCES ${PROJECT_SOURCE_DIR}/third_party/midout/src/midout.cpp) | |||||
| endif() | endif() | ||||
| ############################################################################### | |||||
| # ###################################################################################### | |||||
| # HIP_COMPILE | # HIP_COMPILE | ||||
| ############################################################################### | |||||
| macro (HIP_COMPILE _hip_target _hip_objs) | |||||
| # Separate the sources from the options | |||||
| HIP_GET_SOURCES_AND_OPTIONS(_sources | |||||
| _cmake_options | |||||
| _hipcc_options | |||||
| _hcc_options | |||||
| _nvcc_options | |||||
| ${ARGN}) | |||||
| HIP_PREPARE_TARGET_COMMANDS(${_hip_target} | |||||
| OBJ _generated_files _source_files ${_sources} ${_cmake_options} | |||||
| HIPCC_OPTIONS ${_hipcc_options} | |||||
| HCC_OPTIONS ${_hcc_options} | |||||
| NVCC_OPTIONS ${_nvcc_options}) | |||||
| if(_source_files) | |||||
| list(REMOVE_ITEM _sources ${_source_files}) | |||||
| endif() | |||||
| # ###################################################################################### | |||||
| macro(HIP_COMPILE _hip_target _hip_objs) | |||||
| # Separate the sources from the options | |||||
| hip_get_sources_and_options(_sources _cmake_options _hipcc_options _hcc_options | |||||
| _nvcc_options ${ARGN}) | |||||
| hip_prepare_target_commands( | |||||
| ${_hip_target} | |||||
| OBJ | |||||
| _generated_files | |||||
| _source_files | |||||
| ${_sources} | |||||
| ${_cmake_options} | |||||
| HIPCC_OPTIONS | |||||
| ${_hipcc_options} | |||||
| HCC_OPTIONS | |||||
| ${_hcc_options} | |||||
| NVCC_OPTIONS | |||||
| ${_nvcc_options}) | |||||
| if(_source_files) | |||||
| list(REMOVE_ITEM _sources ${_source_files}) | |||||
| endif() | |||||
| add_custom_target(${_hip_target}) | |||||
| add_custom_target(${_hip_target}) | |||||
| # set return value | |||||
| set(${_hip_objs} ${_generated_files}) | |||||
| # set return value | |||||
| set(${_hip_objs} ${_generated_files}) | |||||
| endmacro() | endmacro() | ||||
| if (MGE_WITH_ROCM) | |||||
| file (GLOB_RECURSE SOURCES_ rocm/*.cpp) | |||||
| list (APPEND SOURCES ${SOURCES_}) | |||||
| # FIXME rocm may lost the first hip file, so currently we just create an | |||||
| # empty file to bypass this error. | |||||
| file(GLOB start.cpp.hip "" ) | |||||
| list(APPEND HIP_SOURCES start.cpp.hip) | |||||
| configure_file( | |||||
| ${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h.in | |||||
| ${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h) | |||||
| configure_file( | |||||
| ${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h.in | |||||
| ${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h) | |||||
| file(GLOB_RECURSE HIP_SOURCES_ rocm/*.cpp.hip) | |||||
| set(HIP_TARGET_NAME megdnn_hip_kernel) | |||||
| set(_HIPCC_OPTIONS "-fPIC") | |||||
| set(_HCC_OPTIONS "-fPIC") | |||||
| set(_NVCC_OPTIONS "-fPIC") | |||||
| list(APPEND HIP_SOURCES ${HIP_SOURCES_}) | |||||
| set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) | |||||
| HIP_INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/dnn | |||||
| ${PROJECT_SOURCE_DIR}/dnn/include | |||||
| ${PROJECT_BINARY_DIR}/dnn | |||||
| ${PROJECT_BINARY_DIR}/genfiles | |||||
| ${PROJECT_BINARY_DIR}/dnn/include | |||||
| ${HIP_INCLUDE_DIR} | |||||
| ${MIOPEN_INCLUDE_DIR} | |||||
| ${ROCBLAS_INCLUDE_DIR} | |||||
| ${ROCRAND_INCLUDE_DIR} | |||||
| ${AMDOCL_INCLUDE_DIR}) | |||||
| hip_compile( | |||||
| ${HIP_TARGET_NAME} HIPOBJS ${HIP_SOURCES} | |||||
| HIPCC_OPTIONS ${_HIPCC_OPTIONS} | |||||
| HCC_OPTIONS ${_HCC_OPTIONS} | |||||
| NVCC_OPTIONS ${_NVCC_OPTIONS}) | |||||
| list(APPEND SOURCES ${HIPOBJS}) | |||||
| endif () | |||||
| if(MGE_WITH_ROCM) | |||||
| file(GLOB_RECURSE SOURCES_ rocm/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| # FIXME rocm may lost the first hip file, so currently we just create an empty file to | |||||
| # bypass this error. | |||||
| file(GLOB start.cpp.hip "") | |||||
| list(APPEND HIP_SOURCES start.cpp.hip) | |||||
| configure_file(${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h.in | |||||
| ${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h) | |||||
| configure_file(${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h.in | |||||
| ${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h) | |||||
| file(GLOB_RECURSE HIP_SOURCES_ rocm/*.cpp.hip) | |||||
| set(HIP_TARGET_NAME megdnn_hip_kernel) | |||||
| set(_HIPCC_OPTIONS "-fPIC") | |||||
| set(_HCC_OPTIONS "-fPIC") | |||||
| set(_NVCC_OPTIONS "-fPIC") | |||||
| list(APPEND HIP_SOURCES ${HIP_SOURCES_}) | |||||
| set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) | |||||
| hip_include_directories( | |||||
| ${PROJECT_SOURCE_DIR}/dnn | |||||
| ${PROJECT_SOURCE_DIR}/dnn/include | |||||
| ${PROJECT_BINARY_DIR}/dnn | |||||
| ${PROJECT_BINARY_DIR}/genfiles | |||||
| ${PROJECT_BINARY_DIR}/dnn/include | |||||
| ${HIP_INCLUDE_DIR} | |||||
| ${MIOPEN_INCLUDE_DIR} | |||||
| ${ROCBLAS_INCLUDE_DIR} | |||||
| ${ROCRAND_INCLUDE_DIR} | |||||
| ${AMDOCL_INCLUDE_DIR}) | |||||
| hip_compile( | |||||
| ${HIP_TARGET_NAME} | |||||
| HIPOBJS | |||||
| ${HIP_SOURCES} | |||||
| HIPCC_OPTIONS | |||||
| ${_HIPCC_OPTIONS} | |||||
| HCC_OPTIONS | |||||
| ${_HCC_OPTIONS} | |||||
| NVCC_OPTIONS | |||||
| ${_NVCC_OPTIONS}) | |||||
| list(APPEND SOURCES ${HIPOBJS}) | |||||
| endif() | |||||
| if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
| file(GLOB_RECURSE SOURCES_ cuda/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE SOURCES_ cuda/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| file(GLOB_RECURSE CUSOURCES cuda/*.cu) | |||||
| set(CUTLASS_GEN_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/cutlass_generator/generator.py) | |||||
| set(CUTLASS_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuda/cutlass/generated) | |||||
| set(CUTLASS_SOURCES "") | |||||
| function(gen_cutlass_kimpl op type gen_files) | |||||
| set(CURRENT_CUTLASS_STAGE_DIR ${CUTLASS_GEN_DIR}/${op}_${type}.stage) | |||||
| set(CURRENT_CUTLASS_GEN_DIR ${CUTLASS_GEN_DIR}/${op}_${type}) | |||||
| set_directory_properties(PROPERTIES CMAKE_CONFIGURE_DEPENDS ${CUTLASS_GEN_SCRIPT}) | |||||
| file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
| file(MAKE_DIRECTORY ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
| file(MAKE_DIRECTORY ${CURRENT_CUTLASS_GEN_DIR}) | |||||
| execute_process( | |||||
| COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${CUTLASS_GEN_SCRIPT} --operations ${op} --type ${type} ${CURRENT_CUTLASS_STAGE_DIR} | |||||
| RESULT_VARIABLE gen_cutlass_result | |||||
| OUTPUT_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log | |||||
| ERROR_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log | |||||
| ) | |||||
| if (NOT gen_cutlass_result EQUAL 0) | |||||
| message(FATAL_ERROR "Error generating library instances. See ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log") | |||||
| endif() | |||||
| file(GLOB CUTLASS_GEN_FILES RELATIVE "${CURRENT_CUTLASS_GEN_DIR}/" "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||||
| foreach(FILE ${CUTLASS_GEN_FILES}) | |||||
| if (NOT EXISTS "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}") | |||||
| file(REMOVE "${CURRENT_CUTLASS_GEN_DIR}/${FILE}") | |||||
| endif() | |||||
| endforeach() | |||||
| file(GLOB CUTLASS_GEN_FILES RELATIVE "${CURRENT_CUTLASS_STAGE_DIR}" "${CURRENT_CUTLASS_STAGE_DIR}/*.cu") | |||||
| foreach(FILE ${CUTLASS_GEN_FILES}) | |||||
| execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}" "${CURRENT_CUTLASS_GEN_DIR}") | |||||
| endforeach() | |||||
| file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
| file(GLOB_RECURSE CUTLASS_GEN_FILES "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||||
| list(APPEND ${gen_files} ${CUTLASS_GEN_FILES}) | |||||
| set(${gen_files} "${${gen_files}}" PARENT_SCOPE) | |||||
| endfunction() | |||||
| gen_cutlass_kimpl(gemm simt CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(gemm tensorop884 CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(gemm tensorop1688 CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(gemv simt CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(deconv simt CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(deconv tensorop8816 CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(conv2d simt CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(conv2d tensorop8816 CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(conv2d tensorop8832 CUTLASS_SOURCES) | |||||
| list(APPEND SOURCES ${CUTLASS_SOURCES}) | |||||
| list(APPEND SOURCES ${CUSOURCES}) | |||||
| file(GLOB_RECURSE CUSOURCES cuda/*.cu) | |||||
| set(CUTLASS_GEN_SCRIPT | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/cutlass_generator/generator.py) | |||||
| set(CUTLASS_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuda/cutlass/generated) | |||||
| set(CUTLASS_SOURCES "") | |||||
| function(gen_cutlass_kimpl op type gen_files) | |||||
| set(CURRENT_CUTLASS_STAGE_DIR ${CUTLASS_GEN_DIR}/${op}_${type}.stage) | |||||
| set(CURRENT_CUTLASS_GEN_DIR ${CUTLASS_GEN_DIR}/${op}_${type}) | |||||
| set_directory_properties(PROPERTIES CMAKE_CONFIGURE_DEPENDS ${CUTLASS_GEN_SCRIPT}) | |||||
| file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
| file(MAKE_DIRECTORY ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
| file(MAKE_DIRECTORY ${CURRENT_CUTLASS_GEN_DIR}) | |||||
| execute_process( | |||||
| COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${CUTLASS_GEN_SCRIPT} --operations | |||||
| ${op} --type ${type} ${CURRENT_CUTLASS_STAGE_DIR} | |||||
| RESULT_VARIABLE gen_cutlass_result | |||||
| OUTPUT_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log | |||||
| ERROR_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log) | |||||
| if(NOT gen_cutlass_result EQUAL 0) | |||||
| message( | |||||
| FATAL_ERROR | |||||
| "Error generating library instances. See ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log" | |||||
| ) | |||||
| endif() | |||||
| file( | |||||
| GLOB CUTLASS_GEN_FILES | |||||
| RELATIVE "${CURRENT_CUTLASS_GEN_DIR}/" | |||||
| "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||||
| foreach(FILE ${CUTLASS_GEN_FILES}) | |||||
| if(NOT EXISTS "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}") | |||||
| file(REMOVE "${CURRENT_CUTLASS_GEN_DIR}/${FILE}") | |||||
| endif() | |||||
| endforeach() | |||||
| file( | |||||
| GLOB CUTLASS_GEN_FILES | |||||
| RELATIVE "${CURRENT_CUTLASS_STAGE_DIR}" | |||||
| "${CURRENT_CUTLASS_STAGE_DIR}/*.cu") | |||||
| foreach(FILE ${CUTLASS_GEN_FILES}) | |||||
| execute_process( | |||||
| COMMAND ${CMAKE_COMMAND} -E copy_if_different | |||||
| "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}" "${CURRENT_CUTLASS_GEN_DIR}") | |||||
| endforeach() | |||||
| file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
| file(GLOB_RECURSE CUTLASS_GEN_FILES "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||||
| list(APPEND ${gen_files} ${CUTLASS_GEN_FILES}) | |||||
| set(${gen_files} | |||||
| "${${gen_files}}" | |||||
| PARENT_SCOPE) | |||||
| endfunction() | |||||
| gen_cutlass_kimpl(gemm simt CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(gemm tensorop884 CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(gemm tensorop1688 CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(gemv simt CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(deconv simt CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(deconv tensorop8816 CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(conv2d simt CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(conv2d tensorop8816 CUTLASS_SOURCES) | |||||
| gen_cutlass_kimpl(conv2d tensorop8832 CUTLASS_SOURCES) | |||||
| list(APPEND SOURCES ${CUTLASS_SOURCES}) | |||||
| list(APPEND SOURCES ${CUSOURCES}) | |||||
| endif() | endif() | ||||
| if(MGE_WITH_ATLAS) | if(MGE_WITH_ATLAS) | ||||
| file(GLOB_RECURSE SOURCES_ atlas/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1) | |||||
| file(GLOB_RECURSE SOURCES_ atlas/*.cpp) | |||||
| list(APPEND SOURCES ${SOURCES_}) | |||||
| list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1) | |||||
| endif() | endif() | ||||
| add_definitions(${LIBMEGDNN_DEF}) | add_definitions(${LIBMEGDNN_DEF}) | ||||
| @@ -174,81 +196,85 @@ add_library(megdnn EXCLUDE_FROM_ALL OBJECT ${SOURCES}) | |||||
| target_link_libraries(megdnn PUBLIC opr_param_defs) | target_link_libraries(megdnn PUBLIC opr_param_defs) | ||||
| if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cutlass>) | |||||
| target_include_directories(megdnn PRIVATE ${CUDNN_INCLUDE_DIR}) | |||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cutlass>) | |||||
| target_include_directories(megdnn PRIVATE ${CUDNN_INCLUDE_DIR}) | |||||
| endif() | endif() | ||||
| if(MGE_WITH_ROCM) | if(MGE_WITH_ROCM) | ||||
| target_include_directories(megdnn PUBLIC | |||||
| ${HIP_INCLUDE_DIR} | |||||
| ${MIOPEN_INCLUDE_DIR} | |||||
| ${ROCBLAS_INCLUDE_DIR} | |||||
| ${ROCRAND_INCLUDE_DIR} | |||||
| ${AMDOCL_INCLUDE_DIR}) | |||||
| target_link_directories(megdnn PUBLIC | |||||
| ${HIP_LIBRARY_DIR} | |||||
| ${MIOPEN_LIBRARY_DIR} | |||||
| ${ROCBLAS_LIBRARY_DIR} | |||||
| ${ROCRAND_LIBRARY_DIR} | |||||
| ${AMDOCL_LIBRARY_DIR}) | |||||
| target_include_directories( | |||||
| megdnn PUBLIC ${HIP_INCLUDE_DIR} ${MIOPEN_INCLUDE_DIR} ${ROCBLAS_INCLUDE_DIR} | |||||
| ${ROCRAND_INCLUDE_DIR} ${AMDOCL_INCLUDE_DIR}) | |||||
| target_link_directories( | |||||
| megdnn | |||||
| PUBLIC | |||||
| ${HIP_LIBRARY_DIR} | |||||
| ${MIOPEN_LIBRARY_DIR} | |||||
| ${ROCBLAS_LIBRARY_DIR} | |||||
| ${ROCRAND_LIBRARY_DIR} | |||||
| ${AMDOCL_LIBRARY_DIR}) | |||||
| endif() | endif() | ||||
| if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64") | |||||
| if(MGE_ENABLE_CPUINFO) | |||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>) | |||||
| endif() | |||||
| if(${MGE_ARCH} STREQUAL "x86_64" | |||||
| OR ${MGE_ARCH} STREQUAL "i386" | |||||
| OR ${MGE_ARCH} STREQUAL "armv7" | |||||
| OR ${MGE_ARCH} STREQUAL "aarch64") | |||||
| if(MGE_ENABLE_CPUINFO) | |||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>) | |||||
| endif() | |||||
| endif() | endif() | ||||
| target_include_directories(megdnn | |||||
| PUBLIC | |||||
| $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles> | |||||
| $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/dnn/include> | |||||
| $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> | |||||
| PRIVATE | |||||
| ${PROJECT_SOURCE_DIR}/dnn | |||||
| ${PROJECT_SOURCE_DIR}/third_party/midout/src | |||||
| ) | |||||
| target_include_directories( | |||||
| megdnn | |||||
| PUBLIC $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles> | |||||
| $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/dnn/include> | |||||
| $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> | |||||
| PRIVATE ${PROJECT_SOURCE_DIR}/dnn ${PROJECT_SOURCE_DIR}/third_party/midout/src) | |||||
| install(DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include DESTINATION . FILES_MATCHING PATTERN "*.h*") | |||||
| install( | |||||
| DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include | |||||
| DESTINATION . | |||||
| FILES_MATCHING | |||||
| PATTERN "*.h*") | |||||
| if(CXX_SUPPORT_WCLASS_MEMACCESS) | if(CXX_SUPPORT_WCLASS_MEMACCESS) | ||||
| if(MGE_WITH_CUDA) | |||||
| target_compile_options(megdnn PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | |||||
| "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-class-memaccess>") | |||||
| else() | |||||
| target_compile_options(megdnn PRIVATE "-Wno-class-memaccess") | |||||
| endif() | |||||
| if(MGE_WITH_CUDA) | |||||
| target_compile_options( | |||||
| megdnn PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | |||||
| "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-class-memaccess>") | |||||
| else() | |||||
| target_compile_options(megdnn PRIVATE "-Wno-class-memaccess") | |||||
| endif() | |||||
| endif() | endif() | ||||
| target_compile_definitions(megdnn INTERFACE ${LIBMEGDNN_DEF}) | target_compile_definitions(megdnn INTERFACE ${LIBMEGDNN_DEF}) | ||||
| if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64") | if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64") | ||||
| if (BUILD_SHARED_LIBS) | |||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:dnnl>) | |||||
| else() | |||||
| target_link_libraries(megdnn PRIVATE dnnl) | |||||
| endif() | |||||
| if(BUILD_SHARED_LIBS) | |||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:dnnl>) | |||||
| else() | |||||
| target_link_libraries(megdnn PRIVATE dnnl) | |||||
| endif() | |||||
| endif() | endif() | ||||
| if (BUILD_SHARED_LIBS) | |||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_BLAS_LIBS}>) | |||||
| if(BUILD_SHARED_LIBS) | |||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_BLAS_LIBS}>) | |||||
| else() | else() | ||||
| target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS}) | |||||
| target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS}) | |||||
| endif() | endif() | ||||
| if (MGE_WITH_ROCM) | |||||
| target_link_libraries(megdnn PRIVATE ${HIPOBJS} ${MGE_ROCM_LIBS}) | |||||
| endif () | |||||
| if(MGE_WITH_ROCM) | |||||
| target_link_libraries(megdnn PRIVATE ${HIPOBJS} ${MGE_ROCM_LIBS}) | |||||
| endif() | |||||
| if(MGE_WITH_ATLAS) | if(MGE_WITH_ATLAS) | ||||
| if (BUILD_SHARED_LIBS) | |||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_ATLAS_LIBS}>) | |||||
| else() | |||||
| target_link_libraries(megdnn PRIVATE ${MGE_ATLAS_LIBS}) | |||||
| endif() | |||||
| if(BUILD_SHARED_LIBS) | |||||
| target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_ATLAS_LIBS}>) | |||||
| else() | |||||
| target_link_libraries(megdnn PRIVATE ${MGE_ATLAS_LIBS}) | |||||
| endif() | |||||
| endif() | endif() | ||||
| if(CMAKE_THREAD_LIBS_INIT) | if(CMAKE_THREAD_LIBS_INIT) | ||||
| target_link_libraries(megdnn PRIVATE Threads::Threads) | |||||
| target_link_libraries(megdnn PRIVATE Threads::Threads) | |||||
| endif() | endif() | ||||
| install(TARGETS megdnn EXPORT ${MGE_EXPORT_TARGETS}) | install(TARGETS megdnn EXPORT ${MGE_EXPORT_TARGETS}) | ||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | ||||
| INSTANTIATION_CONV_S1(2); | |||||
| INSTANTIATION_CONV_S1_BIAS(2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
| INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
| INSTANTIATION_CONV_S1_NO_BIAS(2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | ||||
| INSTANTIATION_CONV_S2(5); | |||||
| INSTANTIATION_CONV_S2_BIAS(2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
| INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
| INSTANTIATION_CONV_S2_NO_BIAS(2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | ||||
| INSTANTIATION_CONV_S1(5); | |||||
| INSTANTIATION_CONV_S1_BIAS(3); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
| INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(3); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
| INSTANTIATION_CONV_S1_NO_BIAS(3); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | ||||
| INSTANTIATION_CONV_S2(2); | |||||
| INSTANTIATION_CONV_S2_BIAS(3); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
| INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(3); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
| INSTANTIATION_CONV_S2_NO_BIAS(3); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | ||||
| INSTANTIATION_CONV_S1(3); | |||||
| INSTANTIATION_CONV_S1_BIAS(5); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
| INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(5); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
| INSTANTIATION_CONV_S1_NO_BIAS(5); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | ||||
| INSTANTIATION_CONV_S2(7); | |||||
| INSTANTIATION_CONV_S2_BIAS(5); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
| INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(5); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
| INSTANTIATION_CONV_S2_NO_BIAS(5); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | ||||
| INSTANTIATION_CONV_S1(7); | |||||
| INSTANTIATION_CONV_S1_BIAS(7); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
| INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(7); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
| INSTANTIATION_CONV_S1_NO_BIAS(7); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | ||||
| INSTANTIATION_CONV_S2(3); | |||||
| INSTANTIATION_CONV_S2_BIAS(7); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
| INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(7); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
| INSTANTIATION_CONV_S2_NO_BIAS(7); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -469,9 +469,12 @@ void conv_bias::conv_direct_fp32_nchw44( | |||||
| INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | ||||
| INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | ||||
| #define INSTANTIATION_CONV_S1(filter_size) \ | |||||
| FOR_OP(filter_size, BiasMode::NO_BIAS) \ | |||||
| FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) \ | |||||
| FOR_OP(filter_size, BiasMode::BIAS) | |||||
| #define INSTANTIATION_CONV_S1_NO_BIAS(filter_size) \ | |||||
| FOR_OP(filter_size, BiasMode::NO_BIAS) | |||||
| // vim: syntax=cpp.doxygen | |||||
| #define INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(filter_size) \ | |||||
| FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) | |||||
| #define INSTANTIATION_CONV_S1_BIAS(filter_size) FOR_OP(filter_size, BiasMode::BIAS) | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -550,9 +550,12 @@ void conv_bias::conv_direct_fp32_nchw44( | |||||
| INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | ||||
| INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | ||||
| #define INSTANTIATION_CONV_S2(filter_size) \ | |||||
| FOR_OP(filter_size, BiasMode::NO_BIAS) \ | |||||
| FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) \ | |||||
| FOR_OP(filter_size, BiasMode::BIAS) | |||||
| #define INSTANTIATION_CONV_S2_NO_BIAS(filter_size) \ | |||||
| FOR_OP(filter_size, BiasMode::NO_BIAS) | |||||
| // vim: syntax=cpp.doxygen | |||||
| #define INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(filter_size) \ | |||||
| FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) | |||||
| #define INSTANTIATION_CONV_S2_BIAS(filter_size) FOR_OP(filter_size, BiasMode::BIAS) | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | ||||
| INSTANCE_CONV(2, 1); | |||||
| INSTANCE_CONV_BIAS(2, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(2, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_NO_BIAS(2, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | ||||
| INSTANCE_CONV(2, 2); | |||||
| INSTANCE_CONV_BIAS(2, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(2, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_NO_BIAS(2, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | ||||
| INSTANCE_CONV(3, 1); | |||||
| INSTANCE_CONV_BIAS(3, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(3, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_NO_BIAS(3, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2.cpp | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -11,4 +11,5 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | ||||
| INSTANCE_CONV(3, 2); | |||||
| INSTANCE_CONV_BIAS(3, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(3, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_NO_BIAS(3, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BIAS(5, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(5, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_NO_BIAS(5, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BIAS(5, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(5, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_NO_BIAS(5, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BIAS(7, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(7, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_NO_BIAS(7, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BIAS(7, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_broadcast_channel_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(7, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,15 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_no_bias.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
| INSTANCE_CONV_NO_BIAS(7, 2); | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -928,9 +928,11 @@ void fp32_direct_nchw_nchw44::conv_direct_fp32_nchw_nchw44( | |||||
| INSTANTIATION(stride, filter, bias, ReluOp<dt_float32>) \ | INSTANTIATION(stride, filter, bias, ReluOp<dt_float32>) \ | ||||
| INSTANTIATION(stride, filter, bias, HSwishOp<dt_float32>) | INSTANTIATION(stride, filter, bias, HSwishOp<dt_float32>) | ||||
// INSTANCE_CONV(filter, stride): expands FOR_OP once per bias mode (NO_BIAS,
// BROADCAST_CHANNEL_BIAS, BIAS) for the given filter size and stride.
// Note the argument order: callers pass (filter, stride), but FOR_OP is
// invoked as (stride, filter, bias_mode).
| #define INSTANCE_CONV(filter, stride) \ | |||||
| FOR_OP(stride, filter, BiasMode::NO_BIAS) \ | |||||
| FOR_OP(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) \ | |||||
| FOR_OP(stride, filter, BiasMode::BIAS) | |||||
// Single-bias-mode variants: each one expands FOR_OP for exactly one BiasMode
// with the same (filter, stride) -> (stride, filter) argument swap, so that a
// kernel source file can request just one bias mode's worth of expansions.
| #define INSTANCE_CONV_NO_BIAS(filter, stride) FOR_OP(stride, filter, BiasMode::NO_BIAS) | |||||
| #define INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(filter, stride) \ | |||||
| FOR_OP(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | |||||
| #define INSTANCE_CONV_BIAS(filter, stride) FOR_OP(stride, filter, BiasMode::BIAS) | |||||
| // vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen | ||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.cpp | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -265,7 +265,8 @@ void conv_direct_sdot_int8_nchw44( | |||||
| #define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | #define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | ||||
| template void \ | template void \ | ||||
| conv_direct_sdot_int8_nchw44<dst_type, stride, bias_mode, Op, filter_size>( \ | |||||
| megdnn::arm_common::direct_dotprod_nchw44::conv_direct_sdot_int8_nchw44< \ | |||||
| dst_type, stride, bias_mode, Op, filter_size>( \ | |||||
| dst_type * dst, const int oh, const int ow, const int8_t* src, \ | dst_type * dst, const int oh, const int ow, const int8_t* src, \ | ||||
| const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | ||||
| const int oh_size, const int oc, const int ic, const Op& op); | const int oh_size, const int oc, const int ic, const Op& op); | ||||
| @@ -284,22 +285,6 @@ void conv_direct_sdot_int8_nchw44( | |||||
| FOR_OP(stride, i, BiasMode::NO_BIAS) \ | FOR_OP(stride, i, BiasMode::NO_BIAS) \ | ||||
| FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | ||||
| #define FOR_FILTER(stride) \ | |||||
| FOR_BIAS(stride, 2) \ | |||||
| FOR_BIAS(stride, 3) \ | |||||
| FOR_BIAS(stride, 5) \ | |||||
| FOR_BIAS(stride, 7) | |||||
| FOR_FILTER(1) | |||||
| #undef FOR_STRIDE | |||||
| #undef FOR_FILTER | |||||
| #undef FOR_IC | |||||
| #undef FOR_BIAS | |||||
| #undef FOR_NONLINEAR | |||||
| #undef FOR_REMAIN | |||||
| #undef INSTANTIATION | |||||
| } // namespace direct_dotprod_nchw44 | } // namespace direct_dotprod_nchw44 | ||||
| } // namespace arm_common | } // namespace arm_common | ||||
| } // namespace megdnn | } // namespace megdnn | ||||
| @@ -0,0 +1,21 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_2x2.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||||
| #if MGB_ENABLE_DOT | |||||
| using namespace megdnn; | |||||
| using namespace arm_common; | |||||
| FOR_BIAS(1, 2); | |||||
| #endif | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,21 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_3x3.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||||
| #if MGB_ENABLE_DOT | |||||
| using namespace megdnn; | |||||
| using namespace arm_common; | |||||
| FOR_BIAS(1, 3); | |||||
| #endif | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,21 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_5x5.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||||
| #if MGB_ENABLE_DOT | |||||
| using namespace megdnn; | |||||
| using namespace arm_common; | |||||
| FOR_BIAS(1, 5); | |||||
| #endif | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,21 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_7x7.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||||
| #if MGB_ENABLE_DOT | |||||
| using namespace megdnn; | |||||
| using namespace arm_common; | |||||
| FOR_BIAS(1, 7); | |||||
| #endif | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.cpp | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -266,7 +266,8 @@ void conv_direct_sdot_int8_nchw44( | |||||
| #define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | #define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | ||||
| template void \ | template void \ | ||||
| conv_direct_sdot_int8_nchw44<dst_type, stride, bias_mode, Op, filter_size>( \ | |||||
| megdnn::arm_common::direct_dotprod_nchw44::conv_direct_sdot_int8_nchw44< \ | |||||
| dst_type, stride, bias_mode, Op, filter_size>( \ | |||||
| dst_type * dst, const int oh, const int ow, const int8_t* src, \ | dst_type * dst, const int oh, const int ow, const int8_t* src, \ | ||||
| const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | ||||
| const int oh_size, const int oc, const int ic, const Op& op); | const int oh_size, const int oc, const int ic, const Op& op); | ||||
| @@ -285,22 +286,6 @@ void conv_direct_sdot_int8_nchw44( | |||||
| FOR_OP(stride, i, BiasMode::NO_BIAS) \ | FOR_OP(stride, i, BiasMode::NO_BIAS) \ | ||||
| FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | ||||
| #define FOR_FILTER(stride) \ | |||||
| FOR_BIAS(stride, 2) \ | |||||
| FOR_BIAS(stride, 3) \ | |||||
| FOR_BIAS(stride, 5) \ | |||||
| FOR_BIAS(stride, 7) | |||||
| FOR_FILTER(2) | |||||
| #undef FOR_STRIDE | |||||
| #undef FOR_FILTER | |||||
| #undef FOR_IC | |||||
| #undef FOR_BIAS | |||||
| #undef FOR_NONLINEAR | |||||
| #undef FOR_REMAIN | |||||
| #undef INSTANTIATION | |||||
| } // namespace direct_dotprod_nchw44 | } // namespace direct_dotprod_nchw44 | ||||
| } // namespace arm_common | } // namespace arm_common | ||||
| } // namespace megdnn | } // namespace megdnn | ||||
| @@ -0,0 +1,21 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_2x2.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||||
| #if MGB_ENABLE_DOT | |||||
| using namespace megdnn; | |||||
| using namespace arm_common; | |||||
| FOR_BIAS(2, 2); | |||||
| #endif | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,21 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_3x3.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||||
| #if MGB_ENABLE_DOT | |||||
| using namespace megdnn; | |||||
| using namespace arm_common; | |||||
| FOR_BIAS(2, 3); | |||||
| #endif | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,21 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_5x5.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||||
| #if MGB_ENABLE_DOT | |||||
| using namespace megdnn; | |||||
| using namespace arm_common; | |||||
| FOR_BIAS(2, 5); | |||||
| #endif | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,21 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_7x7.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||||
| #if MGB_ENABLE_DOT | |||||
| using namespace megdnn; | |||||
| using namespace arm_common; | |||||
| FOR_BIAS(2, 7); | |||||
| #endif | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -1,6 +1,6 @@ | |||||
| /** | /** | ||||
| * \file | * \file | ||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.cpp | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
| * | * | ||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
| @@ -45,4 +45,4 @@ public: | |||||
| } // namespace arm_common | } // namespace arm_common | ||||
| } // namespace megdnn | } // namespace megdnn | ||||
| // vim: syntax=cpp.doxygen | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -13,336 +13,9 @@ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h" | #include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h" | ||||
| #include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h" | #include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h" | ||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace arm_common { | namespace arm_common { | ||||
| namespace { | |||||
| /** | |||||
| * @brief Core multiply-accumulate step of the calculation pattern. | |||||
| * | |||||
| * Primary template: it only declares the two impl() overloads (with and | |||||
| * without a temp argument); the bodies come from partial specializations on | |||||
| * the (c_dim, stride) pair. | |||||
| * | |||||
| * @tparam src_idx offset applied when indexing the src regs | |||||
| * @tparam weight_idx offset applied when indexing the weight regs | |||||
| * @tparam c_dim output channel dimension (selects the specialization) | |||||
| * @tparam stride stride (selects the specialization) | |||||
| * @tparam T output regs type | |||||
| * @tparam T2 src regs type | |||||
| * @tparam T3 weight regs type | |||||
| * @tparam T4 temp regs type | |||||
| */ | |||||
| template < | |||||
| int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
| typename T3, typename T4> | |||||
| struct ShiftCalHelper { | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp); | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight); | |||||
| }; | |||||
// Convenience wrapper around ShiftCalHelper: forwards (c, src, weight, temp)
// to the four-argument impl(). Call sites spell out only the integer template
// arguments; the register types T..T4 are deduced from the arguments.
| template < | |||||
| int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
| typename T3, typename T4> | |||||
| MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight, T4& temp) { | |||||
| ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, T4>::impl( | |||||
| c, src, weight, temp); | |||||
| } | |||||
// Overload without a temp argument: instantiates ShiftCalHelper with `int` as
// a placeholder for the unused T4 parameter and calls the three-argument
// impl(). The `;` after the closing brace is a redundant empty declaration.
| template < | |||||
| int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
| typename T3> | |||||
| MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight) { | |||||
| ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, int>::impl( | |||||
| c, src, weight); | |||||
| }; | |||||
// Partial specialization for c_dim == 2, stride == 1.
// The temp overload updates a 2 x 8 block of accumulators: for each column
// k in 0..7 and each row r in {0, 1} it assigns
//   c[r][k] = vdotq_s32_h(src[(k + src_idx) % 8], weight[r][weight_idx],
//                         c[r][k], temp[...]);
// The src index wraps modulo 8. The four temp slots are reused in rotation:
// temp[0]/temp[1] for even k (rows 0/1), temp[2]/temp[3] for odd k.
// NOTE(review): vdotq_s32_h is defined outside this view; presumably a
// dot-product accumulate that uses its last argument as scratch -- confirm.
// The overload without temp is only declared here, not defined.
| template < | |||||
| int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||||
| struct ShiftCalHelper<src_idx, weight_idx, 2, 1, T, T2, T3, T4> { | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||||
| c[0][0] = vdotq_s32_h( | |||||
| src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||||
| c[1][0] = vdotq_s32_h( | |||||
| src[(0 + src_idx) % 8], weight[1][weight_idx], c[1][0], temp[1]); | |||||
| c[0][1] = vdotq_s32_h( | |||||
| src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[2]); | |||||
| c[1][1] = vdotq_s32_h( | |||||
| src[(1 + src_idx) % 8], weight[1][weight_idx], c[1][1], temp[3]); | |||||
| c[0][2] = vdotq_s32_h( | |||||
| src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[0]); | |||||
| c[1][2] = vdotq_s32_h( | |||||
| src[(2 + src_idx) % 8], weight[1][weight_idx], c[1][2], temp[1]); | |||||
| c[0][3] = vdotq_s32_h( | |||||
| src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[2]); | |||||
| c[1][3] = vdotq_s32_h( | |||||
| src[(3 + src_idx) % 8], weight[1][weight_idx], c[1][3], temp[3]); | |||||
| c[0][4] = vdotq_s32_h( | |||||
| src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||||
| c[1][4] = vdotq_s32_h( | |||||
| src[(4 + src_idx) % 8], weight[1][weight_idx], c[1][4], temp[1]); | |||||
| c[0][5] = vdotq_s32_h( | |||||
| src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[2]); | |||||
| c[1][5] = vdotq_s32_h( | |||||
| src[(5 + src_idx) % 8], weight[1][weight_idx], c[1][5], temp[3]); | |||||
| c[0][6] = vdotq_s32_h( | |||||
| src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[0]); | |||||
| c[1][6] = vdotq_s32_h( | |||||
| src[(6 + src_idx) % 8], weight[1][weight_idx], c[1][6], temp[1]); | |||||
| c[0][7] = vdotq_s32_h( | |||||
| src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[2]); | |||||
| c[1][7] = vdotq_s32_h( | |||||
| src[(7 + src_idx) % 8], weight[1][weight_idx], c[1][7], temp[3]); | |||||
| } | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||||
| }; | |||||
| template < | |||||
| int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||||
| struct ShiftCalHelper<src_idx, weight_idx, 1, 1, T, T2, T3, T4> { | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||||
| c[0][0] = vdotq_s32_h( | |||||
| src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||||
| c[0][1] = vdotq_s32_h( | |||||
| src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[1]); | |||||
| c[0][2] = vdotq_s32_h( | |||||
| src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[2]); | |||||
| c[0][3] = vdotq_s32_h( | |||||
| src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[3]); | |||||
| c[0][4] = vdotq_s32_h( | |||||
| src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||||
| c[0][5] = vdotq_s32_h( | |||||
| src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[1]); | |||||
| c[0][6] = vdotq_s32_h( | |||||
| src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[2]); | |||||
| c[0][7] = vdotq_s32_h( | |||||
| src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[3]); | |||||
| } | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 1, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 1; | |||||
| constexpr int filter_width = 4; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 1; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 2, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 2; | |||||
| constexpr int filter_width = 4; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 1; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 3, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 3; | |||||
| constexpr int filter_width = 4; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 1; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr + 2 * filter_width * oc_step, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 2 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 5, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 5; | |||||
| constexpr int filter_width = 8; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 2; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| #define cb(step) \ | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||||
| dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||||
| src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||||
| load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||||
| src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||||
| cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| UNROLL_CALL_RAW(5, cb); | |||||
| #undef cb | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 7, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 7; | |||||
| constexpr int filter_width = 8; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 2; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| #define cb(step) \ | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||||
| dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||||
| src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||||
| load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||||
| src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||||
| cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| UNROLL_CALL_RAW(7, cb); | |||||
| #undef cb | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| } // namespace | |||||
| namespace int8_direct_nchw_nchw44 { | namespace int8_direct_nchw_nchw44 { | ||||
| /** | /** | ||||
| * pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh ,fw/4, 4(oc)*4(fw)} | * pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh ,fw/4, 4(oc)*4(fw)} | ||||
| @@ -444,115 +117,9 @@ void pack_nchw_src_for_nchw44_conv<1>( | |||||
| } | } | ||||
| } | } | ||||
| template <BiasMode bias_mode, typename Op, size_t filter_size> | |||||
| struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> { | |||||
| static void impl( | |||||
| const int8_t* src, const int8_t* filter, const int32_t* bias, int32_t* temp, | |||||
| int8_t* dst, const size_t oc, const size_t ic, const size_t ih, | |||||
| const size_t iw, const size_t oh, const size_t ow, const Op& op) { | |||||
| MEGDNN_MARK_USED_VAR(temp); | |||||
| constexpr int stride = 1; | |||||
| constexpr size_t fh = filter_size; | |||||
| constexpr size_t fw = (filter_size + 3) / 4 * 4; | |||||
| constexpr size_t ic_step = 1; | |||||
| constexpr size_t big_oc_step = 8; | |||||
| constexpr size_t oc_step = 4; | |||||
| constexpr size_t ih_step = 1; | |||||
| constexpr size_t oh_step = 1; | |||||
| constexpr size_t ow_step = 8; | |||||
| constexpr size_t stride_h = stride; | |||||
| constexpr size_t stride_w = stride; | |||||
| constexpr int pack_iw_len = 16; | |||||
| const size_t img_stride = oh * ow; | |||||
| const size_t ow_end = ow / ow_step * ow_step; | |||||
| const size_t ow_remain = ow - ow_end; | |||||
| const size_t oc_end = oc / big_oc_step * big_oc_step; | |||||
| const size_t oc_remain = oc - oc_end; | |||||
| const int ld_dst_oc = oc_step * img_stride; | |||||
| using remain_fun = std::function<void( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, | |||||
| const int32_t* bias_ptr, int8_t* dst_ptr, int ic, int ih, int iw, | |||||
| int ld_dst_oc, const Op& op)>; | |||||
| remain_fun kern_big_oc_remain = nullptr; | |||||
| remain_fun kern_small_oc_remain = nullptr; | |||||
| switch (ow_remain) { | |||||
| #define cb(step) \ | |||||
| case step: \ | |||||
| kern_big_oc_remain = KerNeonXXs2NchwNchw44< \ | |||||
| bias_mode, Op, step, filter_size, big_oc_step, stride>::impl; \ | |||||
| kern_small_oc_remain = KerNeonXXs2NchwNchw44< \ | |||||
| bias_mode, Op, step, filter_size, oc_step, stride>::impl; \ | |||||
| break; | |||||
| UNROLL_CALL_RAW(8, cb); | |||||
| default: | |||||
| megdnn_assert(0, "no remain %zu for kern", ow_remain); | |||||
| } | |||||
| for (size_t oc_idx = 0; oc_idx < oc_end; oc_idx += big_oc_step) { | |||||
| const size_t weight_offset = oc_idx * ic * fh * fw; | |||||
| for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||||
| for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||||
| const size_t src_offset = | |||||
| (oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||||
| ic_step * pack_iw_len; | |||||
| const size_t dst_offset = | |||||
| oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||||
| KerNeonXXs2NchwNchw44< | |||||
| bias_mode, Op, ow_step, filter_size, big_oc_step, stride>:: | |||||
| impl(src + src_offset, filter + weight_offset, | |||||
| bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||||
| op); | |||||
| } | |||||
| if (ow_remain > 0) { | |||||
| const size_t src_offset = | |||||
| (oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||||
| ic_step * pack_iw_len; | |||||
| const size_t dst_offset = | |||||
| oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||||
| kern_big_oc_remain( | |||||
| src + src_offset, filter + weight_offset, bias + oc_idx, | |||||
| dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (oc_remain > 0) { | |||||
| size_t oc_idx = oc_end; | |||||
| const size_t weight_offset = oc_idx * ic * fh * fw; | |||||
| for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||||
| for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||||
| const size_t src_offset = | |||||
| (oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||||
| ic_step * pack_iw_len; | |||||
| const size_t dst_offset = | |||||
| oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||||
| KerNeonXXs2NchwNchw44< | |||||
| bias_mode, Op, ow_step, filter_size, oc_step, stride>:: | |||||
| impl(src + src_offset, filter + weight_offset, | |||||
| bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||||
| op); | |||||
| } | |||||
| if (ow_remain > 0) { | |||||
| const size_t src_offset = | |||||
| (oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||||
| ic_step * pack_iw_len; | |||||
| const size_t dst_offset = | |||||
| oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||||
| kern_small_oc_remain( | |||||
| src + src_offset, filter + weight_offset, bias + oc_idx, | |||||
| dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| }; | |||||
| #define INSTANCE_CONV_KERN_FUN(stride, filter_size, bias_mode, Op) \ | #define INSTANCE_CONV_KERN_FUN(stride, filter_size, bias_mode, Op) \ | ||||
| template struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>; | |||||
| template struct megdnn::arm_common::int8_direct_nchw_nchw44:: \ | |||||
| ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>; | |||||
| #define INSTANCE_OP_PARAM(stride, filter, bias_mode) \ | #define INSTANCE_OP_PARAM(stride, filter, bias_mode) \ | ||||
| INSTANCE_CONV_KERN_FUN( \ | INSTANCE_CONV_KERN_FUN( \ | ||||
| @@ -566,17 +133,10 @@ struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> { | |||||
| INSTANCE_OP_PARAM(stride, filter, BiasMode::NO_BIAS) \ | INSTANCE_OP_PARAM(stride, filter, BiasMode::NO_BIAS) \ | ||||
| INSTANCE_OP_PARAM(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | INSTANCE_OP_PARAM(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | ||||
| #define INSTANCE_CONV_KERN(stride) \ | |||||
| INSTANCE_BIAS_MODE_PARAM(stride, 1) \ | |||||
| INSTANCE_BIAS_MODE_PARAM(stride, 2) \ | |||||
| INSTANCE_BIAS_MODE_PARAM(stride, 3) \ | |||||
| INSTANCE_BIAS_MODE_PARAM(stride, 5) \ | |||||
| INSTANCE_BIAS_MODE_PARAM(stride, 7) | |||||
| INSTANCE_CONV_KERN(1); | |||||
| #define INSTANCE_CONV_KERN(stride, filter) INSTANCE_BIAS_MODE_PARAM(stride, filter) | |||||
| } // namespace int8_direct_nchw_nchw44 | } // namespace int8_direct_nchw_nchw44 | ||||
| } // namespace arm_common | } // namespace arm_common | ||||
| } // namespace megdnn | } // namespace megdnn | ||||
| // vim: syntax=cpp.doxygen | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,481 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h" | |||||
| #include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h" | |||||
| namespace megdnn { | |||||
| namespace arm_common { | |||||
| namespace { | |||||
| /** | |||||
| * @brief core code for calculation patten | |||||
| * | |||||
| * @tparam src_idx is offset of src reg | |||||
| * @tparam weight_idx is offset of weight reg | |||||
| * @tparam c_dim is output channel | |||||
| * @tparam Func mla operation funcion | |||||
| * @tparam stride | |||||
| * @tparam T outpur regs type | |||||
| * @tparam T2 src regs type | |||||
| * @tparam T3 weight regs type | |||||
| * @tparam T4 temp regs type | |||||
| */ | |||||
| template < | |||||
| int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
| typename T3, typename T4> | |||||
| struct ShiftCalHelper { | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp); | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight); | |||||
| }; | |||||
| template < | |||||
| int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
| typename T3, typename T4> | |||||
| MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight, T4& temp) { | |||||
| ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, T4>::impl( | |||||
| c, src, weight, temp); | |||||
| } | |||||
| template < | |||||
| int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
| typename T3> | |||||
| MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight) { | |||||
| ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, int>::impl( | |||||
| c, src, weight); | |||||
| }; | |||||
| template < | |||||
| int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||||
| struct ShiftCalHelper<src_idx, weight_idx, 2, 1, T, T2, T3, T4> { | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||||
| c[0][0] = vdotq_s32_h( | |||||
| src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||||
| c[1][0] = vdotq_s32_h( | |||||
| src[(0 + src_idx) % 8], weight[1][weight_idx], c[1][0], temp[1]); | |||||
| c[0][1] = vdotq_s32_h( | |||||
| src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[2]); | |||||
| c[1][1] = vdotq_s32_h( | |||||
| src[(1 + src_idx) % 8], weight[1][weight_idx], c[1][1], temp[3]); | |||||
| c[0][2] = vdotq_s32_h( | |||||
| src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[0]); | |||||
| c[1][2] = vdotq_s32_h( | |||||
| src[(2 + src_idx) % 8], weight[1][weight_idx], c[1][2], temp[1]); | |||||
| c[0][3] = vdotq_s32_h( | |||||
| src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[2]); | |||||
| c[1][3] = vdotq_s32_h( | |||||
| src[(3 + src_idx) % 8], weight[1][weight_idx], c[1][3], temp[3]); | |||||
| c[0][4] = vdotq_s32_h( | |||||
| src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||||
| c[1][4] = vdotq_s32_h( | |||||
| src[(4 + src_idx) % 8], weight[1][weight_idx], c[1][4], temp[1]); | |||||
| c[0][5] = vdotq_s32_h( | |||||
| src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[2]); | |||||
| c[1][5] = vdotq_s32_h( | |||||
| src[(5 + src_idx) % 8], weight[1][weight_idx], c[1][5], temp[3]); | |||||
| c[0][6] = vdotq_s32_h( | |||||
| src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[0]); | |||||
| c[1][6] = vdotq_s32_h( | |||||
| src[(6 + src_idx) % 8], weight[1][weight_idx], c[1][6], temp[1]); | |||||
| c[0][7] = vdotq_s32_h( | |||||
| src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[2]); | |||||
| c[1][7] = vdotq_s32_h( | |||||
| src[(7 + src_idx) % 8], weight[1][weight_idx], c[1][7], temp[3]); | |||||
| } | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||||
| }; | |||||
| template < | |||||
| int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||||
| struct ShiftCalHelper<src_idx, weight_idx, 1, 1, T, T2, T3, T4> { | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||||
| c[0][0] = vdotq_s32_h( | |||||
| src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||||
| c[0][1] = vdotq_s32_h( | |||||
| src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[1]); | |||||
| c[0][2] = vdotq_s32_h( | |||||
| src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[2]); | |||||
| c[0][3] = vdotq_s32_h( | |||||
| src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[3]); | |||||
| c[0][4] = vdotq_s32_h( | |||||
| src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||||
| c[0][5] = vdotq_s32_h( | |||||
| src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[1]); | |||||
| c[0][6] = vdotq_s32_h( | |||||
| src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[2]); | |||||
| c[0][7] = vdotq_s32_h( | |||||
| src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[3]); | |||||
| } | |||||
| static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 1, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 1; | |||||
| constexpr int filter_width = 4; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 1; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 2, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 2; | |||||
| constexpr int filter_width = 4; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 1; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 3, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 3; | |||||
| constexpr int filter_width = 4; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 1; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
| dot4_weight, weight_ptr + 2 * filter_width * oc_step, ld_weight_oc); | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
| src, nchw_src_ptr + 2 * iw * pack_iw_len, 0); | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 5, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 5; | |||||
| constexpr int filter_width = 8; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 2; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| #define cb(step) \ | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||||
| dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||||
| src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||||
| load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||||
| src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||||
| cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| UNROLL_CALL_RAW(5, cb); | |||||
| #undef cb | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
| struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 7, oc_block, 1> { | |||||
| static void impl( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
| int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
| constexpr int stride = 1; | |||||
| constexpr int filter_height = 7; | |||||
| constexpr int filter_width = 8; | |||||
| constexpr int oc_step = 4; | |||||
| constexpr int loop_ic_step = 1; | |||||
| constexpr int simd_len = 16; | |||||
| constexpr int pack_iw_len = 16; | |||||
| constexpr int src_reg = 8; | |||||
| constexpr int weight_reg = 2; | |||||
| const int ic_stride = ih * iw * pack_iw_len; | |||||
| const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
| constexpr int c_dim = OCHelper<oc_block>::val; | |||||
| int32x4_t c[c_dim][8]; | |||||
| init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
| for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
| const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
| int8x16_t src[src_reg]; | |||||
| int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
| int16x8_t temp_c[4]; | |||||
| #define cb(step) \ | |||||
| load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||||
| dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||||
| load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||||
| src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||||
| cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||||
| load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||||
| src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||||
| cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
| UNROLL_CALL_RAW(7, cb); | |||||
| #undef cb | |||||
| weight_ptr += oc_step * filter_height * filter_width; | |||||
| } | |||||
| store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
| c, op, dst_ptr, ld_dst_oc); | |||||
| } | |||||
| }; | |||||
| } // namespace | |||||
| namespace int8_direct_nchw_nchw44 { | |||||
| /** | |||||
| * pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh ,fw/4, 4(oc)*4(fw)} | |||||
| * pack interleave two adjacent row in filter to one row | |||||
| * */ | |||||
| template <BiasMode bias_mode, typename Op, size_t filter_size> | |||||
| struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> { | |||||
| static void impl( | |||||
| const int8_t* src, const int8_t* filter, const int32_t* bias, int32_t* temp, | |||||
| int8_t* dst, const size_t oc, const size_t ic, const size_t ih, | |||||
| const size_t iw, const size_t oh, const size_t ow, const Op& op) { | |||||
| MEGDNN_MARK_USED_VAR(temp); | |||||
| constexpr int stride = 1; | |||||
| constexpr size_t fh = filter_size; | |||||
| constexpr size_t fw = (filter_size + 3) / 4 * 4; | |||||
| constexpr size_t ic_step = 1; | |||||
| constexpr size_t big_oc_step = 8; | |||||
| constexpr size_t oc_step = 4; | |||||
| constexpr size_t ih_step = 1; | |||||
| constexpr size_t oh_step = 1; | |||||
| constexpr size_t ow_step = 8; | |||||
| constexpr size_t stride_h = stride; | |||||
| constexpr size_t stride_w = stride; | |||||
| constexpr int pack_iw_len = 16; | |||||
| const size_t img_stride = oh * ow; | |||||
| const size_t ow_end = ow / ow_step * ow_step; | |||||
| const size_t ow_remain = ow - ow_end; | |||||
| const size_t oc_end = oc / big_oc_step * big_oc_step; | |||||
| const size_t oc_remain = oc - oc_end; | |||||
| const int ld_dst_oc = oc_step * img_stride; | |||||
| using remain_fun = std::function<void( | |||||
| const int8_t* src_ptr, const int8_t* weight_ptr, | |||||
| const int32_t* bias_ptr, int8_t* dst_ptr, int ic, int ih, int iw, | |||||
| int ld_dst_oc, const Op& op)>; | |||||
| remain_fun kern_big_oc_remain = nullptr; | |||||
| remain_fun kern_small_oc_remain = nullptr; | |||||
| switch (ow_remain) { | |||||
| #define cb(step) \ | |||||
| case step: \ | |||||
| kern_big_oc_remain = KerNeonXXs2NchwNchw44< \ | |||||
| bias_mode, Op, step, filter_size, big_oc_step, stride>::impl; \ | |||||
| kern_small_oc_remain = KerNeonXXs2NchwNchw44< \ | |||||
| bias_mode, Op, step, filter_size, oc_step, stride>::impl; \ | |||||
| break; | |||||
| UNROLL_CALL_RAW(8, cb); | |||||
| default: | |||||
| megdnn_assert(0, "no remain %zu for kern", ow_remain); | |||||
| } | |||||
| for (size_t oc_idx = 0; oc_idx < oc_end; oc_idx += big_oc_step) { | |||||
| const size_t weight_offset = oc_idx * ic * fh * fw; | |||||
| for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||||
| for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||||
| const size_t src_offset = | |||||
| (oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||||
| ic_step * pack_iw_len; | |||||
| const size_t dst_offset = | |||||
| oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||||
| KerNeonXXs2NchwNchw44< | |||||
| bias_mode, Op, ow_step, filter_size, big_oc_step, stride>:: | |||||
| impl(src + src_offset, filter + weight_offset, | |||||
| bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||||
| op); | |||||
| } | |||||
| if (ow_remain > 0) { | |||||
| const size_t src_offset = | |||||
| (oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||||
| ic_step * pack_iw_len; | |||||
| const size_t dst_offset = | |||||
| oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||||
| kern_big_oc_remain( | |||||
| src + src_offset, filter + weight_offset, bias + oc_idx, | |||||
| dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||||
| } | |||||
| } | |||||
| } | |||||
| if (oc_remain > 0) { | |||||
| size_t oc_idx = oc_end; | |||||
| const size_t weight_offset = oc_idx * ic * fh * fw; | |||||
| for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||||
| for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||||
| const size_t src_offset = | |||||
| (oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||||
| ic_step * pack_iw_len; | |||||
| const size_t dst_offset = | |||||
| oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||||
| KerNeonXXs2NchwNchw44< | |||||
| bias_mode, Op, ow_step, filter_size, oc_step, stride>:: | |||||
| impl(src + src_offset, filter + weight_offset, | |||||
| bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||||
| op); | |||||
| } | |||||
| if (ow_remain > 0) { | |||||
| const size_t src_offset = | |||||
| (oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||||
| ic_step * pack_iw_len; | |||||
| const size_t dst_offset = | |||||
| oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||||
| kern_small_oc_remain( | |||||
| src + src_offset, filter + weight_offset, bias + oc_idx, | |||||
| dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| }; | |||||
// Explicit-instantiation helpers for ConvDiectStrideInt8NchwNchw44.
//
// INSTANCE_CONV_KERN(stride, filter) is the entry point: it instantiates the
// driver for the given stride and filter size once per bias mode
// (NO_BIAS, BROADCAST_CHANNEL_BIAS) and per post-process op
// (TypeCvtOp, ReluOp, HSwishOp), i.e. six instantiations in total.

// Instantiate the driver for one (stride, filter size, bias mode, op) tuple.
#define INSTANCE_CONV_KERN_FUN(stride_v, filter_v, bias_v, op_t)  \
    template struct megdnn::arm_common::int8_direct_nchw_nchw44:: \
            ConvDiectStrideInt8NchwNchw44<bias_v, op_t, filter_v, stride_v>;

// Fan out over the supported post-process ops. MEGDNN_COMMA stands in for the
// comma of the two-argument op template so that each op stays a single macro
// argument.
#define INSTANCE_OP_PARAM(stride_v, filter_v, bias_v)                       \
    INSTANCE_CONV_KERN_FUN(                                                 \
            stride_v, filter_v, bias_v,                                     \
            TypeCvtOp<dt_qint32 MEGDNN_COMMA dt_qint8>)                     \
    INSTANCE_CONV_KERN_FUN(                                                 \
            stride_v, filter_v, bias_v, ReluOp<dt_qint32 MEGDNN_COMMA dt_qint8>) \
    INSTANCE_CONV_KERN_FUN(                                                 \
            stride_v, filter_v, bias_v, HSwishOp<dt_qint32 MEGDNN_COMMA dt_qint8>)

// Fan out over the supported bias modes.
#define INSTANCE_BIAS_MODE_PARAM(stride_v, filter_v)         \
    INSTANCE_OP_PARAM(stride_v, filter_v, BiasMode::NO_BIAS) \
    INSTANCE_OP_PARAM(stride_v, filter_v, BiasMode::BROADCAST_CHANNEL_BIAS)

// Public entry point used by the per-filter-size translation units.
#define INSTANCE_CONV_KERN(stride_v, filter_v) \
    INSTANCE_BIAS_MODE_PARAM(stride_v, filter_v)
| } // namespace int8_direct_nchw_nchw44 | |||||
| } // namespace arm_common | |||||
| } // namespace megdnn | |||||
| // vim: syntax=cpp.doxygen | |||||
| @@ -0,0 +1,19 @@ | |||||
| /** | |||||
| * \file | |||||
| * dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1_1x1.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h" | |||||
| using namespace megdnn; | |||||
| using namespace arm_common; | |||||
| INSTANCE_CONV_KERN(1, 1); | |||||
| // vim: syntax=cpp.doxygen | |||||