| @@ -5,6 +5,7 @@ | |||
| *.def | |||
| *.o | |||
| *.out | |||
| *.tmp | |||
| lapack-3.1.1 | |||
| lapack-3.1.1.tgz | |||
| lapack-3.4.1 | |||
| @@ -1,4 +1,119 @@ | |||
| # XXX: Precise is already deprecated, new default is Trusty. | |||
| # https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming | |||
| dist: precise | |||
| sudo: false | |||
| language: c | |||
| compiler: gcc | |||
| jobs: | |||
| include: | |||
| - &test-ubuntu | |||
| stage: test | |||
| addons: | |||
| apt: | |||
| packages: | |||
| - gfortran | |||
| before_script: &common-before | |||
| - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" | |||
| script: | |||
| - set -e | |||
| - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
| - make -C test $COMMON_FLAGS $BTYPE | |||
| - make -C ctest $COMMON_FLAGS $BTYPE | |||
| - make -C utest $COMMON_FLAGS $BTYPE | |||
| env: | |||
| - TARGET_BOX=LINUX64 | |||
| - BTYPE="BINARY=64" | |||
| - <<: *test-ubuntu | |||
| env: | |||
| - TARGET_BOX=LINUX64 | |||
| - BTYPE="BINARY=64 USE_OPENMP=1" | |||
| - <<: *test-ubuntu | |||
| env: | |||
| - TARGET_BOX=LINUX64 | |||
| - BTYPE="BINARY=64 INTERFACE64=1" | |||
| - <<: *test-ubuntu | |||
| addons: | |||
| apt: | |||
| packages: | |||
| - gcc-multilib | |||
| - gfortran-multilib | |||
| env: | |||
| - TARGET_BOX=LINUX32 | |||
| - BTYPE="BINARY=32" | |||
| - stage: test | |||
| addons: | |||
| apt: | |||
| packages: | |||
| - binutils-mingw-w64-x86-64 | |||
| - gcc-mingw-w64-x86-64 | |||
| - gfortran-mingw-w64-x86-64 | |||
| before_script: *common-before | |||
| script: | |||
| - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
| env: | |||
| - TARGET_BOX=WIN64 | |||
| - BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran" | |||
| # Build & test on Alpine Linux inside a chroot, i.e. on a system with musl libc. | |||
| # These jobs need sudo, so Travis runs them on VM-based infrastructure, | |||
| # which is slower than the container-based infrastructure used for jobs | |||
| # that don't require sudo. | |||
| - &test-alpine | |||
| stage: test | |||
| dist: trusty | |||
| sudo: true | |||
| language: minimal | |||
| before_install: | |||
| - "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.6.0/alpine-chroot-install' \ | |||
| && echo 'a827a4ba3d0817e7c88bae17fe34e50204983d1e alpine-chroot-install' | sha1sum -c || exit 1" | |||
| - alpine() { /alpine/enter-chroot -u "$USER" "$@"; } | |||
| install: | |||
| - sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers' | |||
| before_script: *common-before | |||
| script: | |||
| - set -e | |||
| # XXX: Disable some warnings for now to avoid exceeding Travis limit for log size. | |||
| - alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
| CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types" | |||
| - alpine make -C test $COMMON_FLAGS $BTYPE | |||
| - alpine make -C ctest $COMMON_FLAGS $BTYPE | |||
| - alpine make -C utest $COMMON_FLAGS $BTYPE | |||
| env: | |||
| - TARGET_BOX=LINUX64_MUSL | |||
| - BTYPE="BINARY=64" | |||
| # XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS, | |||
| # so it's "allowed to fail" for now (see allow_failures). | |||
| - &test-alpine-openmp | |||
| <<: *test-alpine | |||
| env: | |||
| - TARGET_BOX=LINUX64_MUSL | |||
| - BTYPE="BINARY=64 USE_OPENMP=1" | |||
| - <<: *test-alpine | |||
| env: | |||
| - TARGET_BOX=LINUX64_MUSL | |||
| - BTYPE="BINARY=64 INTERFACE64=1" | |||
| # Build with the same flags that Alpine uses in its OpenBLAS package. | |||
| - <<: *test-alpine | |||
| env: | |||
| - TARGET_BOX=LINUX64_MUSL | |||
| - BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=core2" | |||
| allow_failures: | |||
| - <<: *test-alpine-openmp | |||
| # whitelist | |||
| branches: | |||
| only: | |||
| - master | |||
| - develop | |||
| notifications: | |||
| webhooks: | |||
| @@ -7,32 +122,3 @@ notifications: | |||
| on_success: change # options: [always|never|change] default: always | |||
| on_failure: always # options: [always|never|change] default: always | |||
| on_start: never # options: [always|never|change] default: always | |||
| compiler: | |||
| - gcc | |||
| env: | |||
| - TARGET_BOX=LINUX64 BTYPE="BINARY=64" | |||
| - TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1" | |||
| - TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1" | |||
| - TARGET_BOX=LINUX32 BTYPE="BINARY=32" | |||
| - TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran" | |||
| before_install: | |||
| - sudo apt-get update -qq | |||
| - sudo apt-get install -qq gfortran | |||
| - if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi | |||
| - if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi | |||
| script: | |||
| - set -e | |||
| - make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE | |||
| - if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi | |||
| - if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi | |||
| - if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi | |||
| # whitelist | |||
| branches: | |||
| only: | |||
| - master | |||
| - develop | |||
| @@ -231,43 +231,33 @@ install(TARGETS ${OpenBLAS_LIBNAME} | |||
| # Install include files | |||
| set (GENCONFIG_BIN ${CMAKE_BINARY_DIR}/gen_config_h${CMAKE_EXECUTABLE_SUFFIX}) | |||
| ADD_CUSTOM_COMMAND( | |||
| OUTPUT ${CMAKE_BINARY_DIR}/openblas_config.h | |||
| DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h | |||
| COMMAND ${GENCONFIG_BIN} ${CMAKE_CURRENT_SOURCE_DIR}/config.h ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h > ${CMAKE_BINARY_DIR}/openblas_config.h | |||
| ) | |||
| ADD_CUSTOM_TARGET(genconfig | |||
| ALL | |||
| DEPENDS openblas_config.h | |||
| ) | |||
| add_dependencies(genconfig ${OpenBLAS_LIBNAME}) | |||
| execute_process(COMMAND ${GENCONFIG_BIN} | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/config.h | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h | |||
| OUTPUT_VARIABLE OPENBLAS_CONFIG_H_CONTENTS) | |||
| file(WRITE ${CMAKE_BINARY_DIR}/openblas_config.tmp "${OPENBLAS_CONFIG_H_CONTENTS}") | |||
| configure_file(${CMAKE_BINARY_DIR}/openblas_config.tmp ${CMAKE_BINARY_DIR}/openblas_config.h COPYONLY) | |||
| install (FILES ${CMAKE_BINARY_DIR}/openblas_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||
| message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}") | |||
| ADD_CUSTOM_TARGET(genf77blas | |||
| ALL | |||
| COMMAND ${AWK} 'BEGIN{print \"\#ifndef OPENBLAS_F77BLAS_H\" \; print \"\#define OPENBLAS_F77BLAS_H\" \; print \"\#include \\"openblas_config.h\\" \"}; NF {print}; END{print \"\#endif\"}' ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h > ${CMAKE_BINARY_DIR}/f77blas.h | |||
| DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h | |||
| ) | |||
| add_dependencies(genf77blas ${OpenBLAS_LIBNAME}) | |||
| file(WRITE ${CMAKE_BINARY_DIR}/f77blas.h "") | |||
| file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "#ifndef OPENBLAS_F77BLAS_H\n#define OPENBLAS_F77BLAS_H\n#include \"openblas_config.h\"\n") | |||
| file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS) | |||
| file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "${COMMON_INTERFACE_H_CONTENTS}") | |||
| file(APPEND ${CMAKE_BINARY_DIR}/f77blas.h "#endif") | |||
| install (FILES ${CMAKE_BINARY_DIR}/f77blas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||
| if(NOT NO_CBLAS) | |||
| message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}") | |||
| ADD_CUSTOM_TARGET(gencblas | |||
| ALL | |||
| COMMAND ${SED} 's/common/openblas_config/g' ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h > "${CMAKE_BINARY_DIR}/cblas.tmp" | |||
| COMMAND cp "${CMAKE_BINARY_DIR}/cblas.tmp" "${CMAKE_BINARY_DIR}/cblas.h" | |||
| DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h | |||
| ) | |||
| add_dependencies(gencblas ${OpenBLAS_LIBNAME}) | |||
| install (FILES ${CMAKE_BINARY_DIR}/cblas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||
| file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS) | |||
| string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}") | |||
| file(WRITE ${CMAKE_BINARY_DIR}/cblas.tmp "${CBLAS_H_CONTENTS_NEW}") | |||
| install (FILES ${CMAKE_BINARY_DIR}/cblas.tmp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} RENAME cblas.h) | |||
| endif() | |||
| if(NOT NO_LAPACKE) | |||
| @@ -277,7 +267,7 @@ if(NOT NO_LAPACKE) | |||
| install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||
| ADD_CUSTOM_TARGET(genlapacke | |||
| COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h" | |||
| COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h" | |||
| ) | |||
| install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||
| endif() | |||
| @@ -87,7 +87,7 @@ endif () | |||
| string(TOUPPER ${ARCH} UC_ARCH) | |||
| file(WRITE ${TARGET_CONF} | |||
| file(WRITE ${TARGET_CONF_TEMP} | |||
| "#define OS_${HOST_OS}\t1\n" | |||
| "#define ARCH_${UC_ARCH}\t1\n" | |||
| "#define C_${COMPILER_ID}\t1\n" | |||
| @@ -95,7 +95,7 @@ file(WRITE ${TARGET_CONF} | |||
| "#define FUNDERSCORE\t${FU}\n") | |||
| if (${HOST_OS} STREQUAL "WINDOWSSTORE") | |||
| file(APPEND ${TARGET_CONF} | |||
| file(APPEND ${TARGET_CONF_TEMP} | |||
| "#define OS_WINNT\t1\n") | |||
| endif () | |||
| @@ -44,7 +44,7 @@ if (NOT ONLY_CBLAS) | |||
| # TODO: set FEXTRALIB flags a la f_check? | |||
| set(BU "_") | |||
| file(APPEND ${TARGET_CONF} | |||
| file(APPEND ${TARGET_CONF_TEMP} | |||
| "#define BUNDERSCORE _\n" | |||
| "#define NEEDBUNDERSCORE 1\n" | |||
| "#define NEED2UNDERSCORES 0\n") | |||
| @@ -56,7 +56,7 @@ else () | |||
| set(NO_FBLAS 1) | |||
| #set(F_COMPILER GFORTRAN) # CMake handles the fortran compiler | |||
| set(BU "_") | |||
| file(APPEND ${TARGET_CONF} | |||
| file(APPEND ${TARGET_CONF_TEMP} | |||
| "#define BUNDERSCORE _\n" | |||
| "#define NEEDBUNDERSCORE 1\n") | |||
| endif() | |||
| @@ -2391,6 +2391,6 @@ foreach (Utils_FILE ${Utils_SRC}) | |||
| endforeach () | |||
| set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include") | |||
| execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h") | |||
| configure_file("${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h" COPYONLY) | |||
| include_directories(${lapacke_include_dir}) | |||
| set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") | |||
| @@ -51,6 +51,7 @@ else() | |||
| set(TARGET_CONF "config.h") | |||
| endif () | |||
| set(TARGET_CONF_TEMP "${PROJECT_BINARY_DIR}/${TARGET_CONF}.tmp") | |||
| include("${PROJECT_SOURCE_DIR}/cmake/c_check.cmake") | |||
| if (NOT NOFORTRAN) | |||
| @@ -79,10 +80,11 @@ endif () | |||
| set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") | |||
| set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") | |||
| file(MAKE_DIRECTORY ${GETARCH_DIR}) | |||
| configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY) | |||
| if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") | |||
| try_compile(GETARCH_RESULT ${GETARCH_DIR} | |||
| SOURCES ${GETARCH_SRC} | |||
| COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${PROJECT_SOURCE_DIR} | |||
| COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} | |||
| OUTPUT_VARIABLE GETARCH_LOG | |||
| COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} | |||
| ) | |||
| @@ -100,16 +102,17 @@ execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE G | |||
| message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") | |||
| # append config data from getarch to the TARGET file and read in CMake vars | |||
| file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT}) | |||
| file(APPEND ${TARGET_CONF_TEMP} ${GETARCH_CONF_OUT}) | |||
| ParseGetArchVars(${GETARCH_MAKE_OUT}) | |||
| set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build") | |||
| set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") | |||
| file(MAKE_DIRECTORY ${GETARCH2_DIR}) | |||
| configure_file(${TARGET_CONF_TEMP} ${GETARCH2_DIR}/${TARGET_CONF} COPYONLY) | |||
| if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") | |||
| try_compile(GETARCH2_RESULT ${GETARCH2_DIR} | |||
| SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c | |||
| COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${PROJECT_SOURCE_DIR} | |||
| COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} | |||
| OUTPUT_VARIABLE GETARCH2_LOG | |||
| COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} | |||
| ) | |||
| @@ -124,7 +127,8 @@ execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE | |||
| execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) | |||
| # append config data from getarch_2nd to the TARGET file and read in CMake vars | |||
| file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT}) | |||
| file(APPEND ${TARGET_CONF_TEMP} ${GETARCH2_CONF_OUT}) | |||
| configure_file(${TARGET_CONF_TEMP} ${PROJECT_BINARY_DIR}/${TARGET_CONF} COPYONLY) | |||
| ParseGetArchVars(${GETARCH2_MAKE_OUT}) | |||
| # compile gen_config_h | |||
| @@ -144,4 +148,4 @@ if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") | |||
| if (NOT ${GEN_CONFIG_H_RESULT}) | |||
| MESSAGE(FATAL_ERROR "Compiling gen_config_h failed ${GEN_CONFIG_H_LOG}") | |||
| endif () | |||
| endif () | |||
| endif () | |||
| @@ -234,7 +234,9 @@ function(GenerateNamedObjects sources_in) | |||
| string(REPLACE ";" "\n#define " define_source "${obj_defines}") | |||
| string(REPLACE "=" " " define_source "${define_source}") | |||
| file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"") | |||
| file(WRITE ${new_source_file}.tmp "#define ${define_source}\n#include \"${old_source_file}\"") | |||
| configure_file(${new_source_file}.tmp ${new_source_file} COPYONLY) | |||
| file(REMOVE ${new_source_file}.tmp) | |||
| list(APPEND SRC_LIST_OUT ${new_source_file}) | |||
| endforeach () | |||
| @@ -1,4 +1,5 @@ | |||
| include_directories(${PROJECT_SOURCE_DIR}) | |||
| include_directories(${PROJECT_BINARY_DIR}) | |||
| enable_language(Fortran) | |||
| @@ -1,5 +1,6 @@ | |||
| include_directories(${PROJECT_SOURCE_DIR}) | |||
| include_directories(${PROJECT_BINARY_DIR}) | |||
| # sources that need to be compiled twice, once with no flags and once with LOWER | |||
| set(UL_SOURCES | |||
| @@ -1,4 +1,5 @@ | |||
| include_directories(${PROJECT_SOURCE_DIR}) | |||
| include_directories(${PROJECT_BINARY_DIR}) | |||
| # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa | |||
| @@ -1,4 +1,5 @@ | |||
| include_directories(${PROJECT_SOURCE_DIR}) | |||
| include_directories(${PROJECT_BINARY_DIR}) | |||
| if (${CORE} STREQUAL "PPC440") | |||
| set(MEMORY memory_qalloc.c) | |||
| @@ -155,7 +155,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #ifdef DYNAMIC_ARCH | |||
| gotoblas_t *gotoblas = NULL; | |||
| #endif | |||
| extern void openblas_warning(int verbose, const char * msg); | |||
| #ifndef SMP | |||
| @@ -187,25 +186,24 @@ int i,n; | |||
| #if !defined(__GLIBC_PREREQ) | |||
| return nums; | |||
| #endif | |||
| #if !__GLIBC_PREREQ(2, 3) | |||
| #else | |||
| #if !__GLIBC_PREREQ(2, 3) | |||
| return nums; | |||
| #endif | |||
| #endif | |||
| #if !__GLIBC_PREREQ(2, 7) | |||
| #if !__GLIBC_PREREQ(2, 7) | |||
| ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); | |||
| if (ret!=0) return nums; | |||
| n=0; | |||
| #if !__GLIBC_PREREQ(2, 6) | |||
| #if !__GLIBC_PREREQ(2, 6) | |||
| for (i=0;i<nums;i++) | |||
| if (CPU_ISSET(i,cpusetp)) n++; | |||
| nums=n; | |||
| #else | |||
| #else | |||
| nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp); | |||
| #endif | |||
| #endif | |||
| return nums; | |||
| #endif | |||
| #else | |||
| cpusetp = CPU_ALLOC(nums); | |||
| if (cpusetp == NULL) return nums; | |||
| size = CPU_ALLOC_SIZE(nums); | |||
| @@ -214,6 +212,8 @@ int i,n; | |||
| nums = CPU_COUNT_S(size,cpusetp); | |||
| CPU_FREE(cpusetp); | |||
| return nums; | |||
| #endif | |||
| #endif | |||
| } | |||
| #endif | |||
| #endif | |||
| @@ -1,5 +1,6 @@ | |||
| include_directories(${PROJECT_SOURCE_DIR}) | |||
| include_directories(${PROJECT_BINARY_DIR}) | |||
| set(BLAS1_SOURCES | |||
| @@ -1,5 +1,6 @@ | |||
| include_directories(${PROJECT_SOURCE_DIR}) | |||
| include_directories(${PROJECT_BINARY_DIR}) | |||
| include("${PROJECT_SOURCE_DIR}/cmake/kernel.cmake") | |||
| # Makefile | |||
| @@ -147,57 +147,57 @@ static FLOAT casum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| " fmov s6, "REG0" \n" | |||
| " fmov s7, "REG0" \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| " cmp "INC_X", xzr \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Lasum_kernel_S_BEGIN \n" | |||
| " bne 5f //asum_kernel_S_BEGIN \n" | |||
| ".Lasum_kernel_F_BEGIN: \n" | |||
| "1: //asum_kernel_F_BEGIN: \n" | |||
| " asr "J", "N", #5 \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Lasum_kernel_F1 \n" | |||
| " beq 3f //asum_kernel_F1 \n" | |||
| ".Lasum_kernel_F32: \n" | |||
| "2: //asum_kernel_F32: \n" | |||
| " "KERNEL_F32" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_F32 \n" | |||
| " bne 2b //asum_kernel_F32 \n" | |||
| " "KERNEL_F32_FINALIZE" \n" | |||
| ".Lasum_kernel_F1: \n" | |||
| "3: //asum_kernel_F1: \n" | |||
| " ands "J", "N", #31 \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_F10: \n" | |||
| "4: //asum_kernel_F10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_F10 \n" | |||
| " b .Lasum_kernel_L999 \n" | |||
| " bne 4b //asum_kernel_F10 \n" | |||
| " b 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_S_BEGIN: \n" | |||
| "5: //asum_kernel_S_BEGIN: \n" | |||
| " "INIT_S" \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Lasum_kernel_S1 \n" | |||
| " ble 7f //asum_kernel_S1 \n" | |||
| ".Lasum_kernel_S4: \n" | |||
| "6: //asum_kernel_S4: \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_S4 \n" | |||
| " bne 6b //asum_kernel_S4 \n" | |||
| ".Lasum_kernel_S1: \n" | |||
| "7: //asum_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_S10: \n" | |||
| "8: //asum_kernel_S10: \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_S10 \n" | |||
| " bne 8b //asum_kernel_S10 \n" | |||
| ".Lasum_kernel_L999: \n" | |||
| "9: //asum_kernel_L999: \n" | |||
| " fmov %[ASUM_], "SUMFD" \n" | |||
| : [ASUM_] "=r" (asum) //%0 | |||
| @@ -90,62 +90,62 @@ static int do_copy(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_ | |||
| " mov "Y", %[Y_] \n" | |||
| " mov "INC_Y", %[INCY_] \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Lcopy_kernel_L999 \n" | |||
| " ble 8f //copy_kernel_L999 \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Lcopy_kernel_S_BEGIN \n" | |||
| " bne 4f //copy_kernel_S_BEGIN \n" | |||
| " cmp "INC_Y", #1 \n" | |||
| " bne .Lcopy_kernel_S_BEGIN \n" | |||
| " bne 4f //copy_kernel_S_BEGIN \n" | |||
| ".Lcopy_kernel_F_BEGIN: \n" | |||
| "// .Lcopy_kernel_F_BEGIN: \n" | |||
| " "INIT" \n" | |||
| " asr "J", "N", #"N_DIV_SHIFT" \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Lcopy_kernel_F1 \n" | |||
| " beq 2f //copy_kernel_F1 \n" | |||
| " .align 5 \n" | |||
| ".Lcopy_kernel_F: \n" | |||
| "1: //copy_kernel_F: \n" | |||
| " "KERNEL_F" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lcopy_kernel_F \n" | |||
| " bne 1b //copy_kernel_F \n" | |||
| ".Lcopy_kernel_F1: \n" | |||
| "2: //copy_kernel_F1: \n" | |||
| #if defined(COMPLEX) && defined(DOUBLE) | |||
| " b .Lcopy_kernel_L999 \n" | |||
| " b 8f //copy_kernel_L999 \n" | |||
| #else | |||
| " ands "J", "N", #"N_REM_MASK" \n" | |||
| " ble .Lcopy_kernel_L999 \n" | |||
| " ble 8f //copy_kernel_L999 \n" | |||
| #endif | |||
| ".Lcopy_kernel_F10: \n" | |||
| "3: //copy_kernel_F10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lcopy_kernel_F10 \n" | |||
| " b .Lcopy_kernel_L999 \n" | |||
| " bne 3b //copy_kernel_F10 \n" | |||
| " b 8f //copy_kernel_L999 \n" | |||
| ".Lcopy_kernel_S_BEGIN: \n" | |||
| "4: //copy_kernel_S_BEGIN: \n" | |||
| " "INIT" \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Lcopy_kernel_S1 \n" | |||
| " ble 6f //copy_kernel_S1 \n" | |||
| ".Lcopy_kernel_S4: \n" | |||
| "5: //copy_kernel_S4: \n" | |||
| " "KERNEL_F1" \n" | |||
| " "KERNEL_F1" \n" | |||
| " "KERNEL_F1" \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lcopy_kernel_S4 \n" | |||
| " bne 5b //copy_kernel_S4 \n" | |||
| ".Lcopy_kernel_S1: \n" | |||
| "6: //copy_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Lcopy_kernel_L999 \n" | |||
| " ble 8f //copy_kernel_L999 \n" | |||
| ".Lcopy_kernel_S10: \n" | |||
| "7: //copy_kernel_S10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lcopy_kernel_S10 \n" | |||
| " bne 7b //copy_kernel_S10 \n" | |||
| ".Lcopy_kernel_L999: \n" | |||
| "8: //copy_kernel_L999: \n" | |||
| : | |||
| : [N_] "r" (n), //%1 | |||
| @@ -141,58 +141,58 @@ static FLOAT dasum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| " fmov d6, "REG0" \n" | |||
| " fmov d7, "REG0" \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| " cmp "INC_X", xzr \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Lasum_kernel_S_BEGIN \n" | |||
| " bne 5f //asum_kernel_S_BEGIN \n" | |||
| ".Lasum_kernel_F_BEGIN: \n" | |||
| "1: //asum_kernel_F_BEGIN: \n" | |||
| " asr "J", "N", #5 \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Lasum_kernel_F1 \n" | |||
| " beq 3f //asum_kernel_F1 \n" | |||
| ".align 5 \n" | |||
| ".Lasum_kernel_F32: \n" | |||
| "2: //asum_kernel_F32: \n" | |||
| " "KERNEL_F32" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_F32 \n" | |||
| " bne 2b //asum_kernel_F32 \n" | |||
| " "KERNEL_F32_FINALIZE" \n" | |||
| ".Lasum_kernel_F1: \n" | |||
| "3: //asum_kernel_F1: \n" | |||
| " ands "J", "N", #31 \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_F10: \n" | |||
| "4: //asum_kernel_F10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_F10 \n" | |||
| " b .Lasum_kernel_L999 \n" | |||
| " bne 4b //asum_kernel_F10 \n" | |||
| " b 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_S_BEGIN: \n" | |||
| "5: //asum_kernel_S_BEGIN: \n" | |||
| " "INIT_S" \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Lasum_kernel_S1 \n" | |||
| " ble 7f //asum_kernel_S1 \n" | |||
| ".Lasum_kernel_S4: \n" | |||
| "6: //asum_kernel_S4: \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_S4 \n" | |||
| " bne 6b //asum_kernel_S4 \n" | |||
| ".Lasum_kernel_S1: \n" | |||
| "7: //asum_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_S10: \n" | |||
| "8: //asum_kernel_S10: \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_S10 \n" | |||
| " bne 8b //asum_kernel_S10 \n" | |||
| ".Lasum_kernel_L999: \n" | |||
| "9: //asum_kernel_L999: \n" | |||
| " fmov %[ASUM_], "SUMF" \n" | |||
| : [ASUM_] "=r" (asum) //%0 | |||
| @@ -291,61 +291,61 @@ static RETURN_TYPE dot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, B | |||
| " fmov d6, xzr \n" | |||
| " fmov d7, xzr \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Ldot_kernel_L999 \n" | |||
| " ble 9f //dot_kernel_L999 \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Ldot_kernel_S_BEGIN \n" | |||
| " bne 5f //dot_kernel_S_BEGIN \n" | |||
| " cmp "INC_Y", #1 \n" | |||
| " bne .Ldot_kernel_S_BEGIN \n" | |||
| " bne 5f //dot_kernel_S_BEGIN \n" | |||
| ".Ldot_kernel_F_BEGIN: \n" | |||
| "1: //dot_kernel_F_BEGIN: \n" | |||
| " lsl "INC_X", "INC_X", "INC_SHIFT" \n" | |||
| " lsl "INC_Y", "INC_Y", "INC_SHIFT" \n" | |||
| " asr "J", "N", #"N_DIV_SHIFT" \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Ldot_kernel_F1 \n" | |||
| " beq 3f //dot_kernel_F1 \n" | |||
| " .align 5 \n" | |||
| ".Ldot_kernel_F: \n" | |||
| "2: //dot_kernel_F: \n" | |||
| " "KERNEL_F" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Ldot_kernel_F \n" | |||
| " bne 2b //dot_kernel_F \n" | |||
| " "KERNEL_F_FINALIZE" \n" | |||
| ".Ldot_kernel_F1: \n" | |||
| "3: //dot_kernel_F1: \n" | |||
| " ands "J", "N", #"N_REM_MASK" \n" | |||
| " ble .Ldot_kernel_L999 \n" | |||
| " ble 9f //dot_kernel_L999 \n" | |||
| ".Ldot_kernel_F10: \n" | |||
| "4: //dot_kernel_F10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Ldot_kernel_F10 \n" | |||
| " b .Ldot_kernel_L999 \n" | |||
| " bne 4b //dot_kernel_F10 \n" | |||
| " b 9f //dot_kernel_L999 \n" | |||
| ".Ldot_kernel_S_BEGIN: \n" | |||
| "5: //dot_kernel_S_BEGIN: \n" | |||
| " lsl "INC_X", "INC_X", "INC_SHIFT" \n" | |||
| " lsl "INC_Y", "INC_Y", "INC_SHIFT" \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Ldot_kernel_S1 \n" | |||
| " ble 7f //dot_kernel_S1 \n" | |||
| ".Ldot_kernel_S4: \n" | |||
| "6: //dot_kernel_S4: \n" | |||
| " "KERNEL_F1" \n" | |||
| " "KERNEL_F1" \n" | |||
| " "KERNEL_F1" \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Ldot_kernel_S4 \n" | |||
| " bne 6b //dot_kernel_S4 \n" | |||
| ".Ldot_kernel_S1: \n" | |||
| "7: //dot_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Ldot_kernel_L999 \n" | |||
| " ble 9f //dot_kernel_L999 \n" | |||
| ".Ldot_kernel_S10: \n" | |||
| "8: //dot_kernel_S10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Ldot_kernel_S10 \n" | |||
| " bne 8b //dot_kernel_S10 \n" | |||
| ".Ldot_kernel_L999: \n" | |||
| "9: //dot_kernel_L999: \n" | |||
| " str "DOTF", [%[DOT_]] \n" | |||
| : | |||
| @@ -74,33 +74,33 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, | |||
| " fmov "SCALE", xzr \n" | |||
| " fmov "SSQ", #1.0 \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Lnrm2_kernel_L999 \n" | |||
| " ble 9f //nrm2_kernel_L999 \n" | |||
| " cmp "INC_X", xzr \n" | |||
| " ble .Lnrm2_kernel_L999 \n" | |||
| " ble 9f //nrm2_kernel_L999 \n" | |||
| ".Lnrm2_kernel_F_BEGIN: \n" | |||
| "1: //nrm2_kernel_F_BEGIN: \n" | |||
| " fmov "REGZERO", xzr \n" | |||
| " fmov "REGONE", #1.0 \n" | |||
| " lsl "INC_X", "INC_X", #"INC_SHIFT" \n" | |||
| " mov "J", "N" \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Lnrm2_kernel_L999 \n" | |||
| " beq 9f //nrm2_kernel_L999 \n" | |||
| ".Lnrm2_kernel_F_ZERO_SKIP: \n" | |||
| "2: //nrm2_kernel_F_ZERO_SKIP: \n" | |||
| " ldr d4, ["X"] \n" | |||
| " fcmp d4, "REGZERO" \n" | |||
| " bne .Lnrm2_kernel_F_INIT \n" | |||
| " bne 3f //nrm2_kernel_F_INIT \n" | |||
| #if defined(COMPLEX) | |||
| " ldr d4, ["X", #8] \n" | |||
| " fcmp d4, "REGZERO" \n" | |||
| " bne .Lnrm2_kernel_F_INIT_I \n" | |||
| " bne 4f //nrm2_kernel_F_INIT_I \n" | |||
| #endif | |||
| " add "X", "X", "INC_X" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " beq .Lnrm2_kernel_L999 \n" | |||
| " b .Lnrm2_kernel_F_ZERO_SKIP \n" | |||
| " beq 9f //nrm2_kernel_L999 \n" | |||
| " b 2b //nrm2_kernel_F_ZERO_SKIP \n" | |||
| ".Lnrm2_kernel_F_INIT: \n" | |||
| "3: //nrm2_kernel_F_INIT: \n" | |||
| " ldr d4, ["X"] \n" | |||
| " fabs d4, d4 \n" | |||
| " fmax "CUR_MAX", "SCALE", d4 \n" | |||
| @@ -112,7 +112,7 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, | |||
| " fadd "SSQ", "SSQ", d4 \n" | |||
| " fmov "SCALE", "CUR_MAX" \n" | |||
| #if defined(COMPLEX) | |||
| ".Lnrm2_kernel_F_INIT_I: \n" | |||
| "4: //nrm2_kernel_F_INIT_I: \n" | |||
| " ldr d3, ["X", #8] \n" | |||
| " fabs d3, d3 \n" | |||
| " fmax "CUR_MAX", "SCALE", d3 \n" | |||
| @@ -126,16 +126,16 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, | |||
| #endif | |||
| " add "X", "X", "INC_X" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " beq .Lnrm2_kernel_L999 \n" | |||
| " beq 9f //nrm2_kernel_L999 \n" | |||
| ".Lnrm2_kernel_F_START: \n" | |||
| "5: //nrm2_kernel_F_START: \n" | |||
| " cmp "INC_X", #"SZ" \n" | |||
| " bne .Lnrm2_kernel_F1 \n" | |||
| " bne 8f //nrm2_kernel_F1 \n" | |||
| " asr "K", "J", #4 \n" | |||
| " cmp "K", xzr \n" | |||
| " beq .Lnrm2_kernel_F1 \n" | |||
| " beq 8f //nrm2_kernel_F1 \n" | |||
| ".Lnrm2_kernel_F: \n" | |||
| "6: //nrm2_kernel_F: \n" | |||
| " ldp q16, q17, ["X"] \n" | |||
| " ldp q18, q19, ["X", #32] \n" | |||
| " ldp q20, q21, ["X", #64] \n" | |||
| @@ -255,13 +255,13 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, | |||
| " fmov "SCALE", "CUR_MAX" \n" | |||
| #endif | |||
| " subs "K", "K", #1 \n" | |||
| " bne .Lnrm2_kernel_F \n" | |||
| " bne 6b //nrm2_kernel_F \n" | |||
| ".Lnrm2_kernel_F_DONE: \n" | |||
| "7: //nrm2_kernel_F_DONE: \n" | |||
| " ands "J", "J", #15 \n" | |||
| " beq .Lnrm2_kernel_L999 \n" | |||
| " beq 9f //nrm2_kernel_L999 \n" | |||
| ".Lnrm2_kernel_F1: \n" | |||
| "8: //nrm2_kernel_F1: \n" | |||
| " ldr d4, ["X"] \n" | |||
| " fabs d4, d4 \n" | |||
| " fmax "CUR_MAX", "SCALE", d4 \n" | |||
| @@ -286,9 +286,9 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, | |||
| #endif | |||
| " add "X", "X", "INC_X" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lnrm2_kernel_F1 \n" | |||
| " bne 8b //nrm2_kernel_F1 \n" | |||
| ".Lnrm2_kernel_L999: \n" | |||
| "9: //nrm2_kernel_L999: \n" | |||
| " str "SSQ", [%[SSQ_]] \n" | |||
| " str "SCALE", [%[SCALE_]] \n" | |||
| @@ -208,7 +208,7 @@ extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n | |||
| #endif | |||
| static BLASLONG iamax_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| static BLASLONG __attribute__((noinline)) iamax_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| { | |||
| BLASLONG index = 0; | |||
| @@ -220,72 +220,72 @@ static BLASLONG iamax_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| " mov "INC_X", %[INCX_] \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Liamax_kernel_zero \n" | |||
| " ble 10f //iamax_kernel_zero \n" | |||
| " cmp "INC_X", xzr \n" | |||
| " ble .Liamax_kernel_zero \n" | |||
| " ble 10f //iamax_kernel_zero \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Liamax_kernel_S_BEGIN \n" | |||
| " bne 5f //iamax_kernel_S_BEGIN \n" | |||
| " mov x7, "X" \n" | |||
| ".Liamax_kernel_F_BEGIN: \n" | |||
| "1: //iamax_kernel_F_BEGIN: \n" | |||
| " "INIT" \n" | |||
| " subs "N", "N", #1 \n" | |||
| " ble .Liamax_kernel_L999 \n" | |||
| " ble 9f //iamax_kernel_L999 \n" | |||
| " asr "J", "N", #"N_DIV_SHIFT" \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Liamax_kernel_F1 \n" | |||
| " beq 3f //iamax_kernel_F1 \n" | |||
| " add "Z", "Z", #1 \n" | |||
| ".Liamax_kernel_F: \n" | |||
| "2: //iamax_kernel_F: \n" | |||
| " "KERNEL_F" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Liamax_kernel_F \n" | |||
| " bne 2b //iamax_kernel_F \n" | |||
| " "KERNEL_F_FINALIZE" \n" | |||
| " sub "Z", "Z", #1 \n" | |||
| ".Liamax_kernel_F1: \n" | |||
| "3: //iamax_kernel_F1: \n" | |||
| " ands "J", "N", #"N_REM_MASK" \n" | |||
| " ble .Liamax_kernel_L999 \n" | |||
| " ble 9f //iamax_kernel_L999 \n" | |||
| ".Liamax_kernel_F10: \n" | |||
| "4: //iamax_kernel_F10: \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Liamax_kernel_F10 \n" | |||
| " b .Liamax_kernel_L999 \n" | |||
| " bne 4b //iamax_kernel_F10 \n" | |||
| " b 9f //iamax_kernel_L999 \n" | |||
| ".Liamax_kernel_S_BEGIN: \n" | |||
| "5: //iamax_kernel_S_BEGIN: \n" | |||
| " "INIT" \n" | |||
| " subs "N", "N", #1 \n" | |||
| " ble .Liamax_kernel_L999 \n" | |||
| " ble 9f //iamax_kernel_L999 \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Liamax_kernel_S1 \n" | |||
| " ble 7f //iamax_kernel_S1 \n" | |||
| ".Liamax_kernel_S4: \n" | |||
| "6: //iamax_kernel_S4: \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Liamax_kernel_S4 \n" | |||
| " bne 6b //iamax_kernel_S4 \n" | |||
| ".Liamax_kernel_S1: \n" | |||
| "7: //iamax_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Liamax_kernel_L999 \n" | |||
| " ble 9f //iamax_kernel_L999 \n" | |||
| ".Liamax_kernel_S10: \n" | |||
| "8: //iamax_kernel_S10: \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Liamax_kernel_S10 \n" | |||
| " bne 8b //iamax_kernel_S10 \n" | |||
| ".Liamax_kernel_L999: \n" | |||
| "9: //iamax_kernel_L999: \n" | |||
| " mov x0, "INDEX" \n" | |||
| " b .Liamax_kernel_DONE \n" | |||
| " b 11f //iamax_kernel_DONE \n" | |||
| ".Liamax_kernel_zero: \n" | |||
| "10: //iamax_kernel_zero: \n" | |||
| " mov x0, xzr \n" | |||
| ".Liamax_kernel_DONE: \n" | |||
| "11: //iamax_kernel_DONE: \n" | |||
| " mov %[INDEX_], "INDEX" \n" | |||
| : [INDEX_] "=r" (index) //%0 | |||
| @@ -229,72 +229,72 @@ static BLASLONG izamax_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| " mov "INC_X", %[INCX_] \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Lizamax_kernel_zero \n" | |||
| " ble 10f //izamax_kernel_zero \n" | |||
| " cmp "INC_X", xzr \n" | |||
| " ble .Lizamax_kernel_zero \n" | |||
| " ble 10f //izamax_kernel_zero \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Lizamax_kernel_S_BEGIN \n" | |||
| " bne 5f //izamax_kernel_S_BEGIN \n" | |||
| " mov x7, "X" \n" | |||
| ".Lizamax_kernel_F_BEGIN: \n" | |||
| "1: //izamax_kernel_F_BEGIN: \n" | |||
| " "INIT" \n" | |||
| " subs "N", "N", #1 \n" | |||
| " ble .Lizamax_kernel_L999 \n" | |||
| " ble 9f //izamax_kernel_L999 \n" | |||
| " asr "J", "N", #"N_DIV_SHIFT" \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Lizamax_kernel_F1 \n" | |||
| " beq 3f //izamax_kernel_F1 \n" | |||
| " add "Z", "Z", #1 \n" | |||
| ".Lizamax_kernel_F: \n" | |||
| "2: //izamax_kernel_F: \n" | |||
| " "KERNEL_F" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lizamax_kernel_F \n" | |||
| " bne 2b //izamax_kernel_F \n" | |||
| " "KERNEL_F_FINALIZE" \n" | |||
| " sub "Z", "Z", #1 \n" | |||
| ".Lizamax_kernel_F1: \n" | |||
| "3: //izamax_kernel_F1: \n" | |||
| " ands "J", "N", #"N_REM_MASK" \n" | |||
| " ble .Lizamax_kernel_L999 \n" | |||
| " ble 9f //izamax_kernel_L999 \n" | |||
| ".Lizamax_kernel_F10: \n" | |||
| "4: //izamax_kernel_F10: \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lizamax_kernel_F10 \n" | |||
| " b .Lizamax_kernel_L999 \n" | |||
| " bne 4b //izamax_kernel_F10 \n" | |||
| " b 9f //izamax_kernel_L999 \n" | |||
| ".Lizamax_kernel_S_BEGIN: \n" | |||
| "5: //izamax_kernel_S_BEGIN: \n" | |||
| " "INIT" \n" | |||
| " subs "N", "N", #1 \n" | |||
| " ble .Lizamax_kernel_L999 \n" | |||
| " ble 9f //izamax_kernel_L999 \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Lizamax_kernel_S1 \n" | |||
| " ble 7f //izamax_kernel_S1 \n" | |||
| ".Lizamax_kernel_S4: \n" | |||
| "6: //izamax_kernel_S4: \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lizamax_kernel_S4 \n" | |||
| " bne 6b //izamax_kernel_S4 \n" | |||
| ".Lizamax_kernel_S1: \n" | |||
| "7: //izamax_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Lizamax_kernel_L999 \n" | |||
| " ble 9f //izamax_kernel_L999 \n" | |||
| ".Lizamax_kernel_S10: \n" | |||
| "8: //izamax_kernel_S10: \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lizamax_kernel_S10 \n" | |||
| " bne 8b //izamax_kernel_S10 \n" | |||
| ".Lizamax_kernel_L999: \n" | |||
| "9: //izamax_kernel_L999: \n" | |||
| " mov x0, "INDEX" \n" | |||
| " b .Lizamax_kernel_DONE \n" | |||
| " b 11f //izamax_kernel_DONE \n" | |||
| ".Lizamax_kernel_zero: \n" | |||
| "10: //izamax_kernel_zero: \n" | |||
| " mov x0, xzr \n" | |||
| ".Lizamax_kernel_DONE: \n" | |||
| "11: //izamax_kernel_DONE: \n" | |||
| " mov %[INDEX_], "INDEX" \n" | |||
| : [INDEX_] "=r" (index) //%0 | |||
| @@ -143,58 +143,58 @@ static FLOAT sasum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| " fmov s6, "REG0" \n" | |||
| " fmov s7, "REG0" \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| " cmp "INC_X", xzr \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Lasum_kernel_S_BEGIN \n" | |||
| " bne 5f //asum_kernel_S_BEGIN \n" | |||
| ".Lasum_kernel_F_BEGIN: \n" | |||
| "1: //asum_kernel_F_BEGIN: \n" | |||
| " asr "J", "N", #6 \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Lasum_kernel_F1 \n" | |||
| " beq 3f //asum_kernel_F1 \n" | |||
| ".align 5 \n" | |||
| ".Lasum_kernel_F64: \n" | |||
| "2: //asum_kernel_F64: \n" | |||
| " "KERNEL_F64" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_F64 \n" | |||
| " bne 2b //asum_kernel_F64 \n" | |||
| " "KERNEL_F64_FINALIZE" \n" | |||
| ".Lasum_kernel_F1: \n" | |||
| "3: //asum_kernel_F1: \n" | |||
| " ands "J", "N", #63 \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_F10: \n" | |||
| "4: //asum_kernel_F10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_F10 \n" | |||
| " b .Lasum_kernel_L999 \n" | |||
| " bne 4b //asum_kernel_F10 \n" | |||
| " b 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_S_BEGIN: \n" | |||
| "5: //asum_kernel_S_BEGIN: \n" | |||
| " "INIT_S" \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Lasum_kernel_S1 \n" | |||
| " ble 7f //asum_kernel_S1 \n" | |||
| ".Lasum_kernel_S4: \n" | |||
| "6: //asum_kernel_S4: \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_S4 \n" | |||
| " bne 6b //asum_kernel_S4 \n" | |||
| ".Lasum_kernel_S1: \n" | |||
| "7: //asum_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_S10: \n" | |||
| "8: //asum_kernel_S10: \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_S10 \n" | |||
| " bne 8b //asum_kernel_S10 \n" | |||
| ".Lasum_kernel_L999: \n" | |||
| "9: //asum_kernel_L999: \n" | |||
| " fmov %[ASUM_], "SUMFD" \n" | |||
| : [ASUM_] "=r" (asum) //%0 | |||
| @@ -227,58 +227,58 @@ static double nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| " fmov d6, xzr \n" | |||
| " fmov d7, xzr \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Lnrm2_kernel_L999 \n" | |||
| " ble 9f //nrm2_kernel_L999 \n" | |||
| " cmp "INC_X", xzr \n" | |||
| " ble .Lnrm2_kernel_L999 \n" | |||
| " ble 9f //nrm2_kernel_L999 \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Lnrm2_kernel_S_BEGIN \n" | |||
| " bne 5f //nrm2_kernel_S_BEGIN \n" | |||
| ".Lnrm2_kernel_F_BEGIN: \n" | |||
| "1: //nrm2_kernel_F_BEGIN: \n" | |||
| " asr "J", "N", #"N_DIV_SHIFT" \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Lnrm2_kernel_S_BEGIN \n" | |||
| " beq 5f //nrm2_kernel_S_BEGIN \n" | |||
| " .align 5 \n" | |||
| ".Lnrm2_kernel_F: \n" | |||
| "2: //nrm2_kernel_F: \n" | |||
| " "KERNEL_F" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lnrm2_kernel_F \n" | |||
| " bne 2b //nrm2_kernel_F \n" | |||
| " "KERNEL_F_FINALIZE" \n" | |||
| ".Lnrm2_kernel_F1: \n" | |||
| "3: //nrm2_kernel_F1: \n" | |||
| " ands "J", "N", #"N_REM_MASK" \n" | |||
| " ble .Lnrm2_kernel_L999 \n" | |||
| " ble 9f //nrm2_kernel_L999 \n" | |||
| ".Lnrm2_kernel_F10: \n" | |||
| "4: //nrm2_kernel_F10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lnrm2_kernel_F10 \n" | |||
| " b .Lnrm2_kernel_L999 \n" | |||
| " bne 4b //nrm2_kernel_F10 \n" | |||
| " b 9f //nrm2_kernel_L999 \n" | |||
| ".Lnrm2_kernel_S_BEGIN: \n" | |||
| "5: //nrm2_kernel_S_BEGIN: \n" | |||
| " lsl "INC_X", "INC_X", #"INC_SHIFT" \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Lnrm2_kernel_S1 \n" | |||
| " ble 7f //nrm2_kernel_S1 \n" | |||
| ".Lnrm2_kernel_S4: \n" | |||
| "6: //nrm2_kernel_S4: \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lnrm2_kernel_S4 \n" | |||
| " bne 6b //nrm2_kernel_S4 \n" | |||
| ".Lnrm2_kernel_S1: \n" | |||
| "7: //nrm2_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Lnrm2_kernel_L999 \n" | |||
| " ble 9f //nrm2_kernel_L999 \n" | |||
| ".Lnrm2_kernel_S10: \n" | |||
| "8: //nrm2_kernel_S10: \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lnrm2_kernel_S10 \n" | |||
| " bne 8b //nrm2_kernel_S10 \n" | |||
| ".Lnrm2_kernel_L999: \n" | |||
| "9: //nrm2_kernel_L999: \n" | |||
| " "KERNEL_FINALIZE" \n" | |||
| " fmov %[RET_], "SSQD" \n" | |||
| @@ -143,58 +143,58 @@ static FLOAT zasum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||
| " fmov d6, "REG0" \n" | |||
| " fmov d7, "REG0" \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| " cmp "INC_X", xzr \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Lasum_kernel_S_BEGIN \n" | |||
| " bne 5f //asum_kernel_S_BEGIN \n" | |||
| ".Lasum_kernel_F_BEGIN: \n" | |||
| "1: //asum_kernel_F_BEGIN: \n" | |||
| " asr "J", "N", #4 \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Lasum_kernel_F1 \n" | |||
| " beq 3f //asum_kernel_F1 \n" | |||
| ".align 5 \n" | |||
| ".Lasum_kernel_F16: \n" | |||
| "2: //asum_kernel_F16: \n" | |||
| " "KERNEL_F16" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_F16 \n" | |||
| " bne 2b //asum_kernel_F16 \n" | |||
| " "KERNEL_F16_FINALIZE" \n" | |||
| ".Lasum_kernel_F1: \n" | |||
| "3: //asum_kernel_F1: \n" | |||
| " ands "J", "N", #15 \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_F10: \n" | |||
| "4: //asum_kernel_F10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_F10 \n" | |||
| " b .Lasum_kernel_L999 \n" | |||
| " bne 4b //asum_kernel_F10 \n" | |||
| " b 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_S_BEGIN: \n" | |||
| "5: //asum_kernel_S_BEGIN: \n" | |||
| " "INIT_S" \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Lasum_kernel_S1 \n" | |||
| " ble 7f //asum_kernel_S1 \n" | |||
| ".Lasum_kernel_S4: \n" | |||
| "6: //asum_kernel_S4: \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_S4 \n" | |||
| " bne 6b //asum_kernel_S4 \n" | |||
| ".Lasum_kernel_S1: \n" | |||
| "7: //asum_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Lasum_kernel_L999 \n" | |||
| " ble 9f //asum_kernel_L999 \n" | |||
| ".Lasum_kernel_S10: \n" | |||
| "8: //asum_kernel_S10: \n" | |||
| " "KERNEL_S1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Lasum_kernel_S10 \n" | |||
| " bne 8b //asum_kernel_S10 \n" | |||
| ".Lasum_kernel_L999: \n" | |||
| "9: //asum_kernel_L999: \n" | |||
| " fmov %[ASUM_], "SUMF" \n" | |||
| : [ASUM_] "=r" (asum) //%0 | |||
| @@ -218,61 +218,61 @@ static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON | |||
| " fmov d6, xzr \n" | |||
| " fmov d7, xzr \n" | |||
| " cmp "N", xzr \n" | |||
| " ble .Ldot_kernel_L999 \n" | |||
| " ble 9f //dot_kernel_L999 \n" | |||
| " cmp "INC_X", #1 \n" | |||
| " bne .Ldot_kernel_S_BEGIN \n" | |||
| " bne 5f //dot_kernel_S_BEGIN \n" | |||
| " cmp "INC_Y", #1 \n" | |||
| " bne .Ldot_kernel_S_BEGIN \n" | |||
| " bne 5f //dot_kernel_S_BEGIN \n" | |||
| ".Ldot_kernel_F_BEGIN: \n" | |||
| "1: //dot_kernel_F_BEGIN: \n" | |||
| " lsl "INC_X", "INC_X", "INC_SHIFT" \n" | |||
| " lsl "INC_Y", "INC_Y", "INC_SHIFT" \n" | |||
| " asr "J", "N", #"N_DIV_SHIFT" \n" | |||
| " cmp "J", xzr \n" | |||
| " beq .Ldot_kernel_F1 \n" | |||
| " beq 3f //dot_kernel_F1 \n" | |||
| " .align 5 \n" | |||
| ".Ldot_kernel_F: \n" | |||
| "2: //dot_kernel_F: \n" | |||
| " "KERNEL_F" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Ldot_kernel_F \n" | |||
| " bne 2b //dot_kernel_F \n" | |||
| " "KERNEL_F_FINALIZE" \n" | |||
| ".Ldot_kernel_F1: \n" | |||
| "3: //dot_kernel_F1: \n" | |||
| " ands "J", "N", #"N_REM_MASK" \n" | |||
| " ble .Ldot_kernel_L999 \n" | |||
| " ble 9f //dot_kernel_L999 \n" | |||
| ".Ldot_kernel_F10: \n" | |||
| "4: //dot_kernel_F10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Ldot_kernel_F10 \n" | |||
| " b .Ldot_kernel_L999 \n" | |||
| " bne 4b //dot_kernel_F10 \n" | |||
| " b 9f //dot_kernel_L999 \n" | |||
| ".Ldot_kernel_S_BEGIN: \n" | |||
| "5: //dot_kernel_S_BEGIN: \n" | |||
| " lsl "INC_X", "INC_X", "INC_SHIFT" \n" | |||
| " lsl "INC_Y", "INC_Y", "INC_SHIFT" \n" | |||
| " asr "J", "N", #2 \n" | |||
| " cmp "J", xzr \n" | |||
| " ble .Ldot_kernel_S1 \n" | |||
| " ble 7f //dot_kernel_S1 \n" | |||
| ".Ldot_kernel_S4: \n" | |||
| "6: //dot_kernel_S4: \n" | |||
| " "KERNEL_F1" \n" | |||
| " "KERNEL_F1" \n" | |||
| " "KERNEL_F1" \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Ldot_kernel_S4 \n" | |||
| " bne 6b //dot_kernel_S4 \n" | |||
| ".Ldot_kernel_S1: \n" | |||
| "7: //dot_kernel_S1: \n" | |||
| " ands "J", "N", #3 \n" | |||
| " ble .Ldot_kernel_L999 \n" | |||
| " ble 9f //dot_kernel_L999 \n" | |||
| ".Ldot_kernel_S10: \n" | |||
| "8: //dot_kernel_S10: \n" | |||
| " "KERNEL_F1" \n" | |||
| " subs "J", "J", #1 \n" | |||
| " bne .Ldot_kernel_S10 \n" | |||
| " bne 8b //dot_kernel_S10 \n" | |||
| ".Ldot_kernel_L999: \n" | |||
| "9: //dot_kernel_L999: \n" | |||
| " str "DOTF", [%[DOTR_]] \n" | |||
| " str "DOTI", [%[DOTI_]] \n" | |||
| @@ -1,5 +1,6 @@ | |||
| include_directories(${PROJECT_SOURCE_DIR}) | |||
| include_directories(${PROJECT_BINARY_DIR}) | |||
| set(LAPACK_SOURCES | |||
| @@ -1,4 +1,5 @@ | |||
| include_directories(${PROJECT_SOURCE_DIR}) | |||
| include_directories(${PROJECT_BINARY_DIR}) | |||
| enable_language(Fortran) | |||
| @@ -35,4 +36,4 @@ add_test(NAME "${float_type}blas2" | |||
| COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/${float_type}blat2" "${PROJECT_SOURCE_DIR}/test/${float_type}blat2.dat" ${float_type_upper}BLAT2.SUMM) | |||
| add_test(NAME "${float_type}blas3" | |||
| COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/${float_type}blat3" "${PROJECT_SOURCE_DIR}/test/${float_type}blat3.dat" ${float_type_upper}BLAT3.SUMM) | |||
| endforeach() | |||
| endforeach() | |||
| @@ -1,4 +1,5 @@ | |||
| include_directories(${PROJECT_SOURCE_DIR}) | |||
| include_directories(${PROJECT_BINARY_DIR}) | |||
| set(OpenBLAS_utest_src | |||
| utest_main.c | |||
| @@ -39,4 +40,4 @@ add_custom_command(TARGET ${OpenBLAS_utest_bin} | |||
| ) | |||
| endif() | |||
| add_test(${OpenBLAS_utest_bin} ${CMAKE_CURRENT_BINARY_DIR}/${OpenBLAS_utest_bin}) | |||
| add_test(${OpenBLAS_utest_bin} ${CMAKE_CURRENT_BINARY_DIR}/${OpenBLAS_utest_bin}) | |||