Update from develop for 0.3.4tags/v0.3.4
| @@ -4,11 +4,10 @@ dist: precise | |||||
| sudo: true | sudo: true | ||||
| language: c | language: c | ||||
| jobs: | |||||
| matrix: | |||||
| include: | include: | ||||
| - &test-ubuntu | - &test-ubuntu | ||||
| os: linux | os: linux | ||||
| stage: test | |||||
| compiler: gcc | compiler: gcc | ||||
| addons: | addons: | ||||
| apt: | apt: | ||||
| @@ -59,7 +58,6 @@ jobs: | |||||
| - BTYPE="BINARY=32" | - BTYPE="BINARY=32" | ||||
| - os: linux | - os: linux | ||||
| stage: test | |||||
| compiler: gcc | compiler: gcc | ||||
| addons: | addons: | ||||
| apt: | apt: | ||||
| @@ -80,13 +78,12 @@ jobs: | |||||
| # that don't require sudo. | # that don't require sudo. | ||||
| - &test-alpine | - &test-alpine | ||||
| os: linux | os: linux | ||||
| stage: test | |||||
| dist: trusty | dist: trusty | ||||
| sudo: true | sudo: true | ||||
| language: minimal | language: minimal | ||||
| before_install: | before_install: | ||||
| - "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.6.0/alpine-chroot-install' \ | |||||
| && echo 'a827a4ba3d0817e7c88bae17fe34e50204983d1e alpine-chroot-install' | sha1sum -c || exit 1" | |||||
| - "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \ | |||||
| && echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1" | |||||
| - alpine() { /alpine/enter-chroot -u "$USER" "$@"; } | - alpine() { /alpine/enter-chroot -u "$USER" "$@"; } | ||||
| install: | install: | ||||
| - sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers' | - sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers' | ||||
| @@ -124,7 +121,6 @@ jobs: | |||||
| - &test-cmake | - &test-cmake | ||||
| os: linux | os: linux | ||||
| stage: test | |||||
| compiler: clang | compiler: clang | ||||
| addons: | addons: | ||||
| apt: | apt: | ||||
| @@ -153,7 +149,6 @@ jobs: | |||||
| - &test-macos | - &test-macos | ||||
| os: osx | os: osx | ||||
| stage: test | |||||
| osx_image: xcode8 | osx_image: xcode8 | ||||
| before_script: | before_script: | ||||
| - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" | - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" | ||||
| @@ -168,6 +163,42 @@ jobs: | |||||
| env: | env: | ||||
| - BTYPE="BINARY=32" | - BTYPE="BINARY=32" | ||||
| - &emulated-arm | |||||
| dist: trusty | |||||
| sudo: required | |||||
| services: docker | |||||
| env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=gcc | |||||
| name: "Emulated Build for ARMV6 with gcc" | |||||
| before_install: sudo docker run --rm --privileged multiarch/qemu-user-static:register --reset | |||||
| script: | | |||||
| echo "FROM openblas/alpine:${IMAGE_ARCH} | |||||
| COPY . /tmp/openblas | |||||
| RUN mkdir /tmp/openblas/build && \ | |||||
| cd /tmp/openblas/build && \ | |||||
| CC=${COMPILER} cmake -D DYNAMIC_ARCH=OFF \ | |||||
| -D TARGET=${TARGET_ARCH} \ | |||||
| -D BUILD_SHARED_LIBS=ON \ | |||||
| -D BUILD_WITHOUT_LAPACK=ON \ | |||||
| -D BUILD_WITHOUT_CBLAS=ON \ | |||||
| -D CMAKE_BUILD_TYPE=Release ../ && \ | |||||
| cmake --build ." > Dockerfile | |||||
| docker build . | |||||
| - <<: *emulated-arm | |||||
| env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=clang | |||||
| name: "Emulated Build for ARMV6 with clang" | |||||
| - <<: *emulated-arm | |||||
| env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=gcc | |||||
| name: "Emulated Build for ARMV8 with gcc" | |||||
| - <<: *emulated-arm | |||||
| env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=clang | |||||
| name: "Emulated Build for ARMV8 with clang" | |||||
| allow_failures: | |||||
| - env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=gcc | |||||
| - env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=clang | |||||
| - env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=gcc | |||||
| - env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=clang | |||||
| # whitelist | # whitelist | ||||
| branches: | branches: | ||||
| only: | only: | ||||
| @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) | |||||
| project(OpenBLAS C ASM) | project(OpenBLAS C ASM) | ||||
| set(OpenBLAS_MAJOR_VERSION 0) | set(OpenBLAS_MAJOR_VERSION 0) | ||||
| set(OpenBLAS_MINOR_VERSION 3) | set(OpenBLAS_MINOR_VERSION 3) | ||||
| set(OpenBLAS_PATCH_VERSION 3) | |||||
| set(OpenBLAS_PATCH_VERSION 4) | |||||
| set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | ||||
| # Adhere to GNU filesystem layout conventions | # Adhere to GNU filesystem layout conventions | ||||
| @@ -15,16 +15,21 @@ include(GNUInstallDirs) | |||||
| include(CMakePackageConfigHelpers) | include(CMakePackageConfigHelpers) | ||||
| set(OpenBLAS_LIBNAME openblas) | |||||
| ####### | ####### | ||||
| if(MSVC) | if(MSVC) | ||||
| option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) | |||||
| option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) | |||||
| endif() | endif() | ||||
| option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF) | |||||
| option(DYNAMIC_ARCH "Build with DYNAMIC_ARCH" OFF) | |||||
| option(DYNAMIC_OLDER "Support older cpus with DYNAMIC_ARCH" OFF) | |||||
| option(BUILD_RELAPACK "Build with ReLAPACK (recursive LAPACK" OFF) | |||||
| option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF) | |||||
| option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64 only)" OFF) | |||||
| option(DYNAMIC_OLDER "Include specific support for older cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF) | |||||
| option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF) | |||||
| # Add a prefix or suffix to all exported symbol names in the shared library. | |||||
| # Avoids conflicts with other BLAS libraries, especially when using | |||||
| # 64 bit integer interfaces in OpenBLAS. | |||||
| set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in the shared library to avoid conflicts with other BLAS libraries" ) | |||||
| set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" ) | |||||
| ####### | ####### | ||||
| if(BUILD_WITHOUT_LAPACK) | if(BUILD_WITHOUT_LAPACK) | ||||
| set(NO_LAPACK 1) | set(NO_LAPACK 1) | ||||
| @@ -38,11 +43,13 @@ endif() | |||||
| ####### | ####### | ||||
| message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.") | |||||
| message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.") | |||||
| include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake") | include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake") | ||||
| include("${PROJECT_SOURCE_DIR}/cmake/system.cmake") | include("${PROJECT_SOURCE_DIR}/cmake/system.cmake") | ||||
| set(OpenBLAS_LIBNAME openblas${SUFFIX64_UNDERSCORE}) | |||||
| set(BLASDIRS interface driver/level2 driver/level3 driver/others) | set(BLASDIRS interface driver/level2 driver/level3 driver/others) | ||||
| if (NOT DYNAMIC_ARCH) | if (NOT DYNAMIC_ARCH) | ||||
| @@ -210,15 +217,84 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES | |||||
| SOVERSION ${OpenBLAS_MAJOR_VERSION} | SOVERSION ${OpenBLAS_MAJOR_VERSION} | ||||
| ) | ) | ||||
| # Optionally rename all exported symbols of the shared library. | |||||
| # When SYMBOLPREFIX and/or SYMBOLSUFFIX is non-empty, exports/gensymbol writes an | |||||
| # objcopy symbol map (objcopy.def) that objcopy then applies to the built library. | |||||
| if (BUILD_SHARED_LIBS AND NOT "${SYMBOLPREFIX}${SYMBOLSUFFIX}" STREQUAL "") | |||||
| # gensymbol takes positional arguments, so every value gets an explicit default. | |||||
| if (NOT DEFINED ARCH) | |||||
| set(ARCH_IN "x86_64") | |||||
| else() | |||||
| set(ARCH_IN ${ARCH}) | |||||
| endif() | |||||
| # Quoted so that an unset or empty CORE does not break the comparison. | |||||
| if ("${CORE}" STREQUAL "generic") | |||||
| set(ARCH_IN "GENERIC") | |||||
| endif () | |||||
| if (NOT DEFINED EXPRECISION) | |||||
| set(EXPRECISION_IN 0) | |||||
| else() | |||||
| set(EXPRECISION_IN ${EXPRECISION}) | |||||
| endif() | |||||
| if (NOT DEFINED NO_CBLAS) | |||||
| set(NO_CBLAS_IN 0) | |||||
| else() | |||||
| set(NO_CBLAS_IN ${NO_CBLAS}) | |||||
| endif() | |||||
| if (NOT DEFINED NO_LAPACK) | |||||
| set(NO_LAPACK_IN 0) | |||||
| else() | |||||
| set(NO_LAPACK_IN ${NO_LAPACK}) | |||||
| endif() | |||||
| if (NOT DEFINED NO_LAPACKE) | |||||
| set(NO_LAPACKE_IN 0) | |||||
| else() | |||||
| set(NO_LAPACKE_IN ${NO_LAPACKE}) | |||||
| endif() | |||||
| if (NOT DEFINED NEED2UNDERSCORES) | |||||
| set(NEED2UNDERSCORES_IN 0) | |||||
| else() | |||||
| set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES}) | |||||
| endif() | |||||
| if (NOT DEFINED ONLY_CBLAS) | |||||
| set(ONLY_CBLAS_IN 0) | |||||
| else() | |||||
| set(ONLY_CBLAS_IN ${ONLY_CBLAS}) | |||||
| endif() | |||||
| if (NOT DEFINED BU) | |||||
| set(BU _) | |||||
| endif() | |||||
| if (NOT "${SYMBOLPREFIX}" STREQUAL "") | |||||
| message(STATUS "adding prefix ${SYMBOLPREFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}") | |||||
| endif() | |||||
| if (NOT "${SYMBOLSUFFIX}" STREQUAL "") | |||||
| message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}") | |||||
| endif() | |||||
| # Pass ARCH_IN (not ARCH) so the x86_64 default and the GENERIC override above take effect. | |||||
| add_custom_command(TARGET ${OpenBLAS_LIBNAME} POST_BUILD | |||||
| COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH_IN}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" > ${PROJECT_BINARY_DIR}/objcopy.def | |||||
| COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so | |||||
| COMMENT "renaming symbols" | |||||
| ) | |||||
| endif() | |||||
| # Install project | # Install project | ||||
| # Install libraries | # Install libraries | ||||
| install(TARGETS ${OpenBLAS_LIBNAME} | install(TARGETS ${OpenBLAS_LIBNAME} | ||||
| EXPORT "OpenBLASTargets" | |||||
| EXPORT "OpenBLAS${SUFFIX64}Targets" | |||||
| RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} | ||||
| ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} | ||||
| LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) | ||||
| # Install headers | |||||
| set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64}) | |||||
| set(CMAKE_INSTALL_FULL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) | |||||
| message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}") | message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}") | ||||
| set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h) | set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h) | ||||
| @@ -266,29 +342,31 @@ if(NOT NO_LAPACKE) | |||||
| ADD_CUSTOM_TARGET(genlapacke | ADD_CUSTOM_TARGET(genlapacke | ||||
| COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h" | COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h" | ||||
| ) | ) | ||||
| install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) | |||||
| install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64}) | |||||
| endif() | endif() | ||||
| include(FindPkgConfig QUIET) | include(FindPkgConfig QUIET) | ||||
| if(PKG_CONFIG_FOUND) | if(PKG_CONFIG_FOUND) | ||||
| configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY) | |||||
| install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/) | |||||
| configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY) | |||||
| install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/) | |||||
| endif() | endif() | ||||
| # GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share". | # GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share". | ||||
| set(PN OpenBLAS) | set(PN OpenBLAS) | ||||
| set(CMAKECONFIG_INSTALL_DIR "share/cmake/${PN}") | |||||
| set(CMAKECONFIG_INSTALL_DIR "share/cmake/${PN}${SUFFIX64}") | |||||
| configure_package_config_file(cmake/${PN}Config.cmake.in | configure_package_config_file(cmake/${PN}Config.cmake.in | ||||
| "${CMAKE_CURRENT_BINARY_DIR}/${PN}Config.cmake" | |||||
| "${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake" | |||||
| INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR}) | INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR}) | ||||
| write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake | write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake | ||||
| VERSION ${${PN}_VERSION} | VERSION ${${PN}_VERSION} | ||||
| COMPATIBILITY AnyNewerVersion) | COMPATIBILITY AnyNewerVersion) | ||||
| install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}Config.cmake | |||||
| ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake | |||||
| install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake | |||||
| DESTINATION ${CMAKECONFIG_INSTALL_DIR}) | |||||
| install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake | |||||
| RENAME ${PN}${SUFFIX64}ConfigVersion.cmake | |||||
| DESTINATION ${CMAKECONFIG_INSTALL_DIR}) | DESTINATION ${CMAKECONFIG_INSTALL_DIR}) | ||||
| install(EXPORT "${PN}Targets" | |||||
| NAMESPACE "${PN}::" | |||||
| install(EXPORT "${PN}${SUFFIX64}Targets" | |||||
| NAMESPACE "${PN}${SUFFIX64}::" | |||||
| DESTINATION ${CMAKECONFIG_INSTALL_DIR}) | DESTINATION ${CMAKECONFIG_INSTALL_DIR}) | ||||
| @@ -251,7 +251,7 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||||
| -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| @@ -4,22 +4,37 @@ CCOMMON_OPT += -march=armv8-a | |||||
| FCOMMON_OPT += -march=armv8-a | FCOMMON_OPT += -march=armv8-a | ||||
| endif | endif | ||||
| ifeq ($(CORE), CORTEXA53) | |||||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||||
| endif | |||||
| ifeq ($(CORE), CORTEXA57) | ifeq ($(CORE), CORTEXA57) | ||||
| CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||||
| FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57 | |||||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57 | |||||
| endif | |||||
| ifeq ($(CORE), CORTEXA72) | |||||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||||
| endif | endif | ||||
| ifeq ($(CORE), VULCAN) | |||||
| CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||||
| FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||||
| ifeq ($(CORE), CORTEXA73) | |||||
| CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||||
| FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||||
| endif | endif | ||||
| ifeq ($(CORE), THUNDERX) | ifeq ($(CORE), THUNDERX) | ||||
| CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx | |||||
| FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx | |||||
| CCOMMON_OPT += -march=armv8-a -mtune=thunderx | |||||
| FCOMMON_OPT += -march=armv8-a -mtune=thunderx | |||||
| endif | |||||
| ifeq ($(CORE), FALKOR) | |||||
| CCOMMON_OPT += -march=armv8.1-a -mtune=falkor | |||||
| FCOMMON_OPT += -march=armv8.1-a -mtune=falkor | |||||
| endif | endif | ||||
| ifeq ($(CORE), THUNDERX2T99) | ifeq ($(CORE), THUNDERX2T99) | ||||
| CCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99 | |||||
| FCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99 | |||||
| CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||||
| FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||||
| endif | endif | ||||
| @@ -48,6 +48,7 @@ ifndef NO_CBLAS | |||||
| @sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h" | @sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h" | ||||
| endif | endif | ||||
| ifneq ($(OSNAME), AIX) | |||||
| ifndef NO_LAPACKE | ifndef NO_LAPACKE | ||||
| @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | ||||
| @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" | @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" | ||||
| @@ -72,6 +73,7 @@ ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku)) | |||||
| ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ | ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ | ||||
| ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | ||||
| endif | endif | ||||
| ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly)) | ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly)) | ||||
| @cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | @cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | ||||
| @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | ||||
| @@ -93,6 +95,33 @@ ifeq ($(OSNAME), CYGWIN_NT) | |||||
| endif | endif | ||||
| endif | endif | ||||
| else | |||||
| #install on AIX has different options syntax | |||||
| ifndef NO_LAPACKE | |||||
| @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | |||||
| @-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" | |||||
| @-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h" | |||||
| @-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h" | |||||
| @-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h" | |||||
| endif | |||||
| #for install static library | |||||
| ifndef NO_STATIC | |||||
| @echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
| @installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||||
| @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||||
| ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | |||||
| endif | |||||
| #for install shared library | |||||
| ifndef NO_SHARED | |||||
| @echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
| @installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||||
| @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||||
| ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ | |||||
| ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | |||||
| endif | |||||
| endif | |||||
| #Generating openblas.pc | #Generating openblas.pc | ||||
| @echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" | @echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" | ||||
| @@ -3,7 +3,7 @@ | |||||
| # | # | ||||
| # This library's version | # This library's version | ||||
| VERSION = 0.3.3 | |||||
| VERSION = 0.3.4 | |||||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | ||||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | ||||
| @@ -107,13 +107,13 @@ BUILD_LAPACK_DEPRECATED = 1 | |||||
| # BUILD_RELAPACK = 1 | # BUILD_RELAPACK = 1 | ||||
| # If you want to use legacy threaded Level 3 implementation. | # If you want to use legacy threaded Level 3 implementation. | ||||
| USE_SIMPLE_THREADED_LEVEL3 = 1 | |||||
| # USE_SIMPLE_THREADED_LEVEL3 = 1 | |||||
| # If you want to use the new, still somewhat experimental code that uses | # If you want to use the new, still somewhat experimental code that uses | ||||
| # thread-local storage instead of a central memory buffer in memory.c | # thread-local storage instead of a central memory buffer in memory.c | ||||
| # Note that if your system uses GLIBC, it needs to have at least glibc 2.21 | # Note that if your system uses GLIBC, it needs to have at least glibc 2.21 | ||||
| # for this to work. | # for this to work. | ||||
| USE_TLS = 1 | |||||
| # USE_TLS = 1 | |||||
| # If you want to drive whole 64bit region by BLAS. Not all Fortran | # If you want to drive whole 64bit region by BLAS. Not all Fortran | ||||
| # compilers support this. It's safe to keep it commented out if you | # compilers support this. It's safe to keep it commented out if you | ||||
| @@ -152,6 +152,9 @@ NO_AFFINITY = 1 | |||||
| # FUNCTION_PROFILE = 1 | # FUNCTION_PROFILE = 1 | ||||
| # Support for IEEE quad precision(it's *real* REAL*16)( under testing) | # Support for IEEE quad precision(it's *real* REAL*16)( under testing) | ||||
| # This option should not be used - it is a holdover from unfinished code present | |||||
| # in the original GotoBLAS2 library that may be usable as a starting point but | |||||
| # is not even expected to compile in its present form. | |||||
| # QUAD_PRECISION = 1 | # QUAD_PRECISION = 1 | ||||
| # Threads are still working for a while after finishing BLAS operation | # Threads are still working for a while after finishing BLAS operation | ||||
| @@ -189,8 +192,8 @@ NO_AFFINITY = 1 | |||||
| # Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT | # Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT | ||||
| # COMMON_OPT = -O2 | # COMMON_OPT = -O2 | ||||
| # gfortran option for LAPACK | |||||
| # enable this flag only on 64bit Linux and if you need a thread safe lapack library | |||||
| # gfortran option for LAPACK to improve thread-safety | |||||
| # It is enabled by default in Makefile.system for gfortran | |||||
| # Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT | # Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT | ||||
| # FCOMMON_OPT = -frecursive | # FCOMMON_OPT = -frecursive | ||||
| @@ -9,6 +9,11 @@ ifndef TOPDIR | |||||
| TOPDIR = . | TOPDIR = . | ||||
| endif | endif | ||||
| # Catch conflicting usage of ARCH in some BSD environments | |||||
| ifeq ($(ARCH), amd64) | |||||
| override ARCH=x86_64 | |||||
| endif | |||||
| NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib | NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib | ||||
| # Default C compiler | # Default C compiler | ||||
| @@ -505,6 +510,13 @@ CCOMMON_OPT += $(XCCOMMON_OPT) | |||||
| #CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)' | #CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)' | ||||
| endif | endif | ||||
| ifeq ($(ARCH), arm64) | |||||
| DYNAMIC_CORE = ARMV8 | |||||
| DYNAMIC_CORE += CORTEXA57 | |||||
| DYNAMIC_CORE += THUNDERX | |||||
| DYNAMIC_CORE += THUNDERX2T99 | |||||
| endif | |||||
| # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty | # If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty | ||||
| ifndef DYNAMIC_CORE | ifndef DYNAMIC_CORE | ||||
| override DYNAMIC_ARCH= | override DYNAMIC_ARCH= | ||||
| @@ -713,6 +725,8 @@ endif | |||||
| ifeq ($(F_COMPILER), GFORTRAN) | ifeq ($(F_COMPILER), GFORTRAN) | ||||
| CCOMMON_OPT += -DF_INTERFACE_GFORT | CCOMMON_OPT += -DF_INTERFACE_GFORT | ||||
| FCOMMON_OPT += -Wall | FCOMMON_OPT += -Wall | ||||
| # make single-threaded LAPACK calls thread-safe #1847 | |||||
| FCOMMON_OPT += -frecursive | |||||
| #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc | #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc | ||||
| ifneq ($(NO_LAPACK), 1) | ifneq ($(NO_LAPACK), 1) | ||||
| EXTRALIB += -lgfortran | EXTRALIB += -lgfortran | ||||
| @@ -1022,6 +1036,8 @@ ifdef USE_TLS | |||||
| CCOMMON_OPT += -DUSE_TLS | CCOMMON_OPT += -DUSE_TLS | ||||
| endif | endif | ||||
| CCOMMON_OPT += -DVERSION=\"$(VERSION)\" | |||||
| ifndef SYMBOLPREFIX | ifndef SYMBOLPREFIX | ||||
| SYMBOLPREFIX = | SYMBOLPREFIX = | ||||
| endif | endif | ||||
| @@ -1199,7 +1215,11 @@ endif | |||||
| LIBDLLNAME = $(LIBPREFIX).dll | LIBDLLNAME = $(LIBPREFIX).dll | ||||
| IMPLIBNAME = lib$(LIBNAMEBASE).dll.a | IMPLIBNAME = lib$(LIBNAMEBASE).dll.a | ||||
| ifneq ($(OSNAME), AIX) | |||||
| LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so) | LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so) | ||||
| else | |||||
| LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a) | |||||
| endif | |||||
| LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib) | LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib) | ||||
| LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def) | LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def) | ||||
| LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp) | LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp) | ||||
| @@ -15,6 +15,11 @@ FCOMMON_OPT += -march=skylake-avx512 | |||||
| ifeq ($(OSNAME), CYGWIN_NT) | ifeq ($(OSNAME), CYGWIN_NT) | ||||
| CCOMMON_OPT += -fno-asynchronous-unwind-tables | CCOMMON_OPT += -fno-asynchronous-unwind-tables | ||||
| endif | endif | ||||
| ifeq ($(OSNAME), WINNT) | |||||
| ifeq ($(C_COMPILER), GCC) | |||||
| CCOMMON_OPT += -fno-asynchronous-unwind-tables | |||||
| endif | |||||
| endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -83,8 +83,11 @@ ARMV5 | |||||
| 8.ARM 64-bit CPU: | 8.ARM 64-bit CPU: | ||||
| ARMV8 | ARMV8 | ||||
| CORTEXA53 | |||||
| CORTEXA57 | CORTEXA57 | ||||
| VULCAN | |||||
| CORTEXA72 | |||||
| CORTEXA73 | |||||
| FALKOR | |||||
| THUNDERX | THUNDERX | ||||
| THUNDERX2T99 | THUNDERX2T99 | ||||
| @@ -205,7 +205,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/); | |||||
| $no_avx512= 0; | $no_avx512= 0; | ||||
| if (($architecture eq "x86") || ($architecture eq "x86_64")) { | if (($architecture eq "x86") || ($architecture eq "x86_64")) { | ||||
| $code = '"vbroadcastss -4 * 4(%rsi), %zmm2"'; | $code = '"vbroadcastss -4 * 4(%rsi), %zmm2"'; | ||||
| print $tmpf "int main(void){ __asm__ volatile($code); }\n"; | |||||
| print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; | |||||
| $args = " -march=skylake-avx512 -o $tmpf.o -x c $tmpf"; | $args = " -march=skylake-avx512 -o $tmpf.o -x c $tmpf"; | ||||
| my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null"); | my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null"); | ||||
| system(@cmd) == 0; | system(@cmd) == 0; | ||||
| @@ -51,7 +51,8 @@ typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=1 | |||||
| typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO; | typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO; | ||||
| typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG; | typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG; | ||||
| typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE; | typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE; | ||||
| typedef CBLAS_ORDER CBLAS_LAYOUT; | |||||
| float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); | float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); | ||||
| double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); | double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); | ||||
| float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); | float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy); | ||||
| @@ -3,6 +3,11 @@ | |||||
| ## Description: Ported from portion of OpenBLAS/Makefile.system | ## Description: Ported from portion of OpenBLAS/Makefile.system | ||||
| ## Sets Fortran related variables. | ## Sets Fortran related variables. | ||||
| if (INTERFACE64) | |||||
| set(SUFFIX64 64) | |||||
| set(SUFFIX64_UNDERSCORE _64) | |||||
| endif() | |||||
| if (${F_COMPILER} STREQUAL "FLANG") | if (${F_COMPILER} STREQUAL "FLANG") | ||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG") | set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG") | ||||
| if (BINARY64 AND INTERFACE64) | if (BINARY64 AND INTERFACE64) | ||||
| @@ -39,7 +44,7 @@ endif () | |||||
| if (${F_COMPILER} STREQUAL "GFORTRAN") | if (${F_COMPILER} STREQUAL "GFORTRAN") | ||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") | set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") | ||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") | |||||
| set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive") | |||||
| #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc | #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc | ||||
| if (NOT NO_LAPACK) | if (NOT NO_LAPACK) | ||||
| # Append libgfortran to the extra link libraries already collected in EXTRALIB. | # Append libgfortran to the extra link libraries already collected in EXTRALIB. | ||||
| set(EXTRALIB "${EXTRALIB} -lgfortran") | set(EXTRALIB "${EXTRALIB} -lgfortran") | ||||
| @@ -1,4 +1,5 @@ | |||||
| libdir=@CMAKE_INSTALL_FULL_LIBDIR@ | libdir=@CMAKE_INSTALL_FULL_LIBDIR@ | ||||
| libsuffix=@SUFFIX64_UNDERSCORE@ | |||||
| includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ | includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ | ||||
| openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@ | openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@ | ||||
| @@ -6,5 +7,5 @@ Name: OpenBLAS | |||||
| Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version | Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version | ||||
| Version: @OPENBLAS_VERSION@ | Version: @OPENBLAS_VERSION@ | ||||
| URL: https://github.com/xianyi/OpenBLAS | URL: https://github.com/xianyi/OpenBLAS | ||||
| Libs: -L${libdir} -lopenblas | |||||
| Libs: -L${libdir} -lopenblas${libsuffix} | |||||
| Cflags: -I${includedir} | Cflags: -I${includedir} | ||||
| @@ -41,6 +41,12 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| # When the requested TARGET is SKYLAKEX and NO_AVX512 is not set, append | |||||
| # -march=skylake-avx512 to KERNEL_DEFINITIONS. | |||||
| if (DEFINED TARGET) | |||||
| # TARGET is quoted so that a defined-but-empty value cannot break the comparison. | |||||
| if ("${TARGET}" STREQUAL "SKYLAKEX" AND NOT NO_AVX512) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED TARGET) | if (DEFINED TARGET) | ||||
| message(STATUS "Targeting the ${TARGET} architecture.") | message(STATUS "Targeting the ${TARGET} architecture.") | ||||
| set(GETARCH_FLAGS "-DFORCE_${TARGET}") | set(GETARCH_FLAGS "-DFORCE_${TARGET}") | ||||
| @@ -304,6 +310,8 @@ if (MIXED_MEMORY_ALLOCATION) | |||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION") | set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION") | ||||
| endif () | endif () | ||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DVERSION=\"\\\"${OpenBLAS_VERSION}\\\"\"") | |||||
| set(REVISION "-r${OpenBLAS_VERSION}") | set(REVISION "-r${OpenBLAS_VERSION}") | ||||
| set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION}) | set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION}) | ||||
| @@ -10,6 +10,16 @@ if (${HOST_OS} STREQUAL "WINDOWS") | |||||
| set(HOST_OS WINNT) | set(HOST_OS WINNT) | ||||
| endif () | endif () | ||||
# Check whether we are building natively on Android (Termux): the host is
# reported as LINUX, but `uname -o` prints "Android" there. In that case
# HOST_OS is switched to ANDROID.
if ("${HOST_OS}" STREQUAL "LINUX")
  # execute_process() does not go through a shell, so the trailing newline is
  # removed with OUTPUT_STRIP_TRAILING_WHITESPACE instead of piping through tr.
  execute_process(
    COMMAND uname -o
    OUTPUT_VARIABLE OPERATING_SYSTEM
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Quoted: OPERATING_SYSTEM is empty if uname failed or produced no output,
  # and an unquoted empty expansion would make this if() a syntax error.
  if ("${OPERATING_SYSTEM}" MATCHES "Android")
    set(HOST_OS ANDROID)
  endif()
endif()
| if(CMAKE_COMPILER_IS_GNUCC AND WIN32) | if(CMAKE_COMPILER_IS_GNUCC AND WIN32) | ||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine | execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine | ||||
| OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE | OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE | ||||
| @@ -67,7 +77,7 @@ else() | |||||
| endif() | endif() | ||||
| if (X86_64 OR X86) | if (X86_64 OR X86) | ||||
| file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "int main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }") | |||||
| file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }") | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -v -o ${PROJECT_BINARY_DIR}/avx512.o -x c ${PROJECT_BINARY_DIR}/avx512.tmp OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512) | execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -v -o ${PROJECT_BINARY_DIR}/avx512.o -x c ${PROJECT_BINARY_DIR}/avx512.tmp OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512) | ||||
| if (NO_AVX512 EQUAL 1) | if (NO_AVX512 EQUAL 1) | ||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512") | set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512") | ||||
| @@ -183,7 +183,7 @@ extern "C" { | |||||
| #define ALLOCA_ALIGN 63UL | #define ALLOCA_ALIGN 63UL | ||||
| #define NUM_BUFFERS (MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER) | |||||
| #define NUM_BUFFERS MAX(50,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)) | |||||
| #ifdef NEEDBUNDERSCORE | #ifdef NEEDBUNDERSCORE | ||||
| #define BLASFUNC(FUNC) FUNC##_ | #define BLASFUNC(FUNC) FUNC##_ | ||||
| @@ -94,7 +94,7 @@ static inline unsigned int rpcc(void){ | |||||
| #define RPCC_DEFINED | #define RPCC_DEFINED | ||||
| #ifndef NO_AFFINITY | #ifndef NO_AFFINITY | ||||
| #define WHEREAMI | |||||
| //#define WHEREAMI | |||||
| static inline int WhereAmI(void){ | static inline int WhereAmI(void){ | ||||
| int ret=0; | int ret=0; | ||||
| __asm__ __volatile__(".set push \n" | __asm__ __volatile__(".set push \n" | ||||
| @@ -29,25 +29,37 @@ | |||||
| #define CPU_UNKNOWN 0 | #define CPU_UNKNOWN 0 | ||||
| #define CPU_ARMV8 1 | #define CPU_ARMV8 1 | ||||
| #define CPU_CORTEXA57 2 | |||||
| #define CPU_VULCAN 3 | |||||
| #define CPU_THUNDERX 4 | |||||
| #define CPU_THUNDERX2T99 5 | |||||
| // Arm | |||||
| #define CPU_CORTEXA53 2 | |||||
| #define CPU_CORTEXA57 3 | |||||
| #define CPU_CORTEXA72 4 | |||||
| #define CPU_CORTEXA73 5 | |||||
| // Qualcomm | |||||
| #define CPU_FALKOR 6 | |||||
| // Cavium | |||||
| #define CPU_THUNDERX 7 | |||||
| #define CPU_THUNDERX2T99 8 | |||||
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| "ARMV8" , | "ARMV8" , | ||||
| "CORTEXA53", | |||||
| "CORTEXA57", | "CORTEXA57", | ||||
| "VULCAN", | |||||
| "CORTEXA72", | |||||
| "CORTEXA73", | |||||
| "FALKOR", | |||||
| "THUNDERX", | "THUNDERX", | ||||
| "THUNDERX2T99" | "THUNDERX2T99" | ||||
| }; | }; | ||||
| static char *cpuname_lower[] = { | static char *cpuname_lower[] = { | ||||
| "unknown", | "unknown", | ||||
| "armv8" , | |||||
| "armv8", | |||||
| "cortexa53", | |||||
| "cortexa57", | "cortexa57", | ||||
| "vulcan", | |||||
| "cortexa72", | |||||
| "cortexa73", | |||||
| "falkor", | |||||
| "thunderx", | "thunderx", | ||||
| "thunderx2t99" | "thunderx2t99" | ||||
| }; | }; | ||||
| @@ -114,14 +126,24 @@ int detect(void) | |||||
| fclose(infile); | fclose(infile); | ||||
| if(cpu_part != NULL && cpu_implementer != NULL) { | if(cpu_part != NULL && cpu_implementer != NULL) { | ||||
| if (strstr(cpu_implementer, "0x41") && | |||||
| (strstr(cpu_part, "0xd07") || strstr(cpu_part,"0xd08") || strstr(cpu_part,"0xd03") )) | |||||
| return CPU_CORTEXA57; //or compatible A53, A72 | |||||
| else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42")) | |||||
| return CPU_VULCAN; | |||||
| else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43")) | |||||
| // Arm | |||||
| if (strstr(cpu_implementer, "0x41")) { | |||||
| if (strstr(cpu_part, "0xd03")) | |||||
| return CPU_CORTEXA53; | |||||
| else if (strstr(cpu_part, "0xd07")) | |||||
| return CPU_CORTEXA57; | |||||
| else if (strstr(cpu_part, "0xd08")) | |||||
| return CPU_CORTEXA72; | |||||
| else if (strstr(cpu_part, "0xd09")) | |||||
| return CPU_CORTEXA73; | |||||
| } | |||||
| // Qualcomm | |||||
| else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) | |||||
| return CPU_FALKOR; | |||||
| // Cavium | |||||
| else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0a1")) | |||||
| return CPU_THUNDERX; | return CPU_THUNDERX; | ||||
| else if (strstr(cpu_part, "0x0af") && strstr(cpu_implementer, "0x43")) | |||||
| else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af")) | |||||
| return CPU_THUNDERX2T99; | return CPU_THUNDERX2T99; | ||||
| } | } | ||||
| @@ -180,64 +202,63 @@ void get_subdirname(void) | |||||
| void get_cpuconfig(void) | void get_cpuconfig(void) | ||||
| { | { | ||||
| // All arches should define ARMv8 | |||||
| printf("#define ARMV8\n"); | |||||
| printf("#define HAVE_NEON\n"); // This shouldn't be necessary | |||||
| printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary | |||||
| int d = detect(); | int d = detect(); | ||||
| switch (d) | switch (d) | ||||
| { | { | ||||
| case CPU_CORTEXA53: | |||||
| printf("#define %s\n", cpuname[d]); | |||||
| // Fall-through | |||||
| case CPU_ARMV8: | case CPU_ARMV8: | ||||
| printf("#define ARMV8\n"); | |||||
| printf("#define L1_DATA_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 262144\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||||
| break; | |||||
| case CPU_VULCAN: | |||||
| printf("#define VULCAN \n"); | |||||
| printf("#define HAVE_VFP \n"); | |||||
| printf("#define HAVE_VFPV3 \n"); | |||||
| printf("#define HAVE_NEON \n"); | |||||
| printf("#define HAVE_VFPV4 \n"); | |||||
| printf("#define L1_CODE_SIZE 32768 \n"); | |||||
| printf("#define L1_CODE_LINESIZE 64 \n"); | |||||
| printf("#define L1_CODE_ASSOCIATIVE 8 \n"); | |||||
| printf("#define L1_DATA_SIZE 32768 \n"); | |||||
| printf("#define L1_DATA_LINESIZE 64 \n"); | |||||
| printf("#define L1_DATA_ASSOCIATIVE 8 \n"); | |||||
| printf("#define L2_SIZE 262144 \n"); | |||||
| printf("#define L2_LINESIZE 64 \n"); | |||||
| printf("#define L2_ASSOCIATIVE 8 \n"); | |||||
| printf("#define L3_SIZE 33554432 \n"); | |||||
| printf("#define L3_LINESIZE 64 \n"); | |||||
| printf("#define L3_ASSOCIATIVE 32 \n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||||
| printf("#define DTB_SIZE 4096 \n"); | |||||
| // Minimum parameters for ARMv8 (based on A53) | |||||
| printf("#define L1_DATA_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 262144\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||||
| break; | break; | ||||
| case CPU_CORTEXA57: | case CPU_CORTEXA57: | ||||
| printf("#define CORTEXA57\n"); | |||||
| printf("#define HAVE_VFP\n"); | |||||
| printf("#define HAVE_VFPV3\n"); | |||||
| printf("#define HAVE_NEON\n"); | |||||
| printf("#define HAVE_VFPV4\n"); | |||||
| case CPU_CORTEXA72: | |||||
| case CPU_CORTEXA73: | |||||
| // Common minimum settings for these Arm cores | |||||
| // Can change a lot, but we need to be conservative | |||||
| // TODO: detect info from /sys if possible | |||||
| printf("#define %s\n", cpuname[d]); | |||||
| printf("#define L1_CODE_SIZE 49152\n"); | printf("#define L1_CODE_SIZE 49152\n"); | ||||
| printf("#define L1_CODE_LINESIZE 64\n"); | printf("#define L1_CODE_LINESIZE 64\n"); | ||||
| printf("#define L1_CODE_ASSOCIATIVE 3\n"); | printf("#define L1_CODE_ASSOCIATIVE 3\n"); | ||||
| printf("#define L1_DATA_SIZE 32768\n"); | printf("#define L1_DATA_SIZE 32768\n"); | ||||
| printf("#define L1_DATA_LINESIZE 64\n"); | printf("#define L1_DATA_LINESIZE 64\n"); | ||||
| printf("#define L1_DATA_ASSOCIATIVE 2\n"); | printf("#define L1_DATA_ASSOCIATIVE 2\n"); | ||||
| printf("#define L2_SIZE 2097152\n"); | |||||
| printf("#define L2_SIZE 524288\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | printf("#define L2_LINESIZE 64\n"); | ||||
| printf("#define L2_ASSOCIATIVE 16\n"); | printf("#define L2_ASSOCIATIVE 16\n"); | ||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | printf("#define DTB_DEFAULT_ENTRIES 64\n"); | ||||
| printf("#define DTB_SIZE 4096\n"); | printf("#define DTB_SIZE 4096\n"); | ||||
| break; | break; | ||||
| case CPU_FALKOR: | |||||
| printf("#define FALKOR\n"); | |||||
| printf("#define L1_CODE_SIZE 65536\n"); | |||||
| printf("#define L1_CODE_LINESIZE 64\n"); | |||||
| printf("#define L1_DATA_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_LINESIZE 128\n"); | |||||
| printf("#define L2_SIZE 524288\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 16\n"); | |||||
| break; | |||||
| case CPU_THUNDERX: | case CPU_THUNDERX: | ||||
| printf("#define ARMV8\n"); | |||||
| printf("#define THUNDERX\n"); | printf("#define THUNDERX\n"); | ||||
| printf("#define L1_DATA_SIZE 32768\n"); | printf("#define L1_DATA_SIZE 32768\n"); | ||||
| printf("#define L1_DATA_LINESIZE 128\n"); | printf("#define L1_DATA_LINESIZE 128\n"); | ||||
| @@ -250,10 +271,6 @@ void get_cpuconfig(void) | |||||
| case CPU_THUNDERX2T99: | case CPU_THUNDERX2T99: | ||||
| printf("#define VULCAN \n"); | printf("#define VULCAN \n"); | ||||
| printf("#define HAVE_VFP \n"); | |||||
| printf("#define HAVE_VFPV3 \n"); | |||||
| printf("#define HAVE_NEON \n"); | |||||
| printf("#define HAVE_VFPV4 \n"); | |||||
| printf("#define L1_CODE_SIZE 32768 \n"); | printf("#define L1_CODE_SIZE 32768 \n"); | ||||
| printf("#define L1_CODE_LINESIZE 64 \n"); | printf("#define L1_CODE_LINESIZE 64 \n"); | ||||
| printf("#define L1_CODE_ASSOCIATIVE 8 \n"); | printf("#define L1_CODE_ASSOCIATIVE 8 \n"); | ||||
| @@ -56,6 +56,7 @@ | |||||
| #define CPUTYPE_CELL 6 | #define CPUTYPE_CELL 6 | ||||
| #define CPUTYPE_PPCG4 7 | #define CPUTYPE_PPCG4 7 | ||||
| #define CPUTYPE_POWER8 8 | #define CPUTYPE_POWER8 8 | ||||
| #define CPUTYPE_POWER9 9 | |||||
| char *cpuname[] = { | char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| @@ -66,7 +67,8 @@ char *cpuname[] = { | |||||
| "POWER6", | "POWER6", | ||||
| "CELL", | "CELL", | ||||
| "PPCG4", | "PPCG4", | ||||
| "POWER8" | |||||
| "POWER8", | |||||
| "POWER9" | |||||
| }; | }; | ||||
| char *lowercpuname[] = { | char *lowercpuname[] = { | ||||
| @@ -78,7 +80,8 @@ char *lowercpuname[] = { | |||||
| "power6", | "power6", | ||||
| "cell", | "cell", | ||||
| "ppcg4", | "ppcg4", | ||||
| "power8" | |||||
| "power8", | |||||
| "power9" | |||||
| }; | }; | ||||
| char *corename[] = { | char *corename[] = { | ||||
| @@ -90,7 +93,8 @@ char *corename[] = { | |||||
| "POWER6", | "POWER6", | ||||
| "CELL", | "CELL", | ||||
| "PPCG4", | "PPCG4", | ||||
| "POWER8" | |||||
| "POWER8", | |||||
| "POWER8" | |||||
| }; | }; | ||||
| int detect(void){ | int detect(void){ | ||||
| @@ -120,6 +124,7 @@ int detect(void){ | |||||
| if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | ||||
| if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | ||||
| if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | ||||
| if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8; | |||||
| if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | ||||
| if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | ||||
| @@ -127,6 +132,33 @@ int detect(void){ | |||||
| #endif | #endif | ||||
#ifdef _AIX
  /*
   * AIX: read the "Processor Type" line printed by prtconf and map the text
   * after the colon to a CPUTYPE_* value. Any failure along the way (pipe
   * cannot be opened, no matching line, no colon, unknown name) falls through
   * to the CPUTYPE_POWER5 default at the end.
   */
  FILE *infile;
  char buffer[512], *p;

  p = (char *)NULL;

  /* popen() requires an explicit mode; "r" reads the command's stdout. */
  infile = popen("prtconf|grep 'Processor Type'", "r");

  if (infile != NULL) {
    while (fgets(buffer, sizeof(buffer), infile)){
      if (!strncmp("Pro", buffer, 3)){
        char *colon = strchr(buffer, ':');

        /* Skip ": " only when both characters are actually present. */
        if (colon != NULL && colon[1] != '\0') p = colon + 2;
        break;
      }
    }

    pclose(infile);
  }

  if (p != NULL) {
    if (!strncasecmp(p, "POWER3", 6)) return CPUTYPE_POWER3;
    if (!strncasecmp(p, "POWER4", 6)) return CPUTYPE_POWER4;
    if (!strncasecmp(p, "PPC970", 6)) return CPUTYPE_PPC970;
    if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
    if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
    /* POWER7 is reported as POWER6 and POWER9 as POWER8. */
    if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
    if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
    if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8;
    if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
    if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
  }

  return CPUTYPE_POWER5;
#endif
| @@ -143,12 +175,12 @@ int detect(void){ | |||||
| return CPUTYPE_PPC970; | return CPUTYPE_PPC970; | ||||
| #endif | #endif | ||||
| #if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) | |||||
| #if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) | |||||
| int id; | int id; | ||||
| id = __asm __volatile("mfpvr %0" : "=r"(id)); | |||||
| __asm __volatile("mfpvr %0" : "=r"(id)); | |||||
| switch ( id >> 16 ) { | switch ( id >> 16 ) { | ||||
| case 0x4e: // POWER9 | case 0x4e: // POWER9 | ||||
| return return CPUTYPE_POWER8; | |||||
| return CPUTYPE_POWER8; | |||||
| break; | break; | ||||
| case 0x4d: | case 0x4d: | ||||
| case 0x4b: // POWER8/8E | case 0x4b: // POWER8/8E | ||||
| @@ -2009,6 +2009,8 @@ int get_coretype(void){ | |||||
| switch (model) { | switch (model) { | ||||
| case 1: | case 1: | ||||
| // AMD Ryzen | // AMD Ryzen | ||||
| case 8: | |||||
| // Ryzen 2 | |||||
| if(support_avx()) | if(support_avx()) | ||||
| #ifndef NO_AVX2 | #ifndef NO_AVX2 | ||||
| return CORE_ZEN; | return CORE_ZEN; | ||||
| @@ -62,9 +62,36 @@ | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #ifndef TRANSA | |||||
| #ifndef thread_local | |||||
| # if __STDC_VERSION__ >= 201112 && !defined __STDC_NO_THREADS__ | |||||
| # define thread_local _Thread_local | |||||
| # elif defined _WIN32 && ( \ | |||||
| defined _MSC_VER || \ | |||||
| defined __ICL || \ | |||||
| defined __DMC__ || \ | |||||
| defined __BORLANDC__ ) | |||||
| # define thread_local __declspec(thread) | |||||
| /* note that ICC (linux) and Clang are covered by __GNUC__ */ | |||||
| # elif defined __GNUC__ || \ | |||||
| defined __SUNPRO_C || \ | |||||
| defined __xlC__ | |||||
| # define thread_local __thread | |||||
| # else | |||||
| # define UNSAFE | |||||
| #endif | |||||
| #endif | |||||
| #if defined USE_OPENMP | |||||
| #undef UNSAFE | |||||
| #endif | |||||
| #if !defined(TRANSA) && !defined(UNSAFE) | |||||
| #define Y_DUMMY_NUM 1024 | #define Y_DUMMY_NUM 1024 | ||||
| #if defined(USE_OPENMP) | |||||
| static FLOAT y_dummy[Y_DUMMY_NUM]; | static FLOAT y_dummy[Y_DUMMY_NUM]; | ||||
| #pragma omp threadprivate(y_dummy) | |||||
| # else | |||||
| static thread_local FLOAT y_dummy[Y_DUMMY_NUM]; | |||||
| # endif | |||||
| #endif | #endif | ||||
| static int gemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | static int gemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){ | ||||
| @@ -105,10 +132,12 @@ static int gemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
| #ifdef TRANSA | #ifdef TRANSA | ||||
| y += n_from * incy * COMPSIZE; | y += n_from * incy * COMPSIZE; | ||||
| #else | #else | ||||
| # ifndef UNSAFE | |||||
| //for split matrix row (n) direction and vector x of gemv_n | //for split matrix row (n) direction and vector x of gemv_n | ||||
| x += n_from * incx * COMPSIZE; | x += n_from * incx * COMPSIZE; | ||||
| //store partial result for every thread | //store partial result for every thread | ||||
| y += (m_to - m_from) * 1 * COMPSIZE * pos; | y += (m_to - m_from) * 1 * COMPSIZE * pos; | ||||
| # endif | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -136,7 +165,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x | |||||
| BLASLONG width, i, num_cpu; | BLASLONG width, i, num_cpu; | ||||
| #ifndef TRANSA | |||||
| #if !defined(TRANSA) && !defined(UNSAFE) | |||||
| int split_x=0; | int split_x=0; | ||||
| #endif | #endif | ||||
| @@ -212,7 +241,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x | |||||
| i -= width; | i -= width; | ||||
| } | } | ||||
| #ifndef TRANSA | |||||
| #if !defined(TRANSA) && !defined(UNSAFE) | |||||
| //try to split matrix on row direction and x. | //try to split matrix on row direction and x. | ||||
| //Then, reduction. | //Then, reduction. | ||||
| if (num_cpu < nthreads) { | if (num_cpu < nthreads) { | ||||
| @@ -272,7 +301,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x | |||||
| exec_blas(num_cpu, queue); | exec_blas(num_cpu, queue); | ||||
| } | } | ||||
| #ifndef TRANSA | |||||
| #if !defined(TRANSA) && !defined(UNSAFE) | |||||
| if(split_x==1){ | if(split_x==1){ | ||||
| //reduction | //reduction | ||||
| for(i=0; i<num_cpu; i++){ | for(i=0; i<num_cpu; i++){ | ||||
| @@ -48,6 +48,10 @@ | |||||
| #define SWITCH_RATIO 2 | #define SWITCH_RATIO 2 | ||||
| #endif | #endif | ||||
| #ifndef GEMM_PREFERED_SIZE | |||||
| #define GEMM_PREFERED_SIZE 1 | |||||
| #endif | |||||
| //The array of job_t may overflow the stack. | //The array of job_t may overflow the stack. | ||||
| //Instead, use malloc to alloc job_t. | //Instead, use malloc to alloc job_t. | ||||
| #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD | #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD | ||||
| @@ -510,10 +514,29 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| return 0; | return 0; | ||||
| } | } | ||||
/*
 * Round `width` up to the next multiple of `multiple`.
 *
 * The width is returned unchanged when it does not exceed `multiple`, or when
 * `remainder` (the amount of work still to be distributed) is smaller than
 * `multiple`.
 */
static int round_up(int remainder, int width, int multiple)
{
  if (width <= multiple || remainder < multiple) {
    return width;
  }

  return ((width + multiple - 1) / multiple) * multiple;
}
| static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | ||||
| *range_n, FLOAT *sa, FLOAT *sb, | *range_n, FLOAT *sa, FLOAT *sb, | ||||
| BLASLONG nthreads_m, BLASLONG nthreads_n) { | BLASLONG nthreads_m, BLASLONG nthreads_n) { | ||||
| #ifndef USE_OPENMP | |||||
| #ifndef OS_WINDOWS | |||||
| static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER; | |||||
| #else | |||||
| CRITICAL_SECTION level3_lock; | |||||
| InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||||
| #endif | |||||
| #endif | |||||
| blas_arg_t newarg; | blas_arg_t newarg; | ||||
| #ifndef USE_ALLOC_HEAP | #ifndef USE_ALLOC_HEAP | ||||
| @@ -554,6 +577,14 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #ifndef USE_OPENMP | |||||
| #ifndef OS_WINDOWS | |||||
| pthread_mutex_lock(&level3_lock); | |||||
| #else | |||||
| EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||||
| #endif | |||||
| #endif | |||||
| #ifdef USE_ALLOC_HEAP | #ifdef USE_ALLOC_HEAP | ||||
| /* Dynamically allocate workspace */ | /* Dynamically allocate workspace */ | ||||
| job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t)); | job = (job_t*)malloc(MAX_CPU_NUMBER * sizeof(job_t)); | ||||
| @@ -601,9 +632,14 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | |||||
| num_parts = 0; | num_parts = 0; | ||||
| while (m > 0){ | while (m > 0){ | ||||
| width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts); | width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts); | ||||
| width = round_up(m, width, GEMM_PREFERED_SIZE); | |||||
| m -= width; | m -= width; | ||||
| if (m < 0) width = width + m; | if (m < 0) width = width + m; | ||||
| range_M[num_parts + 1] = range_M[num_parts] + width; | range_M[num_parts + 1] = range_M[num_parts] + width; | ||||
| num_parts ++; | num_parts ++; | ||||
| } | } | ||||
| for (i = num_parts; i < MAX_CPU_NUMBER; i++) { | for (i = num_parts; i < MAX_CPU_NUMBER; i++) { | ||||
| @@ -645,9 +681,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | |||||
| if (width < SWITCH_RATIO) { | if (width < SWITCH_RATIO) { | ||||
| width = SWITCH_RATIO; | width = SWITCH_RATIO; | ||||
| } | } | ||||
| width = round_up(n, width, GEMM_PREFERED_SIZE); | |||||
| n -= width; | n -= width; | ||||
| if (n < 0) width = width + n; | if (n < 0) width = width + n; | ||||
| range_N[num_parts + 1] = range_N[num_parts] + width; | range_N[num_parts + 1] = range_N[num_parts] + width; | ||||
| num_parts ++; | num_parts ++; | ||||
| } | } | ||||
| for (j = num_parts; j < MAX_CPU_NUMBER; j++) { | for (j = num_parts; j < MAX_CPU_NUMBER; j++) { | ||||
| @@ -671,6 +710,14 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | |||||
| free(job); | free(job); | ||||
| #endif | #endif | ||||
| #ifndef USE_OPENMP | |||||
| #ifndef OS_WINDOWS | |||||
| pthread_mutex_unlock(&level3_lock); | |||||
| #else | |||||
| LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||||
| #endif | |||||
| #endif | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -15,7 +15,11 @@ endif | |||||
| # COMMONOBJS += info.$(SUFFIX) | # COMMONOBJS += info.$(SUFFIX) | ||||
| ifeq ($(DYNAMIC_ARCH), 1) | ifeq ($(DYNAMIC_ARCH), 1) | ||||
| ifeq ($(ARCH),arm64) | |||||
| COMMONOBJS += dynamic_arm64.$(SUFFIX) | |||||
| else | |||||
| COMMONOBJS += dynamic.$(SUFFIX) | COMMONOBJS += dynamic.$(SUFFIX) | ||||
| endif | |||||
| else | else | ||||
| COMMONOBJS += parameter.$(SUFFIX) | COMMONOBJS += parameter.$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -71,7 +75,11 @@ BLAS_SERVER = blas_server.c | |||||
| endif | endif | ||||
| ifeq ($(DYNAMIC_ARCH), 1) | ifeq ($(DYNAMIC_ARCH), 1) | ||||
| ifeq ($(ARCH),arm64) | |||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX) | |||||
| else | |||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | ||||
| endif | |||||
| else | else | ||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -582,7 +582,7 @@ int blas_thread_init(void){ | |||||
| if(ret!=0){ | if(ret!=0){ | ||||
| struct rlimit rlim; | struct rlimit rlim; | ||||
| const char *msg = strerror(ret); | const char *msg = strerror(ret); | ||||
| fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg); | |||||
| fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create failed for thread %ld of %ld: %s\n", i+1,blas_num_threads,msg); | |||||
| #ifdef RLIMIT_NPROC | #ifdef RLIMIT_NPROC | ||||
| if(0 == getrlimit(RLIMIT_NPROC, &rlim)) { | if(0 == getrlimit(RLIMIT_NPROC, &rlim)) { | ||||
| fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC " | fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC " | ||||
| @@ -850,6 +850,11 @@ void goto_set_num_threads(int num_threads) { | |||||
| long i; | long i; | ||||
| #ifdef SMP_SERVER | |||||
| // Handle lazy re-init of the thread-pool after a POSIX fork | |||||
| if (unlikely(blas_server_avail == 0)) blas_thread_init(); | |||||
| #endif | |||||
| if (num_threads < 1) num_threads = blas_num_threads; | if (num_threads < 1) num_threads = blas_num_threads; | ||||
| #ifndef NO_AFFINITY | #ifndef NO_AFFINITY | ||||
| @@ -478,7 +478,12 @@ int BLASFUNC(blas_thread_shutdown)(void){ | |||||
| void goto_set_num_threads(int num_threads) | void goto_set_num_threads(int num_threads) | ||||
| { | { | ||||
| long i; | |||||
| long i; | |||||
| #if defined(SMP_SERVER) && defined(OS_CYGWIN_NT) | |||||
| // Handle lazy re-init of the thread-pool after a POSIX fork | |||||
| if (unlikely(blas_server_avail == 0)) blas_thread_init(); | |||||
| #endif | |||||
| if (num_threads < 1) num_threads = blas_cpu_number; | if (num_threads < 1) num_threads = blas_cpu_number; | ||||
| @@ -0,0 +1,198 @@ | |||||
| /*********************************************************************/ | |||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
| /* All rights reserved. */ | |||||
| /* */ | |||||
| /* Redistribution and use in source and binary forms, with or */ | |||||
| /* without modification, are permitted provided that the following */ | |||||
| /* conditions are met: */ | |||||
| /* */ | |||||
| /* 1. Redistributions of source code must retain the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer. */ | |||||
| /* */ | |||||
| /* 2. Redistributions in binary form must reproduce the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer in the documentation and/or other materials */ | |||||
| /* provided with the distribution. */ | |||||
| /* */ | |||||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||||
| /* */ | |||||
| /* The views and conclusions contained in the software and */ | |||||
| /* documentation are those of the authors and should not be */ | |||||
| /* interpreted as representing official policies, either expressed */ | |||||
| /* or implied, of The University of Texas at Austin. */ | |||||
| /*********************************************************************/ | |||||
| #include "common.h" | |||||
| #include <asm/hwcap.h> | |||||
| #include <sys/auxv.h> | |||||
| extern gotoblas_t gotoblas_ARMV8; | |||||
| extern gotoblas_t gotoblas_CORTEXA57; | |||||
| extern gotoblas_t gotoblas_THUNDERX; | |||||
| extern gotoblas_t gotoblas_THUNDERX2T99; | |||||
| extern void openblas_warning(int verbose, const char * msg); | |||||
| #define NUM_CORETYPES 4 | |||||
| /* | |||||
| * In case asm/hwcap.h is outdated on the build system, make sure | |||||
| * that HWCAP_CPUID is defined | |||||
| */ | |||||
| #ifndef HWCAP_CPUID | |||||
| #define HWCAP_CPUID (1 << 11) | |||||
| #endif | |||||
| #define get_cpu_ftr(id, var) ({ \ | |||||
| asm("mrs %0, "#id : "=r" (var)); \ | |||||
| }) | |||||
| static char *corename[] = { | |||||
| "armv8", | |||||
| "cortexa57", | |||||
| "thunderx", | |||||
| "thunderx2t99", | |||||
| "unknown" | |||||
| }; | |||||
| char *gotoblas_corename(void) { | |||||
| if (gotoblas == &gotoblas_ARMV8) return corename[ 0]; | |||||
| if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1]; | |||||
| if (gotoblas == &gotoblas_THUNDERX) return corename[ 2]; | |||||
| if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3]; | |||||
| return corename[NUM_CORETYPES]; | |||||
| } | |||||
| static gotoblas_t *force_coretype(char *coretype) { | |||||
| int i ; | |||||
| int found = -1; | |||||
| char message[128]; | |||||
| for ( i=0 ; i < NUM_CORETYPES; i++) | |||||
| { | |||||
| if (!strncasecmp(coretype, corename[i], 20)) | |||||
| { | |||||
| found = i; | |||||
| break; | |||||
| } | |||||
| } | |||||
| switch (found) | |||||
| { | |||||
| case 0: return (&gotoblas_ARMV8); | |||||
| case 1: return (&gotoblas_CORTEXA57); | |||||
| case 2: return (&gotoblas_THUNDERX); | |||||
| case 3: return (&gotoblas_THUNDERX2T99); | |||||
| } | |||||
| snprintf(message, 128, "Core not found: %s\n", coretype); | |||||
| openblas_warning(1, message); | |||||
| return NULL; | |||||
| } | |||||
| static gotoblas_t *get_coretype(void) { | |||||
| int implementer, variant, part, arch, revision, midr_el1; | |||||
| if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) { | |||||
| char coremsg[128]; | |||||
| snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n"); | |||||
| openblas_warning(1, coremsg); | |||||
| return NULL; | |||||
| } | |||||
| get_cpu_ftr(MIDR_EL1, midr_el1); | |||||
| /* | |||||
| * MIDR_EL1 | |||||
| * | |||||
| * 31 24 23 20 19 16 15 4 3 0 | |||||
| * ----------------------------------------------------------------- | |||||
| * | Implementer | Variant | Architecture | Part Number | Revision | | |||||
| * ----------------------------------------------------------------- | |||||
| */ | |||||
| implementer = (midr_el1 >> 24) & 0xFF; | |||||
| part = (midr_el1 >> 4) & 0xFFF; | |||||
| switch(implementer) | |||||
| { | |||||
| case 0x41: // ARM | |||||
| switch (part) | |||||
| { | |||||
| case 0xd07: // Cortex A57 | |||||
| case 0xd08: // Cortex A72 | |||||
| case 0xd03: // Cortex A53 | |||||
| return &gotoblas_CORTEXA57; | |||||
| } | |||||
| break; | |||||
| case 0x42: // Broadcom | |||||
| switch (part) | |||||
| { | |||||
| case 0x516: // Vulcan | |||||
| return &gotoblas_THUNDERX2T99; | |||||
| } | |||||
| break; | |||||
| case 0x43: // Cavium | |||||
| switch (part) | |||||
| { | |||||
| case 0x0a1: // ThunderX | |||||
| return &gotoblas_THUNDERX; | |||||
| case 0x0af: // ThunderX2 | |||||
| return &gotoblas_THUNDERX2T99; | |||||
| } | |||||
| break; | |||||
| } | |||||
| return NULL; | |||||
| } | |||||
| void gotoblas_dynamic_init(void) { | |||||
| char coremsg[128]; | |||||
| char coren[22]; | |||||
| char *p; | |||||
| if (gotoblas) return; | |||||
| p = getenv("OPENBLAS_CORETYPE"); | |||||
| if ( p ) | |||||
| { | |||||
| gotoblas = force_coretype(p); | |||||
| } | |||||
| else | |||||
| { | |||||
| gotoblas = get_coretype(); | |||||
| } | |||||
| if (gotoblas == NULL) | |||||
| { | |||||
| snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n"); | |||||
| openblas_warning(1, coremsg); | |||||
| gotoblas = &gotoblas_ARMV8; | |||||
| } | |||||
| if (gotoblas && gotoblas->init) { | |||||
| strncpy(coren, gotoblas_corename(), 20); | |||||
| sprintf(coremsg, "Core: %s\n", coren); | |||||
| openblas_warning(2, coremsg); | |||||
| gotoblas -> init(); | |||||
| } else { | |||||
| openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); | |||||
| exit(1); | |||||
| } | |||||
| } | |||||
| void gotoblas_dynamic_quit(void) { | |||||
| gotoblas = NULL; | |||||
| } | |||||
| @@ -73,8 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #if defined(USE_TLS) | |||||
| #if defined(USE_TLS) && defined(SMP) | |||||
| #define COMPILE_TLS | #define COMPILE_TLS | ||||
| #if USE_TLS != 1 | |||||
| #undef COMPILE_TLS | |||||
| #endif | |||||
| #if defined(__GLIBC_PREREQ) | #if defined(__GLIBC_PREREQ) | ||||
| #if !__GLIBC_PREREQ(2,20) | #if !__GLIBC_PREREQ(2,20) | ||||
| #undef COMPILE_TLS | #undef COMPILE_TLS | ||||
| @@ -254,6 +259,16 @@ int get_num_procs(void) { | |||||
| } | } | ||||
| #endif | #endif | ||||
| #ifdef OS_AIX | |||||
/*
 * Return the processor count reported by sysconf(_SC_NPROCESSORS_CONF).
 * The value is kept in a function-local static after the first call; a
 * result of 0 is not kept, so sysconf() is asked again next time.
 */
int get_num_procs(void)
{
  static int cached_procs = 0;

  if (cached_procs == 0) {
    cached_procs = sysconf(_SC_NPROCESSORS_CONF);
  }
  return cached_procs;
}
| #endif | |||||
| #ifdef OS_WINDOWS | #ifdef OS_WINDOWS | ||||
| int get_num_procs(void) { | int get_num_procs(void) { | ||||
| @@ -1733,6 +1748,22 @@ int get_num_procs(void) { | |||||
| return nums; | return nums; | ||||
| } | } | ||||
| #endif | #endif | ||||
| #ifdef OS_HAIKU | |||||
/*
 * Return the processor count reported by sysconf(_SC_NPROCESSORS_CONF).
 * The value is kept in a function-local static after the first call; a
 * result of 0 is not kept, so sysconf() is asked again next time.
 */
int get_num_procs(void)
{
  static int cached_procs = 0;

  if (cached_procs == 0) {
    cached_procs = sysconf(_SC_NPROCESSORS_CONF);
  }
  return cached_procs;
}
| #endif | |||||
| #ifdef OS_AIX | |||||
/*
 * Return the processor count reported by sysconf(_SC_NPROCESSORS_CONF).
 * The value is kept in a function-local static after the first call; a
 * result of 0 is not kept, so sysconf() is asked again next time.
 */
int get_num_procs(void)
{
  static int cached_procs = 0;

  if (cached_procs == 0) {
    cached_procs = sysconf(_SC_NPROCESSORS_CONF);
  }
  return cached_procs;
}
| #endif | |||||
| #ifdef OS_WINDOWS | #ifdef OS_WINDOWS | ||||
| @@ -2555,7 +2586,7 @@ void *blas_memory_alloc(int procpos){ | |||||
| printf("Alloc Start ...\n"); | printf("Alloc Start ...\n"); | ||||
| #endif | #endif | ||||
| #if defined(WHEREAMI) && !defined(USE_OPENMP) | |||||
| /* #if defined(WHEREAMI) && !defined(USE_OPENMP) | |||||
| mypos = WhereAmI(); | mypos = WhereAmI(); | ||||
| @@ -2565,12 +2596,12 @@ void *blas_memory_alloc(int procpos){ | |||||
| do { | do { | ||||
| if (!memory[position].used && (memory[position].pos == mypos)) { | if (!memory[position].used && (memory[position].pos == mypos)) { | ||||
| LOCK_COMMAND(&alloc_lock); | LOCK_COMMAND(&alloc_lock); | ||||
| /* blas_lock(&memory[position].lock);*/ | |||||
| // blas_lock(&memory[position].lock); | |||||
| if (!memory[position].used) goto allocation; | if (!memory[position].used) goto allocation; | ||||
| UNLOCK_COMMAND(&alloc_lock); | UNLOCK_COMMAND(&alloc_lock); | ||||
| /* blas_unlock(&memory[position].lock);*/ | |||||
| // blas_unlock(&memory[position].lock); | |||||
| } | } | ||||
| position ++; | position ++; | ||||
| @@ -2578,24 +2609,24 @@ void *blas_memory_alloc(int procpos){ | |||||
| } while (position < NUM_BUFFERS); | } while (position < NUM_BUFFERS); | ||||
| #endif | |||||
| #endif */ | |||||
| position = 0; | position = 0; | ||||
| LOCK_COMMAND(&alloc_lock); | |||||
| do { | do { | ||||
| /* if (!memory[position].used) { */ | /* if (!memory[position].used) { */ | ||||
| LOCK_COMMAND(&alloc_lock); | |||||
| /* blas_lock(&memory[position].lock);*/ | /* blas_lock(&memory[position].lock);*/ | ||||
| if (!memory[position].used) goto allocation; | if (!memory[position].used) goto allocation; | ||||
| UNLOCK_COMMAND(&alloc_lock); | |||||
| /* blas_unlock(&memory[position].lock);*/ | /* blas_unlock(&memory[position].lock);*/ | ||||
| /* } */ | /* } */ | ||||
| position ++; | position ++; | ||||
| } while (position < NUM_BUFFERS); | } while (position < NUM_BUFFERS); | ||||
| UNLOCK_COMMAND(&alloc_lock); | |||||
| goto error; | goto error; | ||||
| @@ -42,8 +42,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| static char* openblas_config_str="" | static char* openblas_config_str="" | ||||
| "OpenBLAS " | |||||
| VERSION | |||||
| " " | |||||
| #ifdef USE64BITINT | #ifdef USE64BITINT | ||||
| "USE64BITINT " | |||||
| " USE64BITINT " | |||||
| #endif | #endif | ||||
| #ifdef NO_CBLAS | #ifdef NO_CBLAS | ||||
| "NO_CBLAS " | "NO_CBLAS " | ||||
| @@ -730,35 +730,8 @@ void blas_set_parameter(void){ | |||||
| #if defined(ARCH_ARM64) | #if defined(ARCH_ARM64) | ||||
| #if defined(VULCAN) || defined(THUNDERX2T99) | |||||
| unsigned long dgemm_prefetch_size_a; | |||||
| unsigned long dgemm_prefetch_size_b; | |||||
| unsigned long dgemm_prefetch_size_c; | |||||
| #endif | |||||
| void blas_set_parameter(void) | void blas_set_parameter(void) | ||||
| { | { | ||||
| #if defined(VULCAN) || defined(THUNDERX2T99) | |||||
| dgemm_p = 160; | |||||
| dgemm_q = 128; | |||||
| dgemm_r = 4096; | |||||
| sgemm_p = 128; | |||||
| sgemm_q = 352; | |||||
| sgemm_r = 4096; | |||||
| cgemm_p = 128; | |||||
| cgemm_q = 224; | |||||
| cgemm_r = 4096; | |||||
| zgemm_p = 128; | |||||
| zgemm_q = 112; | |||||
| zgemm_r = 4096; | |||||
| dgemm_prefetch_size_a = 3584; | |||||
| dgemm_prefetch_size_b = 512; | |||||
| dgemm_prefetch_size_c = 128; | |||||
| #endif | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -114,9 +114,9 @@ $(LIBDYNNAME) : ../$(LIBNAME).osx.renamed osx.def | |||||
| endif | endif | ||||
| ifneq (,$(filter 1 2,$(NOFORTRAN))) | ifneq (,$(filter 1 2,$(NOFORTRAN))) | ||||
| #only build without Fortran | #only build without Fortran | ||||
| $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||||
| $(CC) $(CFLAGS) $(LDFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||||
| else | else | ||||
| $(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||||
| endif | endif | ||||
| dllinit.$(SUFFIX) : dllinit.c | dllinit.$(SUFFIX) : dllinit.c | ||||
| @@ -292,9 +292,6 @@ if ($link ne "") { | |||||
| && ($flags !~ /^-LIST:/) | && ($flags !~ /^-LIST:/) | ||||
| && ($flags !~ /^-LANG:/) | && ($flags !~ /^-LANG:/) | ||||
| ) { | ) { | ||||
| if ($vendor eq "PGI") { | |||||
| $flags =~ s/lib$/libso/; | |||||
| } | |||||
| $linker_L .= $flags . " "; | $linker_L .= $flags . " "; | ||||
| } | } | ||||
| @@ -311,17 +308,11 @@ if ($link ne "") { | |||||
| if ($flags =~ /^\-rpath\@/) { | if ($flags =~ /^\-rpath\@/) { | ||||
| $flags =~ s/\@/\,/g; | $flags =~ s/\@/\,/g; | ||||
| if ($vendor eq "PGI") { | |||||
| $flags =~ s/lib$/libso/; | |||||
| } | |||||
| $linker_L .= "-Wl,". $flags . " " ; | $linker_L .= "-Wl,". $flags . " " ; | ||||
| } | } | ||||
| if ($flags =~ /^\-rpath-link\@/) { | if ($flags =~ /^\-rpath-link\@/) { | ||||
| $flags =~ s/\@/\,/g; | $flags =~ s/\@/\,/g; | ||||
| if ($vendor eq "PGI") { | |||||
| $flags =~ s/lib$/libso/; | |||||
| } | |||||
| $linker_L .= "-Wl,". $flags . " " ; | $linker_L .= "-Wl,". $flags . " " ; | ||||
| } | } | ||||
| @@ -330,7 +321,6 @@ if ($link ne "") { | |||||
| && ($flags !~ /gfortranbegin/) | && ($flags !~ /gfortranbegin/) | ||||
| && ($flags !~ /frtbegin/) | && ($flags !~ /frtbegin/) | ||||
| && ($flags !~ /pathfstart/) | && ($flags !~ /pathfstart/) | ||||
| && ($flags !~ /numa/) | |||||
| && ($flags !~ /crt[0-9]/) | && ($flags !~ /crt[0-9]/) | ||||
| && ($flags !~ /gcc/) | && ($flags !~ /gcc/) | ||||
| && ($flags !~ /user32/) | && ($flags !~ /user32/) | ||||
| @@ -927,11 +927,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ARCHCONFIG "-DARMV8 " \ | #define ARCHCONFIG "-DARMV8 " \ | ||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ | ||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "armv8" | #define LIBNAME "armv8" | ||||
| #define CORENAME "ARMV8" | #define CORENAME "ARMV8" | ||||
| #endif | #endif | ||||
| #ifdef FORCE_CORTEXA53 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXA53" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXA53 " \ | |||||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "cortexa53" | |||||
| #define CORENAME "CORTEXA53" | |||||
| #else | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXA57 | #ifdef FORCE_CORTEXA57 | ||||
| #define FORCE | #define FORCE | ||||
| #define ARCHITECTURE "ARM64" | #define ARCHITECTURE "ARM64" | ||||
| @@ -942,26 +959,57 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | ||||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | ||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "cortexa57" | #define LIBNAME "cortexa57" | ||||
| #define CORENAME "CORTEXA57" | #define CORENAME "CORTEXA57" | ||||
| #else | #else | ||||
| #endif | #endif | ||||
| #ifdef FORCE_VULCAN | |||||
| #ifdef FORCE_CORTEXA72 | |||||
| #define FORCE | #define FORCE | ||||
| #define ARCHITECTURE "ARM64" | #define ARCHITECTURE "ARM64" | ||||
| #define SUBARCHITECTURE "VULCAN" | |||||
| #define SUBARCHITECTURE "CORTEXA72" | |||||
| #define SUBDIRNAME "arm64" | #define SUBDIRNAME "arm64" | ||||
| #define ARCHCONFIG "-DVULCAN " \ | |||||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||||
| "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ | |||||
| #define ARCHCONFIG "-DCORTEXA72 " \ | |||||
| "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "cortexa72" | |||||
| #define CORENAME "CORTEXA72" | |||||
| #else | |||||
| #endif | |||||
| #ifdef FORCE_CORTEXA73 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "CORTEXA73" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DCORTEXA73 " \ | |||||
| "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "cortexa73" | |||||
| #define CORENAME "CORTEXA73" | |||||
| #else | |||||
| #endif | |||||
| #ifdef FORCE_FALKOR | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "FALKOR" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DFALKOR " \ | |||||
| "-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ | |||||
| "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | ||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" | |||||
| #define LIBNAME "vulcan" | |||||
| #define CORENAME "VULCAN" | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "falkor" | |||||
| #define CORENAME "FALKOR" | |||||
| #else | #else | ||||
| #endif | #endif | ||||
| @@ -973,13 +1021,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ARCHCONFIG "-DTHUNDERX " \ | #define ARCHCONFIG "-DTHUNDERX " \ | ||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ | "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ | ||||
| "-DL2_SIZE=16777216 -DL2_LINESIZE=128 -DL2_ASSOCIATIVE=16 " \ | "-DL2_SIZE=16777216 -DL2_LINESIZE=128 -DL2_ASSOCIATIVE=16 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "thunderx" | #define LIBNAME "thunderx" | ||||
| #define CORENAME "THUNDERX" | #define CORENAME "THUNDERX" | ||||
| #else | #else | ||||
| #endif | #endif | ||||
| #ifdef FORCE_THUNDERX2T99 | #ifdef FORCE_THUNDERX2T99 | ||||
| #define ARMV8 | |||||
| #define FORCE | #define FORCE | ||||
| #define ARCHITECTURE "ARM64" | #define ARCHITECTURE "ARM64" | ||||
| #define SUBARCHITECTURE "THUNDERX2T99" | #define SUBARCHITECTURE "THUNDERX2T99" | ||||
| @@ -990,7 +1040,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | ||||
| "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ | "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | ||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON" | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "thunderx2t99" | #define LIBNAME "thunderx2t99" | ||||
| #define CORENAME "THUNDERX2T99" | #define CORENAME "THUNDERX2T99" | ||||
| #else | #else | ||||
| @@ -75,6 +75,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||||
| if (alpha == ZERO) return; | if (alpha == ZERO) return; | ||||
| if (incx == 0 && incy == 0) { | |||||
| *y += n * alpha *(*x); | |||||
| return; | |||||
| } | |||||
| IDEBUG_START; | IDEBUG_START; | ||||
| FUNCTION_PROFILE_START(); | FUNCTION_PROFILE_START(); | ||||
| @@ -97,7 +97,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint * | |||||
| blas_level1_thread(mode, n, k1, k2, dummyalpha, | blas_level1_thread(mode, n, k1, k2, dummyalpha, | ||||
| a, lda, NULL, 0, ipiv, incx, | a, lda, NULL, 0, ipiv, incx, | ||||
| laswp[flag], nthreads); | |||||
| (int(*)())laswp[flag], nthreads); | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -96,7 +96,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint * | |||||
| mode = BLAS_SINGLE | BLAS_COMPLEX; | mode = BLAS_SINGLE | BLAS_COMPLEX; | ||||
| #endif | #endif | ||||
| blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, laswp[flag], nthreads); | |||||
| blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)())laswp[flag], nthreads); | |||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -42,7 +42,7 @@ | |||||
| #include "functable.h" | #include "functable.h" | ||||
| #endif | #endif | ||||
| #if defined(THUNDERX2T99) || defined(VULCAN) | |||||
| #if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) | |||||
| // Multithreaded swap gives performance benefits in ThunderX2T99 | // Multithreaded swap gives performance benefits in ThunderX2T99 | ||||
| #else | #else | ||||
| // Disable multi-threading as it does not show any performance | // Disable multi-threading as it does not show any performance | ||||
| @@ -82,6 +82,12 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in | |||||
| if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; | if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; | ||||
| if (incx == 0 && incy == 0) { | |||||
| *y += n * (alpha_r * (*x) - alpha_i* (*(x+1)) ); | |||||
| *(y+1) += n * (alpha_i * (*x) + alpha_r * (*(x +1)) ); | |||||
| return; | |||||
| } | |||||
| IDEBUG_START; | IDEBUG_START; | ||||
| FUNCTION_PROFILE_START(); | FUNCTION_PROFILE_START(); | ||||
| @@ -43,6 +43,10 @@ | |||||
| #include "functable.h" | #include "functable.h" | ||||
| #endif | #endif | ||||
| // this is smallest dimension N of square input a to permit threading | |||||
| // see graph in issue #1820 for explanation | |||||
| #define MULTI_THREAD_MINIMAL 362 | |||||
| #ifdef XDOUBLE | #ifdef XDOUBLE | ||||
| #define ERROR_NAME "XHEMV " | #define ERROR_NAME "XHEMV " | ||||
| #elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
| @@ -195,7 +199,11 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, void *VALPHA | |||||
| buffer = (FLOAT *)blas_memory_alloc(1); | buffer = (FLOAT *)blas_memory_alloc(1); | ||||
| #ifdef SMP | #ifdef SMP | ||||
| nthreads = num_cpu_avail(2); | |||||
| if (n<MULTI_THREAD_MINIMAL) { | |||||
| nthreads = 1 ; | |||||
| } else { | |||||
| nthreads = num_cpu_avail(2); | |||||
| }; | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -42,6 +42,14 @@ | |||||
| #include "functable.h" | #include "functable.h" | ||||
| #endif | #endif | ||||
| #if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) | |||||
| // Multithreaded swap gives performance benefits in ThunderX2T99 | |||||
| #else | |||||
| // Disable multi-threading as it does not show any performance | |||||
| // benefits. Keep the multi-threading code for the record. | |||||
| #undef SMP | |||||
| #endif | |||||
| #ifndef CBLAS | #ifndef CBLAS | ||||
| void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ | void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ | ||||
| @@ -81,7 +89,7 @@ FLOAT *y = (FLOAT*)vy; | |||||
| #ifdef SMP | #ifdef SMP | ||||
| //disable multi-thread when incx==0 or incy==0 | //disable multi-thread when incx==0 or incy==0 | ||||
| //In that case, the threads would be dependent. | //In that case, the threads would be dependent. | ||||
| if (incx == 0 || incy == 0) | |||||
| if (incx == 0 || incy == 0 || n < 1048576 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT)) | |||||
| nthreads = 1; | nthreads = 1; | ||||
| else | else | ||||
| nthreads = num_cpu_avail(1); | nthreads = num_cpu_avail(1); | ||||
| @@ -88,7 +88,11 @@ lsame.$(SUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) | |||||
| $(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F) | $(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F) | ||||
| setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h | setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h | ||||
| ifeq ($(USE_GEMM3M), 1) | |||||
| $(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@ | |||||
| else | |||||
| $(CC) -c $(CFLAGS) $< -o $@ | $(CC) -c $(CFLAGS) $< -o $@ | ||||
| endif | |||||
| setparam$(TSUFFIX).c : setparam-ref.c | setparam$(TSUFFIX).c : setparam-ref.c | ||||
| sed 's/TS/$(TSUFFIX)/g' $< > $(@F) | sed 's/TS/$(TSUFFIX)/g' $< > $(@F) | ||||
| @@ -58,11 +58,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X!, { d4 - d5 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| fldmiad X!, { d6 - d7 } | |||||
| vldmia.f64 X!, { d6 - d7 } | |||||
| vabs.f64 d6, d6 | vabs.f64 d6, d6 | ||||
| vadd.f64 d1 , d1, d5 | vadd.f64 d1 , d1, d5 | ||||
| vabs.f64 d7, d7 | vabs.f64 d7, d7 | ||||
| @@ -73,7 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 } | |||||
| vldmia.f64 X!, { d4 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| @@ -82,22 +82,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S4 | .macro KERNEL_S4 | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| @@ -107,7 +107,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| @@ -118,11 +118,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| fldmias X!, { s4 - s5 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| fldmias X!, { s6 - s7 } | |||||
| vldmia.f32 X!, { s6 - s7 } | |||||
| vabs.f32 s6, s6 | vabs.f32 s6, s6 | ||||
| vadd.f32 s1 , s1, s5 | vadd.f32 s1 , s1, s5 | ||||
| vabs.f32 s7, s7 | vabs.f32 s7, s7 | ||||
| @@ -133,7 +133,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 } | |||||
| vldmia.f32 X!, { s4 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| @@ -142,22 +142,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S4 | .macro KERNEL_S4 | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| @@ -167,7 +167,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| @@ -184,11 +184,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X!, { d4 - d5 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| fldmiad X!, { d6 - d7 } | |||||
| vldmia.f64 X!, { d6 - d7 } | |||||
| vabs.f64 d6, d6 | vabs.f64 d6, d6 | ||||
| vadd.f64 d1 , d1, d5 | vadd.f64 d1 , d1, d5 | ||||
| vabs.f64 d7, d7 | vabs.f64 d7, d7 | ||||
| @@ -196,11 +196,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vadd.f64 d1 , d1, d7 | vadd.f64 d1 , d1, d7 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X!, { d4 - d5 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| fldmiad X!, { d6 - d7 } | |||||
| vldmia.f64 X!, { d6 - d7 } | |||||
| vabs.f64 d6, d6 | vabs.f64 d6, d6 | ||||
| vadd.f64 d1 , d1, d5 | vadd.f64 d1 , d1, d5 | ||||
| vabs.f64 d7, d7 | vabs.f64 d7, d7 | ||||
| @@ -212,11 +212,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 } | |||||
| vldmia.f64 X!, { d4 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| fldmiad X!, { d4 } | |||||
| vldmia.f64 X!, { d4 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| @@ -226,28 +226,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S4 | .macro KERNEL_S4 | ||||
| fldmiad X, { d4 -d5 } | |||||
| vldmia.f64 X, { d4 -d5 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| vadd.f64 d0 , d0, d5 | vadd.f64 d0 , d0, d5 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmiad X, { d4 -d5 } | |||||
| vldmia.f64 X, { d4 -d5 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| vadd.f64 d0 , d0, d5 | vadd.f64 d0 , d0, d5 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmiad X, { d4 -d5 } | |||||
| vldmia.f64 X, { d4 -d5 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| vadd.f64 d0 , d0, d5 | vadd.f64 d0 , d0, d5 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmiad X, { d4 -d5 } | |||||
| vldmia.f64 X, { d4 -d5 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| @@ -259,7 +259,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 -d5 } | |||||
| vldmia.f64 X, { d4 -d5 } | |||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| @@ -273,22 +273,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmias X!, { s4 - s5 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| fldmias X!, { s6 - s7 } | |||||
| vldmia.f32 X!, { s6 - s7 } | |||||
| vabs.f32 s6, s6 | vabs.f32 s6, s6 | ||||
| vadd.f32 s1 , s1, s5 | vadd.f32 s1 , s1, s5 | ||||
| vabs.f32 s7, s7 | vabs.f32 s7, s7 | ||||
| vadd.f32 s0 , s0, s6 | vadd.f32 s0 , s0, s6 | ||||
| vadd.f32 s1 , s1, s7 | vadd.f32 s1 , s1, s7 | ||||
| fldmias X!, { s4 - s5 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| fldmias X!, { s6 - s7 } | |||||
| vldmia.f32 X!, { s6 - s7 } | |||||
| vabs.f32 s6, s6 | vabs.f32 s6, s6 | ||||
| vadd.f32 s1 , s1, s5 | vadd.f32 s1 , s1, s5 | ||||
| vabs.f32 s7, s7 | vabs.f32 s7, s7 | ||||
| @@ -300,11 +300,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 } | |||||
| vldmia.f32 X!, { s4 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| fldmias X!, { s4 } | |||||
| vldmia.f32 X!, { s4 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| @@ -313,28 +313,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S4 | .macro KERNEL_S4 | ||||
| fldmias X, { s4 -s5 } | |||||
| vldmia.f32 X, { s4 -s5 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| vadd.f32 s0 , s0, s5 | vadd.f32 s0 , s0, s5 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmias X, { s4 -s5 } | |||||
| vldmia.f32 X, { s4 -s5 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| vadd.f32 s0 , s0, s5 | vadd.f32 s0 , s0, s5 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmias X, { s4 -s5 } | |||||
| vldmia.f32 X, { s4 -s5 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| vadd.f32 s0 , s0, s5 | vadd.f32 s0 , s0, s5 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fldmias X, { s4 -s5 } | |||||
| vldmia.f32 X, { s4 -s5 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| @@ -346,7 +346,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 -s5 } | |||||
| vldmia.f32 X, { s4 -s5 } | |||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vadd.f32 s0 , s0, s4 | vadd.f32 s0 , s0, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| @@ -146,17 +146,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X!, { d4 - d7 } | |||||
| vldmia.f64 X!, { d4 - d7 } | |||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad Y , { d8 - d11 } | |||||
| vldmia.f64 Y , { d8 - d11 } | |||||
| fmacd d8 , d0, d4 | fmacd d8 , d0, d4 | ||||
| fstmiad Y!, { d8 } | |||||
| vstmia.f64 Y!, { d8 } | |||||
| fmacd d9 , d0, d5 | fmacd d9 , d0, d5 | ||||
| fstmiad Y!, { d9 } | |||||
| vstmia.f64 Y!, { d9 } | |||||
| fmacd d10, d0, d6 | fmacd d10, d0, d6 | ||||
| fstmiad Y!, { d10 } | |||||
| vstmia.f64 Y!, { d10 } | |||||
| fmacd d11, d0, d7 | fmacd d11, d0, d7 | ||||
| fstmiad Y!, { d11 } | |||||
| vstmia.f64 Y!, { d11 } | |||||
| .endm | .endm | ||||
| @@ -164,19 +164,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 } | |||||
| fldmiad Y , { d8 } | |||||
| vldmia.f64 X!, { d4 } | |||||
| vldmia.f64 Y , { d8 } | |||||
| fmacd d8 , d0, d4 | fmacd d8 , d0, d4 | ||||
| fstmiad Y!, { d8 } | |||||
| vstmia.f64 Y!, { d8 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X , { d4 } | |||||
| fldmiad Y , { d8 } | |||||
| vldmia.f64 X , { d4 } | |||||
| vldmia.f64 Y , { d8 } | |||||
| fmacd d8 , d0, d4 | fmacd d8 , d0, d4 | ||||
| fstmiad Y , { d8 } | |||||
| vstmia.f64 Y , { d8 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -186,16 +186,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| fldmias X!, { s4 - s7 } | |||||
| fldmias Y , { s8 - s11 } | |||||
| vldmia.f32 X!, { s4 - s7 } | |||||
| vldmia.f32 Y , { s8 - s11 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fstmias Y!, { s8 } | |||||
| vstmia.f32 Y!, { s8 } | |||||
| fmacs s9 , s0, s5 | fmacs s9 , s0, s5 | ||||
| fstmias Y!, { s9 } | |||||
| vstmia.f32 Y!, { s9 } | |||||
| fmacs s10, s0, s6 | fmacs s10, s0, s6 | ||||
| fstmias Y!, { s10 } | |||||
| vstmia.f32 Y!, { s10 } | |||||
| fmacs s11, s0, s7 | fmacs s11, s0, s7 | ||||
| fstmias Y!, { s11 } | |||||
| vstmia.f32 Y!, { s11 } | |||||
| .endm | .endm | ||||
| @@ -203,19 +203,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 } | |||||
| fldmias Y , { s8 } | |||||
| vldmia.f32 X!, { s4 } | |||||
| vldmia.f32 Y , { s8 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fstmias Y!, { s8 } | |||||
| vstmia.f32 Y!, { s8 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X , { s4 } | |||||
| fldmias Y , { s8 } | |||||
| vldmia.f32 X , { s4 } | |||||
| vldmia.f32 Y , { s8 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fstmias Y , { s8 } | |||||
| vstmia.f32 Y , { s8 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -231,42 +231,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X!, { d4 - d7 } | |||||
| vldmia.f64 X!, { d4 - d7 } | |||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad Y , { d8 - d11 } | |||||
| vldmia.f64 Y , { d8 - d11 } | |||||
| FMAC_R1 d8 , d0, d4 | FMAC_R1 d8 , d0, d4 | ||||
| FMAC_R2 d8 , d1, d5 | FMAC_R2 d8 , d1, d5 | ||||
| FMAC_I1 d9 , d0, d5 | FMAC_I1 d9 , d0, d5 | ||||
| FMAC_I2 d9 , d1, d4 | FMAC_I2 d9 , d1, d4 | ||||
| fstmiad Y!, { d8 } | |||||
| fstmiad Y!, { d9 } | |||||
| vstmia.f64 Y!, { d8 } | |||||
| vstmia.f64 Y!, { d9 } | |||||
| FMAC_R1 d10, d0, d6 | FMAC_R1 d10, d0, d6 | ||||
| FMAC_R2 d10, d1, d7 | FMAC_R2 d10, d1, d7 | ||||
| FMAC_I1 d11, d0, d7 | FMAC_I1 d11, d0, d7 | ||||
| FMAC_I2 d11, d1, d6 | FMAC_I2 d11, d1, d6 | ||||
| fstmiad Y!, { d10 } | |||||
| fstmiad Y!, { d11 } | |||||
| vstmia.f64 Y!, { d10 } | |||||
| vstmia.f64 Y!, { d11 } | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X!, { d4 - d7 } | |||||
| vldmia.f64 X!, { d4 - d7 } | |||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad Y , { d8 - d11 } | |||||
| vldmia.f64 Y , { d8 - d11 } | |||||
| FMAC_R1 d8 , d0, d4 | FMAC_R1 d8 , d0, d4 | ||||
| FMAC_R2 d8 , d1, d5 | FMAC_R2 d8 , d1, d5 | ||||
| FMAC_I1 d9 , d0, d5 | FMAC_I1 d9 , d0, d5 | ||||
| FMAC_I2 d9 , d1, d4 | FMAC_I2 d9 , d1, d4 | ||||
| fstmiad Y!, { d8 } | |||||
| fstmiad Y!, { d9 } | |||||
| vstmia.f64 Y!, { d8 } | |||||
| vstmia.f64 Y!, { d9 } | |||||
| FMAC_R1 d10, d0, d6 | FMAC_R1 d10, d0, d6 | ||||
| FMAC_R2 d10, d1, d7 | FMAC_R2 d10, d1, d7 | ||||
| FMAC_I1 d11, d0, d7 | FMAC_I1 d11, d0, d7 | ||||
| FMAC_I2 d11, d1, d6 | FMAC_I2 d11, d1, d6 | ||||
| fstmiad Y!, { d10 } | |||||
| fstmiad Y!, { d11 } | |||||
| vstmia.f64 Y!, { d10 } | |||||
| vstmia.f64 Y!, { d11 } | |||||
| @@ -277,15 +277,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 - d5 } | |||||
| fldmiad Y , { d8 - d9 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| vldmia.f64 Y , { d8 - d9 } | |||||
| FMAC_R1 d8 , d0, d4 | FMAC_R1 d8 , d0, d4 | ||||
| FMAC_R2 d8 , d1, d5 | FMAC_R2 d8 , d1, d5 | ||||
| FMAC_I1 d9 , d0, d5 | FMAC_I1 d9 , d0, d5 | ||||
| FMAC_I2 d9 , d1, d4 | FMAC_I2 d9 , d1, d4 | ||||
| fstmiad Y!, { d8 } | |||||
| fstmiad Y!, { d9 } | |||||
| vstmia.f64 Y!, { d8 } | |||||
| vstmia.f64 Y!, { d9 } | |||||
| @@ -293,14 +293,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X , { d4 - d5 } | |||||
| fldmiad Y , { d8 - d9 } | |||||
| vldmia.f64 X , { d4 - d5 } | |||||
| vldmia.f64 Y , { d8 - d9 } | |||||
| FMAC_R1 d8 , d0, d4 | FMAC_R1 d8 , d0, d4 | ||||
| FMAC_R2 d8 , d1, d5 | FMAC_R2 d8 , d1, d5 | ||||
| FMAC_I1 d9 , d0, d5 | FMAC_I1 d9 , d0, d5 | ||||
| FMAC_I2 d9 , d1, d4 | FMAC_I2 d9 , d1, d4 | ||||
| fstmiad Y , { d8 - d9 } | |||||
| vstmia.f64 Y , { d8 - d9 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -314,40 +314,40 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmias X!, { s4 - s7 } | |||||
| vldmia.f32 X!, { s4 - s7 } | |||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmias Y , { s8 - s11 } | |||||
| vldmia.f32 Y , { s8 - s11 } | |||||
| FMAC_R1 s8 , s0, s4 | FMAC_R1 s8 , s0, s4 | ||||
| FMAC_R2 s8 , s1, s5 | FMAC_R2 s8 , s1, s5 | ||||
| FMAC_I1 s9 , s0, s5 | FMAC_I1 s9 , s0, s5 | ||||
| FMAC_I2 s9 , s1, s4 | FMAC_I2 s9 , s1, s4 | ||||
| fstmias Y!, { s8 } | |||||
| fstmias Y!, { s9 } | |||||
| vstmia.f32 Y!, { s8 } | |||||
| vstmia.f32 Y!, { s9 } | |||||
| FMAC_R1 s10, s0, s6 | FMAC_R1 s10, s0, s6 | ||||
| FMAC_R2 s10, s1, s7 | FMAC_R2 s10, s1, s7 | ||||
| FMAC_I1 s11, s0, s7 | FMAC_I1 s11, s0, s7 | ||||
| FMAC_I2 s11, s1, s6 | FMAC_I2 s11, s1, s6 | ||||
| fstmias Y!, { s10 } | |||||
| fstmias Y!, { s11 } | |||||
| vstmia.f32 Y!, { s10 } | |||||
| vstmia.f32 Y!, { s11 } | |||||
| fldmias X!, { s4 - s7 } | |||||
| fldmias Y , { s8 - s11 } | |||||
| vldmia.f32 X!, { s4 - s7 } | |||||
| vldmia.f32 Y , { s8 - s11 } | |||||
| FMAC_R1 s8 , s0, s4 | FMAC_R1 s8 , s0, s4 | ||||
| FMAC_R2 s8 , s1, s5 | FMAC_R2 s8 , s1, s5 | ||||
| FMAC_I1 s9 , s0, s5 | FMAC_I1 s9 , s0, s5 | ||||
| FMAC_I2 s9 , s1, s4 | FMAC_I2 s9 , s1, s4 | ||||
| fstmias Y!, { s8 } | |||||
| fstmias Y!, { s9 } | |||||
| vstmia.f32 Y!, { s8 } | |||||
| vstmia.f32 Y!, { s9 } | |||||
| FMAC_R1 s10, s0, s6 | FMAC_R1 s10, s0, s6 | ||||
| FMAC_R2 s10, s1, s7 | FMAC_R2 s10, s1, s7 | ||||
| FMAC_I1 s11, s0, s7 | FMAC_I1 s11, s0, s7 | ||||
| FMAC_I2 s11, s1, s6 | FMAC_I2 s11, s1, s6 | ||||
| fstmias Y!, { s10 } | |||||
| fstmias Y!, { s11 } | |||||
| vstmia.f32 Y!, { s10 } | |||||
| vstmia.f32 Y!, { s11 } | |||||
| @@ -358,15 +358,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 - s5 } | |||||
| fldmias Y , { s8 - s9 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| vldmia.f32 Y , { s8 - s9 } | |||||
| FMAC_R1 s8 , s0, s4 | FMAC_R1 s8 , s0, s4 | ||||
| FMAC_R2 s8 , s1, s5 | FMAC_R2 s8 , s1, s5 | ||||
| FMAC_I1 s9 , s0, s5 | FMAC_I1 s9 , s0, s5 | ||||
| FMAC_I2 s9 , s1, s4 | FMAC_I2 s9 , s1, s4 | ||||
| fstmias Y!, { s8 } | |||||
| fstmias Y!, { s9 } | |||||
| vstmia.f32 Y!, { s8 } | |||||
| vstmia.f32 Y!, { s9 } | |||||
| @@ -374,14 +374,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X , { s4 - s5 } | |||||
| fldmias Y , { s8 - s9 } | |||||
| vldmia.f32 X , { s4 - s5 } | |||||
| vldmia.f32 Y , { s8 - s9 } | |||||
| FMAC_R1 s8 , s0, s4 | FMAC_R1 s8 , s0, s4 | ||||
| FMAC_R2 s8 , s1, s5 | FMAC_R2 s8 , s1, s5 | ||||
| FMAC_I1 s9 , s0, s5 | FMAC_I1 s9 , s0, s5 | ||||
| FMAC_I2 s9 , s1, s4 | FMAC_I2 s9 , s1, s4 | ||||
| fstmias Y , { s8 - s9 } | |||||
| vstmia.f32 Y , { s8 - s9 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -65,15 +65,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_F4 | .macro COPY_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmias X!, { s0 - s7 } | |||||
| fstmias Y!, { s0 - s7 } | |||||
| vldmia.f32 X!, { s0 - s7 } | |||||
| vstmia.f32 Y!, { s0 - s7 } | |||||
| .endm | .endm | ||||
| .macro COPY_F1 | .macro COPY_F1 | ||||
| fldmias X!, { s0 - s1 } | |||||
| fstmias Y!, { s0 - s1 } | |||||
| vldmia.f32 X!, { s0 - s1 } | |||||
| vstmia.f32 Y!, { s0 - s1 } | |||||
| .endm | .endm | ||||
| @@ -83,23 +83,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_S4 | .macro COPY_S4 | ||||
| nop | nop | ||||
| fldmias X, { s0 - s1 } | |||||
| fstmias Y, { s0 - s1 } | |||||
| vldmia.f32 X, { s0 - s1 } | |||||
| vstmia.f32 Y, { s0 - s1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s2 - s3 } | |||||
| fstmias Y, { s2 - s3 } | |||||
| vldmia.f32 X, { s2 - s3 } | |||||
| vstmia.f32 Y, { s2 - s3 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s0 - s1 } | |||||
| fstmias Y, { s0 - s1 } | |||||
| vldmia.f32 X, { s0 - s1 } | |||||
| vstmia.f32 Y, { s0 - s1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s2 - s3 } | |||||
| fstmias Y, { s2 - s3 } | |||||
| vldmia.f32 X, { s2 - s3 } | |||||
| vstmia.f32 Y, { s2 - s3 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -108,8 +108,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_S1 | .macro COPY_S1 | ||||
| fldmias X, { s0 - s1 } | |||||
| fstmias Y, { s0 - s1 } | |||||
| vldmia.f32 X, { s0 - s1 } | |||||
| vstmia.f32 Y, { s0 - s1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -76,30 +76,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmias X!, { s4 - s5 } | |||||
| fldmias Y!, { s8 - s9 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| vldmia.f32 Y!, { s8 - s9 } | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fmacs s1 , s4, s9 | fmacs s1 , s4, s9 | ||||
| fldmias X!, { s6 - s7 } | |||||
| vldmia.f32 X!, { s6 - s7 } | |||||
| fmacs s2 , s5, s9 | fmacs s2 , s5, s9 | ||||
| fmacs s3 , s5, s8 | fmacs s3 , s5, s8 | ||||
| fldmias Y!, { s10 - s11 } | |||||
| vldmia.f32 Y!, { s10 - s11 } | |||||
| fmacs s0 , s6, s10 | fmacs s0 , s6, s10 | ||||
| fmacs s1 , s6, s11 | fmacs s1 , s6, s11 | ||||
| fmacs s2 , s7, s11 | fmacs s2 , s7, s11 | ||||
| fmacs s3 , s7, s10 | fmacs s3 , s7, s10 | ||||
| fldmias X!, { s4 - s5 } | |||||
| fldmias Y!, { s8 - s9 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| vldmia.f32 Y!, { s8 - s9 } | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fmacs s1 , s4, s9 | fmacs s1 , s4, s9 | ||||
| fldmias X!, { s6 - s7 } | |||||
| vldmia.f32 X!, { s6 - s7 } | |||||
| fmacs s2 , s5, s9 | fmacs s2 , s5, s9 | ||||
| fmacs s3 , s5, s8 | fmacs s3 , s5, s8 | ||||
| fldmias Y!, { s10 - s11 } | |||||
| vldmia.f32 Y!, { s10 - s11 } | |||||
| fmacs s0 , s6, s10 | fmacs s0 , s6, s10 | ||||
| fmacs s1 , s6, s11 | fmacs s1 , s6, s11 | ||||
| fmacs s2 , s7, s11 | fmacs s2 , s7, s11 | ||||
| @@ -109,8 +109,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 - s5 } | |||||
| fldmias Y!, { s8 - s9 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| vldmia.f32 Y!, { s8 - s9 } | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fmacs s1 , s4, s9 | fmacs s1 , s4, s9 | ||||
| fmacs s2 , s5, s9 | fmacs s2 , s5, s9 | ||||
| @@ -125,8 +125,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| nop | nop | ||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s8 - s9 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s8 - s9 } | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fmacs s1 , s4, s9 | fmacs s1 , s4, s9 | ||||
| fmacs s2 , s5, s9 | fmacs s2 , s5, s9 | ||||
| @@ -134,8 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s8 - s9 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s8 - s9 } | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fmacs s1 , s4, s9 | fmacs s1 , s4, s9 | ||||
| fmacs s2 , s5, s9 | fmacs s2 , s5, s9 | ||||
| @@ -143,8 +143,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s8 - s9 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s8 - s9 } | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fmacs s1 , s4, s9 | fmacs s1 , s4, s9 | ||||
| fmacs s2 , s5, s9 | fmacs s2 , s5, s9 | ||||
| @@ -152,8 +152,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s8 - s9 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s8 - s9 } | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fmacs s1 , s4, s9 | fmacs s1 , s4, s9 | ||||
| fmacs s2 , s5, s9 | fmacs s2 , s5, s9 | ||||
| @@ -166,8 +166,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s8 - s9 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s8 - s9 } | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fmacs s1 , s4, s9 | fmacs s1 , s4, s9 | ||||
| fmacs s2 , s5, s9 | fmacs s2 , s5, s9 | ||||
| @@ -165,9 +165,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_I | .macro KERNEL2x2_I | ||||
| pld [ AO, #A_PRE ] | pld [ AO, #A_PRE ] | ||||
| fldmias AO!, { s0 - s3 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| pld [ BO, #B_PRE ] | pld [ BO, #B_PRE ] | ||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmuls s8 , s0, s4 | fmuls s8 , s0, s4 | ||||
| @@ -197,9 +197,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_M1 | .macro KERNEL2x2_M1 | ||||
| pld [ AO, #A_PRE ] | pld [ AO, #A_PRE ] | ||||
| fldmias AO!, { s0 - s3 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| pld [ BO, #B_PRE ] | pld [ BO, #B_PRE ] | ||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s0, s5 | fmacs s9 , s0, s5 | ||||
| @@ -225,8 +225,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_M2 | .macro KERNEL2x2_M2 | ||||
| fldmias AO!, { s0 - s3 } | |||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s0, s5 | fmacs s9 , s0, s5 | ||||
| @@ -254,8 +254,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_E | .macro KERNEL2x2_E | ||||
| fldmias AO!, { s0 - s3 } | |||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s0, s5 | fmacs s9 , s0, s5 | ||||
| @@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_SUB | .macro KERNEL2x2_SUB | ||||
| fldmias AO!, { s0 - s3 } | |||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s0, s5 | fmacs s9 , s0, s5 | ||||
| @@ -317,7 +317,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias CO1, { s4 - s7 } | |||||
| vldmia.f32 CO1, { s4 - s7 } | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| @@ -329,9 +329,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s11 | FMAC_R2 s6 , s1 , s11 | ||||
| FMAC_I2 s7 , s1 , s10 | FMAC_I2 s7 , s1 , s10 | ||||
| fstmias CO1, { s4 - s7 } | |||||
| vstmia.f32 CO1, { s4 - s7 } | |||||
| fldmias CO2, { s4 - s7 } | |||||
| vldmia.f32 CO2, { s4 - s7 } | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| @@ -343,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s15 | FMAC_R2 s6 , s1 , s15 | ||||
| FMAC_I2 s7 , s1 , s14 | FMAC_I2 s7 , s1 , s14 | ||||
| fstmias CO2, { s4 - s7 } | |||||
| vstmia.f32 CO2, { s4 - s7 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -500,23 +500,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias CO1, { s4 - s5 } | |||||
| vldmia.f32 CO1, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| FMAC_R2 s4 , s1 , s9 | FMAC_R2 s4 , s1 , s9 | ||||
| FMAC_I2 s5 , s1 , s8 | FMAC_I2 s5 , s1 , s8 | ||||
| fstmias CO1, { s4 - s5 } | |||||
| vstmia.f32 CO1, { s4 - s5 } | |||||
| fldmias CO2, { s4 - s5 } | |||||
| vldmia.f32 CO2, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| FMAC_R2 s4 , s1 , s13 | FMAC_R2 s4 , s1 , s13 | ||||
| FMAC_I2 s5 , s1 , s12 | FMAC_I2 s5 , s1 , s12 | ||||
| fstmias CO2, { s4 - s5 } | |||||
| vstmia.f32 CO2, { s4 - s5 } | |||||
| add CO1, CO1, #8 | add CO1, CO1, #8 | ||||
| @@ -671,7 +671,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias CO1, { s4 - s7 } | |||||
| vldmia.f32 CO1, { s4 - s7 } | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| @@ -683,7 +683,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s11 | FMAC_R2 s6 , s1 , s11 | ||||
| FMAC_I2 s7 , s1 , s10 | FMAC_I2 s7 , s1 , s10 | ||||
| fstmias CO1, { s4 - s7 } | |||||
| vstmia.f32 CO1, { s4 - s7 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -800,14 +800,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias CO1, { s4 - s5 } | |||||
| vldmia.f32 CO1, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| FMAC_R2 s4 , s1 , s9 | FMAC_R2 s4 , s1 , s9 | ||||
| FMAC_I2 s5 , s1 , s8 | FMAC_I2 s5 , s1 , s8 | ||||
| fstmias CO1, { s4 - s5 } | |||||
| vstmia.f32 CO1, { s4 - s5 } | |||||
| add CO1, CO1, #8 | add CO1, CO1, #8 | ||||
| @@ -182,30 +182,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_I | .macro KERNEL2x2_I | ||||
| pld [ AO , #A_PRE ] | pld [ AO , #A_PRE ] | ||||
| pld [ BO , #B_PRE ] | pld [ BO , #B_PRE ] | ||||
| fldmias AO!, { s0 - s1 } | |||||
| fldmias BO!, { s8 - s9 } | |||||
| vldmia.f32 AO!, { s0 - s1 } | |||||
| vldmia.f32 BO!, { s8 - s9 } | |||||
| fmuls s16 , s0, s8 | fmuls s16 , s0, s8 | ||||
| fmuls s24 , s1, s9 | fmuls s24 , s1, s9 | ||||
| fldmias AO!, { s2 - s3 } | |||||
| vldmia.f32 AO!, { s2 - s3 } | |||||
| fmuls s17 , s0, s9 | fmuls s17 , s0, s9 | ||||
| fmuls s25 , s1, s8 | fmuls s25 , s1, s8 | ||||
| fldmias BO!, { s10 - s11 } | |||||
| vldmia.f32 BO!, { s10 - s11 } | |||||
| fmuls s18 , s2, s8 | fmuls s18 , s2, s8 | ||||
| fmuls s26 , s3, s9 | fmuls s26 , s3, s9 | ||||
| fldmias AO!, { s4 - s5 } | |||||
| vldmia.f32 AO!, { s4 - s5 } | |||||
| fmuls s19 , s2, s9 | fmuls s19 , s2, s9 | ||||
| fmuls s27 , s3, s8 | fmuls s27 , s3, s8 | ||||
| fldmias BO!, { s12 - s13 } | |||||
| vldmia.f32 BO!, { s12 - s13 } | |||||
| fmuls s20 , s0, s10 | fmuls s20 , s0, s10 | ||||
| fmuls s28 , s1, s11 | fmuls s28 , s1, s11 | ||||
| fldmias AO!, { s6 - s7 } | |||||
| vldmia.f32 AO!, { s6 - s7 } | |||||
| fmuls s21 , s0, s11 | fmuls s21 , s0, s11 | ||||
| fmuls s29 , s1, s10 | fmuls s29 , s1, s10 | ||||
| fldmias BO!, { s14 - s15 } | |||||
| vldmia.f32 BO!, { s14 - s15 } | |||||
| fmuls s22 , s2, s10 | fmuls s22 , s2, s10 | ||||
| fmuls s30 , s3, s11 | fmuls s30 , s3, s11 | ||||
| fmuls s23 , s2, s11 | fmuls s23 , s2, s11 | ||||
| @@ -218,17 +218,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_M1 | .macro KERNEL2x2_M1 | ||||
| fmacs s16 , s0, s8 | fmacs s16 , s0, s8 | ||||
| fldmias AO!, { s4 - s5 } | |||||
| vldmia.f32 AO!, { s4 - s5 } | |||||
| fmacs s24 , s1, s9 | fmacs s24 , s1, s9 | ||||
| fmacs s17 , s0, s9 | fmacs s17 , s0, s9 | ||||
| fldmias BO!, { s12 - s13 } | |||||
| vldmia.f32 BO!, { s12 - s13 } | |||||
| fmacs s25 , s1, s8 | fmacs s25 , s1, s8 | ||||
| fmacs s18 , s2, s8 | fmacs s18 , s2, s8 | ||||
| fldmias AO!, { s6 - s7 } | |||||
| vldmia.f32 AO!, { s6 - s7 } | |||||
| fmacs s26 , s3, s9 | fmacs s26 , s3, s9 | ||||
| fmacs s19 , s2, s9 | fmacs s19 , s2, s9 | ||||
| fldmias BO!, { s14 - s15 } | |||||
| vldmia.f32 BO!, { s14 - s15 } | |||||
| fmacs s27 , s3, s8 | fmacs s27 , s3, s8 | ||||
| fmacs s20 , s0, s10 | fmacs s20 , s0, s10 | ||||
| @@ -250,19 +250,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ BO , #B_PRE ] | pld [ BO , #B_PRE ] | ||||
| fmacs s24 , s5, s13 | fmacs s24 , s5, s13 | ||||
| fmacs s17 , s4, s13 | fmacs s17 , s4, s13 | ||||
| fldmias AO!, { s0 - s1 } | |||||
| vldmia.f32 AO!, { s0 - s1 } | |||||
| fmacs s25 , s5, s12 | fmacs s25 , s5, s12 | ||||
| fmacs s18 , s6, s12 | fmacs s18 , s6, s12 | ||||
| fmacs s26 , s7, s13 | fmacs s26 , s7, s13 | ||||
| fldmias BO!, { s8 - s9 } | |||||
| vldmia.f32 BO!, { s8 - s9 } | |||||
| fmacs s19 , s6, s13 | fmacs s19 , s6, s13 | ||||
| fmacs s27 , s7, s12 | fmacs s27 , s7, s12 | ||||
| fldmias AO!, { s2 - s3 } | |||||
| vldmia.f32 AO!, { s2 - s3 } | |||||
| fmacs s20 , s4, s14 | fmacs s20 , s4, s14 | ||||
| fmacs s28 , s5, s15 | fmacs s28 , s5, s15 | ||||
| fldmias BO!, { s10 - s11 } | |||||
| vldmia.f32 BO!, { s10 - s11 } | |||||
| fmacs s21 , s4, s15 | fmacs s21 , s4, s15 | ||||
| fmacs s29 , s5, s14 | fmacs s29 , s5, s14 | ||||
| @@ -300,16 +300,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_SUB | .macro KERNEL2x2_SUB | ||||
| fldmias AO!, { s0 - s1 } | |||||
| fldmias BO!, { s8 - s9 } | |||||
| vldmia.f32 AO!, { s0 - s1 } | |||||
| vldmia.f32 BO!, { s8 - s9 } | |||||
| fmacs s16 , s0, s8 | fmacs s16 , s0, s8 | ||||
| fmacs s24 , s1, s9 | fmacs s24 , s1, s9 | ||||
| fldmias AO!, { s2 - s3 } | |||||
| vldmia.f32 AO!, { s2 - s3 } | |||||
| fmacs s17 , s0, s9 | fmacs s17 , s0, s9 | ||||
| fmacs s25 , s1, s8 | fmacs s25 , s1, s8 | ||||
| fldmias BO!, { s10 - s11 } | |||||
| vldmia.f32 BO!, { s10 - s11 } | |||||
| fmacs s18 , s2, s8 | fmacs s18 , s2, s8 | ||||
| fmacs s26 , s3, s9 | fmacs s26 , s3, s9 | ||||
| fmacs s19 , s2, s9 | fmacs s19 , s2, s9 | ||||
| @@ -338,8 +338,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias CO1, { s4 - s7 } | |||||
| fldmias CO2, { s8 - s11 } | |||||
| vldmia.f32 CO1, { s4 - s7 } | |||||
| vldmia.f32 CO2, { s8 - s11 } | |||||
| FADD_R s16, s24 , s16 | FADD_R s16, s24 , s16 | ||||
| FADD_I s17, s25 , s17 | FADD_I s17, s25 , s17 | ||||
| @@ -370,8 +370,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s10, s1 , s23 | FMAC_R2 s10, s1 , s23 | ||||
| FMAC_I2 s11, s1 , s22 | FMAC_I2 s11, s1 , s22 | ||||
| fstmias CO1, { s4 - s7 } | |||||
| fstmias CO2, { s8 - s11 } | |||||
| vstmia.f32 CO1, { s4 - s7 } | |||||
| vstmia.f32 CO2, { s8 - s11 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -534,8 +534,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias CO1, { s4 - s5 } | |||||
| fldmias CO2, { s8 - s9 } | |||||
| vldmia.f32 CO1, { s4 - s5 } | |||||
| vldmia.f32 CO2, { s8 - s9 } | |||||
| FADD_R s16, s24 , s16 | FADD_R s16, s24 , s16 | ||||
| FADD_I s17, s25 , s17 | FADD_I s17, s25 , s17 | ||||
| @@ -552,8 +552,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s8 , s1 , s21 | FMAC_R2 s8 , s1 , s21 | ||||
| FMAC_I2 s9 , s1 , s20 | FMAC_I2 s9 , s1 , s20 | ||||
| fstmias CO1, { s4 - s5 } | |||||
| fstmias CO2, { s8 - s9 } | |||||
| vstmia.f32 CO1, { s4 - s5 } | |||||
| vstmia.f32 CO2, { s8 - s9 } | |||||
| add CO1, CO1, #8 | add CO1, CO1, #8 | ||||
| @@ -716,7 +716,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias CO1, { s4 - s7 } | |||||
| vldmia.f32 CO1, { s4 - s7 } | |||||
| FADD_R s16, s24 , s16 | FADD_R s16, s24 , s16 | ||||
| FADD_I s17, s25 , s17 | FADD_I s17, s25 , s17 | ||||
| @@ -733,7 +733,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s19 | FMAC_R2 s6 , s1 , s19 | ||||
| FMAC_I2 s7 , s1 , s18 | FMAC_I2 s7 , s1 , s18 | ||||
| fstmias CO1, { s4 - s7 } | |||||
| vstmia.f32 CO1, { s4 - s7 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -851,7 +851,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias CO1, { s4 - s5 } | |||||
| vldmia.f32 CO1, { s4 - s5 } | |||||
| FADD_R s16, s24 , s16 | FADD_R s16, s24 , s16 | ||||
| FADD_I s17, s25 , s17 | FADD_I s17, s25 , s17 | ||||
| @@ -861,7 +861,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s4 , s1 , s17 | FMAC_R2 s4 , s1 , s17 | ||||
| FMAC_I2 s5 , s1 , s16 | FMAC_I2 s5 , s1 , s16 | ||||
| fstmias CO1, { s4 - s5 } | |||||
| vstmia.f32 CO1, { s4 - s5 } | |||||
| add CO1, CO1, #8 | add CO1, CO1, #8 | ||||
| @@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s6 , [ AO2, #8 ] | flds s6 , [ AO2, #8 ] | ||||
| flds s7 , [ AO2, #12 ] | flds s7 , [ AO2, #12 ] | ||||
| fstmias BO!, { s0 - s7 } | |||||
| vstmia.f32 BO!, { s0 - s7 } | |||||
| add AO2, AO2, #16 | add AO2, AO2, #16 | ||||
| .endm | .endm | ||||
| @@ -99,7 +99,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s3 , [ AO2, #4 ] | flds s3 , [ AO2, #4 ] | ||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| fstmias BO!, { s0 - s3 } | |||||
| vstmia.f32 BO!, { s0 - s3 } | |||||
| add AO2, AO2, #8 | add AO2, AO2, #8 | ||||
| .endm | .endm | ||||
| @@ -111,7 +111,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s2 , [ AO1, #8 ] | flds s2 , [ AO1, #8 ] | ||||
| flds s3 , [ AO1, #12 ] | flds s3 , [ AO1, #12 ] | ||||
| fstmias BO!, { s0 - s3 } | |||||
| vstmia.f32 BO!, { s0 - s3 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| .endm | .endm | ||||
| @@ -122,7 +122,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0 , [ AO1, #0 ] | flds s0 , [ AO1, #0 ] | ||||
| flds s1 , [ AO1, #4 ] | flds s1 , [ AO1, #4 ] | ||||
| fstmias BO!, { s0 - s1 } | |||||
| vstmia.f32 BO!, { s0 - s1 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| .endm | .endm | ||||
| @@ -73,12 +73,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| **************************************************************************************/ | **************************************************************************************/ | ||||
| .macro COPY2x2 | .macro COPY2x2 | ||||
| fldmias AO1, { s0 - s3 } | |||||
| vldmia.f32 AO1, { s0 - s3 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmias r3, { s4 - s7 } | |||||
| vldmia.f32 r3, { s4 - s7 } | |||||
| fstmias BO1, { s0 - s7 } | |||||
| vstmia.f32 BO1, { s0 - s7 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -86,12 +86,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x2 | .macro COPY1x2 | ||||
| fldmias AO1, { s0 -s1 } | |||||
| vldmia.f32 AO1, { s0 -s1 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmias r3, { s2 - s3 } | |||||
| vldmia.f32 r3, { s2 - s3 } | |||||
| fstmias BO2, { s0 - s3 } | |||||
| vstmia.f32 BO2, { s0 - s3 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| add BO2, BO2, #16 | add BO2, BO2, #16 | ||||
| @@ -100,9 +100,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /*************************************************************************************************************************/ | /*************************************************************************************************************************/ | ||||
| .macro COPY2x1 | .macro COPY2x1 | ||||
| fldmias AO1, { s0 - s3 } | |||||
| vldmia.f32 AO1, { s0 - s3 } | |||||
| fstmias BO1, { s0 - s3 } | |||||
| vstmia.f32 BO1, { s0 - s3 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -110,9 +110,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x1 | .macro COPY1x1 | ||||
| fldmias AO1, { s0 - s1 } | |||||
| vldmia.f32 AO1, { s0 - s1 } | |||||
| fstmias BO2, { s0 - s1 } | |||||
| vstmia.f32 BO2, { s0 - s1 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| add BO2, BO2, #8 | add BO2, BO2, #8 | ||||
| @@ -201,7 +201,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias YO, { s4 - s7 } | |||||
| vldmia.f32 YO, { s4 - s7 } | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| @@ -213,9 +213,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s11 | FMAC_R2 s6 , s1 , s11 | ||||
| FMAC_I2 s7 , s1 , s10 | FMAC_I2 s7 , s1 , s10 | ||||
| fstmias YO!, { s4 - s7 } | |||||
| vstmia.f32 YO!, { s4 - s7 } | |||||
| fldmias YO, { s4 - s7 } | |||||
| vldmia.f32 YO, { s4 - s7 } | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| @@ -227,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s15 | FMAC_R2 s6 , s1 , s15 | ||||
| FMAC_I2 s7 , s1 , s14 | FMAC_I2 s7 , s1 , s14 | ||||
| fstmias YO!, { s4 - s7 } | |||||
| vstmia.f32 YO!, { s4 - s7 } | |||||
| .endm | .endm | ||||
| @@ -266,14 +266,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias YO, { s4 - s5 } | |||||
| vldmia.f32 YO, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| FMAC_R2 s4 , s1 , s9 | FMAC_R2 s4 , s1 , s9 | ||||
| FMAC_I2 s5 , s1 , s8 | FMAC_I2 s5 , s1 , s8 | ||||
| fstmias YO, { s4 - s5 } | |||||
| vstmia.f32 YO, { s4 - s5 } | |||||
| add YO, YO, #8 | add YO, YO, #8 | ||||
| @@ -349,47 +349,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias YO, { s4 - s5 } | |||||
| vldmia.f32 YO, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| FMAC_R2 s4 , s1 , s9 | FMAC_R2 s4 , s1 , s9 | ||||
| FMAC_I2 s5 , s1 , s8 | FMAC_I2 s5 , s1 , s8 | ||||
| fstmias YO, { s4 - s5 } | |||||
| vstmia.f32 YO, { s4 - s5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s6 - s7 } | |||||
| vldmia.f32 YO, { s6 - s7 } | |||||
| FMAC_R1 s6 , s0 , s10 | FMAC_R1 s6 , s0 , s10 | ||||
| FMAC_I1 s7 , s0 , s11 | FMAC_I1 s7 , s0 , s11 | ||||
| FMAC_R2 s6 , s1 , s11 | FMAC_R2 s6 , s1 , s11 | ||||
| FMAC_I2 s7 , s1 , s10 | FMAC_I2 s7 , s1 , s10 | ||||
| fstmias YO, { s6 - s7 } | |||||
| vstmia.f32 YO, { s6 - s7 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s4 - s5 } | |||||
| vldmia.f32 YO, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| FMAC_R2 s4 , s1 , s13 | FMAC_R2 s4 , s1 , s13 | ||||
| FMAC_I2 s5 , s1 , s12 | FMAC_I2 s5 , s1 , s12 | ||||
| fstmias YO, { s4 - s5 } | |||||
| vstmia.f32 YO, { s4 - s5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s6 - s7 } | |||||
| vldmia.f32 YO, { s6 - s7 } | |||||
| FMAC_R1 s6 , s0 , s14 | FMAC_R1 s6 , s0 , s14 | ||||
| FMAC_I1 s7 , s0 , s15 | FMAC_I1 s7 , s0 , s15 | ||||
| FMAC_R2 s6 , s1 , s15 | FMAC_R2 s6 , s1 , s15 | ||||
| FMAC_I2 s7 , s1 , s14 | FMAC_I2 s7 , s1 , s14 | ||||
| fstmias YO, { s6 - s7 } | |||||
| vstmia.f32 YO, { s6 - s7 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| @@ -430,14 +430,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA_R | flds s0, ALPHA_R | ||||
| flds s1, ALPHA_I | flds s1, ALPHA_I | ||||
| fldmias YO, { s4 - s5 } | |||||
| vldmia.f32 YO, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s8 | FMAC_R1 s4 , s0 , s8 | ||||
| FMAC_I1 s5 , s0 , s9 | FMAC_I1 s5 , s0 , s9 | ||||
| FMAC_R2 s4 , s1 , s9 | FMAC_R2 s4 , s1 , s9 | ||||
| FMAC_I2 s5 , s1 , s8 | FMAC_I2 s5 , s1 , s8 | ||||
| fstmias YO, { s4 - s5 } | |||||
| vstmia.f32 YO, { s4 - s5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| @@ -150,9 +150,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X1 | .macro KERNEL_F2X1 | ||||
| fldmias XO! , { s2 - s3 } | |||||
| fldmias AO1!, { s4 - s5 } | |||||
| fldmias AO2!, { s8 - s9 } | |||||
| vldmia.f32 XO! , { s2 - s3 } | |||||
| vldmia.f32 AO1!, { s4 - s5 } | |||||
| vldmia.f32 AO2!, { s8 - s9 } | |||||
| fmacs s12 , s4 , s2 | fmacs s12 , s4 , s2 | ||||
| fmacs s13 , s4 , s3 | fmacs s13 , s4 , s3 | ||||
| @@ -168,7 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F2 | .macro SAVE_F2 | ||||
| fldmias YO, { s4 - s7 } | |||||
| vldmia.f32 YO, { s4 - s7 } | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| @@ -180,7 +180,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s15 | FMAC_R2 s6 , s1 , s15 | ||||
| FMAC_I2 s7 , s1 , s14 | FMAC_I2 s7 , s1 , s14 | ||||
| fstmias YO!, { s4 - s7 } | |||||
| vstmia.f32 YO!, { s4 - s7 } | |||||
| .endm | .endm | ||||
| @@ -204,8 +204,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmias XO! , { s2 - s3 } | |||||
| fldmias AO1!, { s4 - s5 } | |||||
| vldmia.f32 XO! , { s2 - s3 } | |||||
| vldmia.f32 AO1!, { s4 - s5 } | |||||
| fmacs s12 , s4 , s2 | fmacs s12 , s4 , s2 | ||||
| fmacs s13 , s4 , s3 | fmacs s13 , s4 , s3 | ||||
| @@ -216,14 +216,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmias YO, { s4 - s5 } | |||||
| vldmia.f32 YO, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| FMAC_R2 s4 , s1 , s13 | FMAC_R2 s4 , s1 , s13 | ||||
| FMAC_I2 s5 , s1 , s12 | FMAC_I2 s5 , s1 , s12 | ||||
| fstmias YO!, { s4 - s5 } | |||||
| vstmia.f32 YO!, { s4 - s5 } | |||||
| .endm | .endm | ||||
| @@ -249,9 +249,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X1 | .macro KERNEL_S2X1 | ||||
| fldmias XO , { s2 - s3 } | |||||
| fldmias AO1!, { s4 - s5 } | |||||
| fldmias AO2!, { s8 - s9 } | |||||
| vldmia.f32 XO , { s2 - s3 } | |||||
| vldmia.f32 AO1!, { s4 - s5 } | |||||
| vldmia.f32 AO2!, { s8 - s9 } | |||||
| fmacs s12 , s4 , s2 | fmacs s12 , s4 , s2 | ||||
| fmacs s13 , s4 , s3 | fmacs s13 , s4 , s3 | ||||
| @@ -269,25 +269,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S2 | .macro SAVE_S2 | ||||
| fldmias YO, { s4 - s5 } | |||||
| vldmia.f32 YO, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| FMAC_R2 s4 , s1 , s13 | FMAC_R2 s4 , s1 , s13 | ||||
| FMAC_I2 s5 , s1 , s12 | FMAC_I2 s5 , s1 , s12 | ||||
| fstmias YO, { s4 - s5 } | |||||
| vstmia.f32 YO, { s4 - s5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s6 - s7 } | |||||
| vldmia.f32 YO, { s6 - s7 } | |||||
| FMAC_R1 s6 , s0 , s14 | FMAC_R1 s6 , s0 , s14 | ||||
| FMAC_I1 s7 , s0 , s15 | FMAC_I1 s7 , s0 , s15 | ||||
| FMAC_R2 s6 , s1 , s15 | FMAC_R2 s6 , s1 , s15 | ||||
| FMAC_I2 s7 , s1 , s14 | FMAC_I2 s7 , s1 , s14 | ||||
| fstmias YO, { s6 - s7 } | |||||
| vstmia.f32 YO, { s6 - s7 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| @@ -313,8 +313,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmias XO , { s2 - s3 } | |||||
| fldmias AO1!, { s4 - s5 } | |||||
| vldmia.f32 XO , { s2 - s3 } | |||||
| vldmia.f32 AO1!, { s4 - s5 } | |||||
| fmacs s12 , s4 , s2 | fmacs s12 , s4 , s2 | ||||
| fmacs s13 , s4 , s3 | fmacs s13 , s4 , s3 | ||||
| @@ -327,14 +327,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmias YO, { s4 - s5 } | |||||
| vldmia.f32 YO, { s4 - s5 } | |||||
| FMAC_R1 s4 , s0 , s12 | FMAC_R1 s4 , s0 , s12 | ||||
| FMAC_I1 s5 , s0 , s13 | FMAC_I1 s5 , s0 , s13 | ||||
| FMAC_R2 s4 , s1 , s13 | FMAC_R2 s4 , s1 , s13 | ||||
| FMAC_I2 s5 , s1 , s12 | FMAC_I2 s5 , s1 , s12 | ||||
| fstmias YO, { s4 - s5 } | |||||
| vstmia.f32 YO, { s4 - s5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| @@ -165,9 +165,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_I | .macro KERNEL2x2_I | ||||
| pld [ AO, #A_PRE ] | pld [ AO, #A_PRE ] | ||||
| fldmias AO!, { s0 - s3 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| pld [ BO, #B_PRE ] | pld [ BO, #B_PRE ] | ||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmuls s8 , s0, s4 | fmuls s8 , s0, s4 | ||||
| @@ -197,9 +197,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_M1 | .macro KERNEL2x2_M1 | ||||
| pld [ AO, #A_PRE ] | pld [ AO, #A_PRE ] | ||||
| fldmias AO!, { s0 - s3 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| pld [ BO, #B_PRE ] | pld [ BO, #B_PRE ] | ||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s0, s5 | fmacs s9 , s0, s5 | ||||
| @@ -225,8 +225,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_M2 | .macro KERNEL2x2_M2 | ||||
| fldmias AO!, { s0 - s3 } | |||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s0, s5 | fmacs s9 , s0, s5 | ||||
| @@ -254,8 +254,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_E | .macro KERNEL2x2_E | ||||
| fldmias AO!, { s0 - s3 } | |||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s0, s5 | fmacs s9 , s0, s5 | ||||
| @@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_SUB | .macro KERNEL2x2_SUB | ||||
| fldmias AO!, { s0 - s3 } | |||||
| fldmias BO!, { s4 - s7 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| vldmia.f32 BO!, { s4 - s7 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s0, s5 | fmacs s9 , s0, s5 | ||||
| @@ -331,7 +331,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s11 | FMAC_R2 s6 , s1 , s11 | ||||
| FMAC_I2 s7 , s1 , s10 | FMAC_I2 s7 , s1 , s10 | ||||
| fstmias CO1, { s4 - s7 } | |||||
| vstmia.f32 CO1, { s4 - s7 } | |||||
| flds s4, FP_ZERO | flds s4, FP_ZERO | ||||
| vmov.f32 s5, s4 | vmov.f32 s5, s4 | ||||
| @@ -348,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s15 | FMAC_R2 s6 , s1 , s15 | ||||
| FMAC_I2 s7 , s1 , s14 | FMAC_I2 s7 , s1 , s14 | ||||
| fstmias CO2, { s4 - s7 } | |||||
| vstmia.f32 CO2, { s4 - s7 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -513,7 +513,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s4 , s1 , s9 | FMAC_R2 s4 , s1 , s9 | ||||
| FMAC_I2 s5 , s1 , s8 | FMAC_I2 s5 , s1 , s8 | ||||
| fstmias CO1, { s4 - s5 } | |||||
| vstmia.f32 CO1, { s4 - s5 } | |||||
| flds s4, FP_ZERO | flds s4, FP_ZERO | ||||
| vmov.f32 s5, s4 | vmov.f32 s5, s4 | ||||
| @@ -523,7 +523,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s4 , s1 , s13 | FMAC_R2 s4 , s1 , s13 | ||||
| FMAC_I2 s5 , s1 , s12 | FMAC_I2 s5 , s1 , s12 | ||||
| fstmias CO2, { s4 - s5 } | |||||
| vstmia.f32 CO2, { s4 - s5 } | |||||
| add CO1, CO1, #8 | add CO1, CO1, #8 | ||||
| @@ -693,7 +693,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s11 | FMAC_R2 s6 , s1 , s11 | ||||
| FMAC_I2 s7 , s1 , s10 | FMAC_I2 s7 , s1 , s10 | ||||
| fstmias CO1, { s4 - s7 } | |||||
| vstmia.f32 CO1, { s4 - s7 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -818,7 +818,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s4 , s1 , s9 | FMAC_R2 s4 , s1 , s9 | ||||
| FMAC_I2 s5 , s1 , s8 | FMAC_I2 s5 , s1 , s8 | ||||
| fstmias CO1, { s4 - s5 } | |||||
| vstmia.f32 CO1, { s4 - s5 } | |||||
| add CO1, CO1, #8 | add CO1, CO1, #8 | ||||
| @@ -170,30 +170,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_I | .macro KERNEL2x2_I | ||||
| pld [ AO , #A_PRE ] | pld [ AO , #A_PRE ] | ||||
| pld [ BO , #B_PRE ] | pld [ BO , #B_PRE ] | ||||
| fldmias AO!, { s0 - s1 } | |||||
| fldmias BO!, { s8 - s9 } | |||||
| vldmia.f32 AO!, { s0 - s1 } | |||||
| vldmia.f32 BO!, { s8 - s9 } | |||||
| fmuls s16 , s0, s8 | fmuls s16 , s0, s8 | ||||
| fmuls s24 , s1, s9 | fmuls s24 , s1, s9 | ||||
| fldmias AO!, { s2 - s3 } | |||||
| vldmia.f32 AO!, { s2 - s3 } | |||||
| fmuls s17 , s0, s9 | fmuls s17 , s0, s9 | ||||
| fmuls s25 , s1, s8 | fmuls s25 , s1, s8 | ||||
| fldmias BO!, { s10 - s11 } | |||||
| vldmia.f32 BO!, { s10 - s11 } | |||||
| fmuls s18 , s2, s8 | fmuls s18 , s2, s8 | ||||
| fmuls s26 , s3, s9 | fmuls s26 , s3, s9 | ||||
| fldmias AO!, { s4 - s5 } | |||||
| vldmia.f32 AO!, { s4 - s5 } | |||||
| fmuls s19 , s2, s9 | fmuls s19 , s2, s9 | ||||
| fmuls s27 , s3, s8 | fmuls s27 , s3, s8 | ||||
| fldmias BO!, { s12 - s13 } | |||||
| vldmia.f32 BO!, { s12 - s13 } | |||||
| fmuls s20 , s0, s10 | fmuls s20 , s0, s10 | ||||
| fmuls s28 , s1, s11 | fmuls s28 , s1, s11 | ||||
| fldmias AO!, { s6 - s7 } | |||||
| vldmia.f32 AO!, { s6 - s7 } | |||||
| fmuls s21 , s0, s11 | fmuls s21 , s0, s11 | ||||
| fmuls s29 , s1, s10 | fmuls s29 , s1, s10 | ||||
| fldmias BO!, { s14 - s15 } | |||||
| vldmia.f32 BO!, { s14 - s15 } | |||||
| fmuls s22 , s2, s10 | fmuls s22 , s2, s10 | ||||
| fmuls s30 , s3, s11 | fmuls s30 , s3, s11 | ||||
| fmuls s23 , s2, s11 | fmuls s23 , s2, s11 | ||||
| @@ -206,17 +206,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_M1 | .macro KERNEL2x2_M1 | ||||
| fmacs s16 , s0, s8 | fmacs s16 , s0, s8 | ||||
| fldmias AO!, { s4 - s5 } | |||||
| vldmia.f32 AO!, { s4 - s5 } | |||||
| fmacs s24 , s1, s9 | fmacs s24 , s1, s9 | ||||
| fmacs s17 , s0, s9 | fmacs s17 , s0, s9 | ||||
| fldmias BO!, { s12 - s13 } | |||||
| vldmia.f32 BO!, { s12 - s13 } | |||||
| fmacs s25 , s1, s8 | fmacs s25 , s1, s8 | ||||
| fmacs s18 , s2, s8 | fmacs s18 , s2, s8 | ||||
| fldmias AO!, { s6 - s7 } | |||||
| vldmia.f32 AO!, { s6 - s7 } | |||||
| fmacs s26 , s3, s9 | fmacs s26 , s3, s9 | ||||
| fmacs s19 , s2, s9 | fmacs s19 , s2, s9 | ||||
| fldmias BO!, { s14 - s15 } | |||||
| vldmia.f32 BO!, { s14 - s15 } | |||||
| fmacs s27 , s3, s8 | fmacs s27 , s3, s8 | ||||
| fmacs s20 , s0, s10 | fmacs s20 , s0, s10 | ||||
| @@ -238,19 +238,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ BO , #B_PRE ] | pld [ BO , #B_PRE ] | ||||
| fmacs s24 , s5, s13 | fmacs s24 , s5, s13 | ||||
| fmacs s17 , s4, s13 | fmacs s17 , s4, s13 | ||||
| fldmias AO!, { s0 - s1 } | |||||
| vldmia.f32 AO!, { s0 - s1 } | |||||
| fmacs s25 , s5, s12 | fmacs s25 , s5, s12 | ||||
| fmacs s18 , s6, s12 | fmacs s18 , s6, s12 | ||||
| fmacs s26 , s7, s13 | fmacs s26 , s7, s13 | ||||
| fldmias BO!, { s8 - s9 } | |||||
| vldmia.f32 BO!, { s8 - s9 } | |||||
| fmacs s19 , s6, s13 | fmacs s19 , s6, s13 | ||||
| fmacs s27 , s7, s12 | fmacs s27 , s7, s12 | ||||
| fldmias AO!, { s2 - s3 } | |||||
| vldmia.f32 AO!, { s2 - s3 } | |||||
| fmacs s20 , s4, s14 | fmacs s20 , s4, s14 | ||||
| fmacs s28 , s5, s15 | fmacs s28 , s5, s15 | ||||
| fldmias BO!, { s10 - s11 } | |||||
| vldmia.f32 BO!, { s10 - s11 } | |||||
| fmacs s21 , s4, s15 | fmacs s21 , s4, s15 | ||||
| fmacs s29 , s5, s14 | fmacs s29 , s5, s14 | ||||
| @@ -288,16 +288,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL2x2_SUB | .macro KERNEL2x2_SUB | ||||
| fldmias AO!, { s0 - s1 } | |||||
| fldmias BO!, { s8 - s9 } | |||||
| vldmia.f32 AO!, { s0 - s1 } | |||||
| vldmia.f32 BO!, { s8 - s9 } | |||||
| fmacs s16 , s0, s8 | fmacs s16 , s0, s8 | ||||
| fmacs s24 , s1, s9 | fmacs s24 , s1, s9 | ||||
| fldmias AO!, { s2 - s3 } | |||||
| vldmia.f32 AO!, { s2 - s3 } | |||||
| fmacs s17 , s0, s9 | fmacs s17 , s0, s9 | ||||
| fmacs s25 , s1, s8 | fmacs s25 , s1, s8 | ||||
| fldmias BO!, { s10 - s11 } | |||||
| vldmia.f32 BO!, { s10 - s11 } | |||||
| fmacs s18 , s2, s8 | fmacs s18 , s2, s8 | ||||
| fmacs s26 , s3, s9 | fmacs s26 , s3, s9 | ||||
| fmacs s19 , s2, s9 | fmacs s19 , s2, s9 | ||||
| @@ -354,8 +354,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s10, s1 , s23 | FMAC_R2 s10, s1 , s23 | ||||
| FMAC_I2 s11, s1 , s22 | FMAC_I2 s11, s1 , s22 | ||||
| fstmias CO1, { s4 - s7 } | |||||
| fstmias CO2, { s8 - s11 } | |||||
| vstmia.f32 CO1, { s4 - s7 } | |||||
| vstmia.f32 CO2, { s8 - s11 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -532,8 +532,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s8 , s1 , s21 | FMAC_R2 s8 , s1 , s21 | ||||
| FMAC_I2 s9 , s1 , s20 | FMAC_I2 s9 , s1 , s20 | ||||
| fstmias CO1, { s4 - s5 } | |||||
| fstmias CO2, { s8 - s9 } | |||||
| vstmia.f32 CO1, { s4 - s5 } | |||||
| vstmia.f32 CO2, { s8 - s9 } | |||||
| add CO1, CO1, #8 | add CO1, CO1, #8 | ||||
| @@ -710,7 +710,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s6 , s1 , s19 | FMAC_R2 s6 , s1 , s19 | ||||
| FMAC_I2 s7 , s1 , s18 | FMAC_I2 s7 , s1 , s18 | ||||
| fstmias CO1, { s4 - s7 } | |||||
| vstmia.f32 CO1, { s4 - s7 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -835,7 +835,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 s4 , s1 , s17 | FMAC_R2 s4 , s1 , s17 | ||||
| FMAC_I2 s5 , s1 , s16 | FMAC_I2 s5 , s1 , s16 | ||||
| fstmias CO1, { s4 - s5 } | |||||
| vstmia.f32 CO1, { s4 - s5 } | |||||
| add CO1, CO1, #8 | add CO1, CO1, #8 | ||||
| @@ -65,15 +65,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_F4 | .macro COPY_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X!, { d0 - d3 } | |||||
| fstmiad Y!, { d0 - d3 } | |||||
| vldmia.f64 X!, { d0 - d3 } | |||||
| vstmia.f64 Y!, { d0 - d3 } | |||||
| .endm | .endm | ||||
| .macro COPY_F1 | .macro COPY_F1 | ||||
| fldmiad X!, { d0 } | |||||
| fstmiad Y!, { d0 } | |||||
| vldmia.f64 X!, { d0 } | |||||
| vstmia.f64 Y!, { d0 } | |||||
| .endm | .endm | ||||
| @@ -83,23 +83,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_S4 | .macro COPY_S4 | ||||
| nop | nop | ||||
| fldmiad X, { d0 } | |||||
| fstmiad Y, { d0 } | |||||
| vldmia.f64 X, { d0 } | |||||
| vstmia.f64 Y, { d0 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmiad X, { d1 } | |||||
| fstmiad Y, { d1 } | |||||
| vldmia.f64 X, { d1 } | |||||
| vstmia.f64 Y, { d1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmiad X, { d0 } | |||||
| fstmiad Y, { d0 } | |||||
| vldmia.f64 X, { d0 } | |||||
| vstmia.f64 Y, { d0 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmiad X, { d1 } | |||||
| fstmiad Y, { d1 } | |||||
| vldmia.f64 X, { d1 } | |||||
| vstmia.f64 Y, { d1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -108,8 +108,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_S1 | .macro COPY_S1 | ||||
| fldmiad X, { d0 } | |||||
| fstmiad Y, { d0 } | |||||
| vldmia.f64 X, { d0 } | |||||
| vstmia.f64 Y, { d0 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -67,26 +67,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X!, { d8 } | |||||
| vldmia.f64 X!, { d8 } | |||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad Y!, { d4 } | |||||
| fldmiad Y!, { d5 } | |||||
| vldmia.f64 Y!, { d4 } | |||||
| vldmia.f64 Y!, { d5 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fldmiad X!, { d9 } | |||||
| fldmiad Y!, { d6 } | |||||
| vldmia.f64 X!, { d9 } | |||||
| vldmia.f64 Y!, { d6 } | |||||
| fmacd d1 , d5, d9 | fmacd d1 , d5, d9 | ||||
| fldmiad X!, { d10 } | |||||
| fldmiad X!, { d11 } | |||||
| vldmia.f64 X!, { d10 } | |||||
| vldmia.f64 X!, { d11 } | |||||
| fmacd d0 , d6, d10 | fmacd d0 , d6, d10 | ||||
| fldmiad Y!, { d7 } | |||||
| vldmia.f64 Y!, { d7 } | |||||
| fmacd d1 , d7, d11 | fmacd d1 , d7, d11 | ||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 } | |||||
| fldmiad Y!, { d8 } | |||||
| vldmia.f64 X!, { d4 } | |||||
| vldmia.f64 Y!, { d8 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| .endm | .endm | ||||
| @@ -97,26 +97,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S4 | .macro KERNEL_S4 | ||||
| nop | nop | ||||
| fldmiad X, { d4 } | |||||
| fldmiad Y, { d8 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vldmia.f64 Y, { d8 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fldmiad X, { d5 } | |||||
| fldmiad Y, { d9 } | |||||
| vldmia.f64 X, { d5 } | |||||
| vldmia.f64 Y, { d9 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fmacd d1 , d5, d9 | fmacd d1 , d5, d9 | ||||
| fldmiad X, { d6 } | |||||
| fldmiad Y, { d10 } | |||||
| vldmia.f64 X, { d6 } | |||||
| vldmia.f64 Y, { d10 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fmacd d0 , d6, d10 | fmacd d0 , d6, d10 | ||||
| fldmiad X, { d7 } | |||||
| fldmiad Y, { d11 } | |||||
| vldmia.f64 X, { d7 } | |||||
| vldmia.f64 Y, { d11 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fmacd d1 , d7, d11 | fmacd d1 , d7, d11 | ||||
| @@ -126,8 +126,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 } | |||||
| fldmiad Y, { d8 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vldmia.f64 Y, { d8 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -331,7 +331,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add r4 , CO2, r3 | add r4 , CO2, r3 | ||||
| pld [ CO2 , #C_PRE ] | pld [ CO2 , #C_PRE ] | ||||
| fldmiad CO1, { d8 - d11 } | |||||
| vldmia.f64 CO1, { d8 - d11 } | |||||
| pld [ r4 , #C_PRE ] | pld [ r4 , #C_PRE ] | ||||
| fmacd d8 , d0 , d16 | fmacd d8 , d0 , d16 | ||||
| @@ -352,7 +352,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fmacd d15, d0 , d23 | fmacd d15, d0 , d23 | ||||
| fstd d11, [CO1, #24 ] | fstd d11, [CO1, #24 ] | ||||
| fldmiad r4, { d8 - d11 } | |||||
| vldmia.f64 r4, { d8 - d11 } | |||||
| fmacd d8 , d0 , d24 | fmacd d8 , d0 , d24 | ||||
| fstd d12, [CO2] | fstd d12, [CO2] | ||||
| @@ -367,7 +367,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ CO2 , #C_PRE ] | pld [ CO2 , #C_PRE ] | ||||
| fldmiad CO2, { d12 - d15 } | |||||
| vldmia.f64 CO2, { d12 - d15 } | |||||
| fstd d8 , [r4 ] | fstd d8 , [r4 ] | ||||
| fmacd d12, d0 , d28 | fmacd d12, d0 , d28 | ||||
| @@ -378,7 +378,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fstd d11, [r4 , #24 ] | fstd d11, [r4 , #24 ] | ||||
| fmacd d15, d0 , d31 | fmacd d15, d0 , d31 | ||||
| fstmiad CO2, { d12 - d15 } | |||||
| vstmia.f64 CO2, { d12 - d15 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -73,7 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d3 , [ AO2, #8 ] | fldd d3 , [ AO2, #8 ] | ||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| fstmiad BO!, { d0 - d3 } | |||||
| vstmia.f64 BO!, { d0 - d3 } | |||||
| add AO2, AO2, #16 | add AO2, AO2, #16 | ||||
| .endm | .endm | ||||
| @@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d1 , [ AO2, #0 ] | fldd d1 , [ AO2, #0 ] | ||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| fstmiad BO!, { d0 - d1 } | |||||
| vstmia.f64 BO!, { d0 - d1 } | |||||
| add AO2, AO2, #8 | add AO2, AO2, #8 | ||||
| .endm | .endm | ||||
| @@ -95,7 +95,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0 , [ AO1, #0 ] | fldd d0 , [ AO1, #0 ] | ||||
| fldd d1 , [ AO1, #8 ] | fldd d1 , [ AO1, #8 ] | ||||
| fstmiad BO!, { d0 - d1 } | |||||
| vstmia.f64 BO!, { d0 - d1 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| .endm | .endm | ||||
| @@ -105,7 +105,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0 , [ AO1, #0 ] | fldd d0 , [ AO1, #0 ] | ||||
| fstmiad BO!, { d0 } | |||||
| vstmia.f64 BO!, { d0 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| .endm | .endm | ||||
| @@ -105,10 +105,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d11, [ AO4, #16 ] | fldd d11, [ AO4, #16 ] | ||||
| fldd d15, [ AO4, #24 ] | fldd d15, [ AO4, #24 ] | ||||
| fstmiad BO!, { d0 - d3 } | |||||
| vstmia.f64 BO!, { d0 - d3 } | |||||
| add AO4, AO4, #32 | add AO4, AO4, #32 | ||||
| fstmiad BO!, { d4 - d7 } | |||||
| fstmiad BO!, { d8 - d15 } | |||||
| vstmia.f64 BO!, { d4 - d7 } | |||||
| vstmia.f64 BO!, { d8 - d15 } | |||||
| .endm | .endm | ||||
| @@ -122,7 +122,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d3 , [ AO4, #0 ] | fldd d3 , [ AO4, #0 ] | ||||
| add AO3, AO3, #8 | add AO3, AO3, #8 | ||||
| fstmiad BO!, { d0 - d3 } | |||||
| vstmia.f64 BO!, { d0 - d3 } | |||||
| add AO4, AO4, #8 | add AO4, AO4, #8 | ||||
| .endm | .endm | ||||
| @@ -140,7 +140,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d5 , [ AO2, #16 ] | fldd d5 , [ AO2, #16 ] | ||||
| fldd d7 , [ AO2, #24 ] | fldd d7 , [ AO2, #24 ] | ||||
| fstmiad BO!, { d0 - d7 } | |||||
| vstmia.f64 BO!, { d0 - d7 } | |||||
| add AO2, AO2, #32 | add AO2, AO2, #32 | ||||
| .endm | .endm | ||||
| @@ -152,7 +152,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d1 , [ AO2, #0 ] | fldd d1 , [ AO2, #0 ] | ||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| fstmiad BO!, { d0 - d1 } | |||||
| vstmia.f64 BO!, { d0 - d1 } | |||||
| add AO2, AO2, #8 | add AO2, AO2, #8 | ||||
| .endm | .endm | ||||
| @@ -164,7 +164,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d2 , [ AO1, #16 ] | fldd d2 , [ AO1, #16 ] | ||||
| fldd d3 , [ AO1, #24 ] | fldd d3 , [ AO1, #24 ] | ||||
| fstmiad BO!, { d0 - d3 } | |||||
| vstmia.f64 BO!, { d0 - d3 } | |||||
| add AO1, AO1, #32 | add AO1, AO1, #32 | ||||
| .endm | .endm | ||||
| @@ -174,7 +174,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0 , [ AO1, #0 ] | fldd d0 , [ AO1, #0 ] | ||||
| fstmiad BO!, { d0 } | |||||
| vstmia.f64 BO!, { d0 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| .endm | .endm | ||||
| @@ -76,21 +76,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY4x4 | .macro COPY4x4 | ||||
| pld [ AO1, #A_PRE ] | pld [ AO1, #A_PRE ] | ||||
| fldmiad AO1, { d0 - d3 } | |||||
| vldmia.f64 AO1, { d0 - d3 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| pld [ r3, #A_PRE ] | pld [ r3, #A_PRE ] | ||||
| fldmiad r3, { d4 - d7 } | |||||
| vldmia.f64 r3, { d4 - d7 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| pld [ r3, #A_PRE ] | pld [ r3, #A_PRE ] | ||||
| fldmiad r3, { d8 - d11 } | |||||
| vldmia.f64 r3, { d8 - d11 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| pld [ r3, #A_PRE ] | pld [ r3, #A_PRE ] | ||||
| fldmiad r3, { d12 - d15 } | |||||
| vldmia.f64 r3, { d12 - d15 } | |||||
| fstmiad BO1, { d0 - d15 } | |||||
| vstmia.f64 BO1, { d0 - d15 } | |||||
| add AO1, AO1, #32 | add AO1, AO1, #32 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -98,18 +98,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY2x4 | .macro COPY2x4 | ||||
| fldmiad AO1, { d0 - d1 } | |||||
| vldmia.f64 AO1, { d0 - d1 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmiad r3, { d2 - d3 } | |||||
| vldmia.f64 r3, { d2 - d3 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmiad r3, { d4 - d5 } | |||||
| vldmia.f64 r3, { d4 - d5 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmiad r3, { d6 - d7 } | |||||
| vldmia.f64 r3, { d6 - d7 } | |||||
| fstmiad BO2, { d0 - d7 } | |||||
| vstmia.f64 BO2, { d0 - d7 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO2, BO2, #64 | add BO2, BO2, #64 | ||||
| @@ -117,18 +117,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x4 | .macro COPY1x4 | ||||
| fldmiad AO1, { d0 } | |||||
| vldmia.f64 AO1, { d0 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmiad r3, { d1 } | |||||
| vldmia.f64 r3, { d1 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmiad r3, { d2 } | |||||
| vldmia.f64 r3, { d2 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmiad r3, { d3 } | |||||
| vldmia.f64 r3, { d3 } | |||||
| fstmiad BO3, { d0 - d3 } | |||||
| vstmia.f64 BO3, { d0 - d3 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| add BO3, BO3, #32 | add BO3, BO3, #32 | ||||
| @@ -139,13 +139,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY4x2 | .macro COPY4x2 | ||||
| pld [ AO1, #A_PRE ] | pld [ AO1, #A_PRE ] | ||||
| fldmiad AO1, { d0 - d3 } | |||||
| vldmia.f64 AO1, { d0 - d3 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| pld [ r3, #A_PRE ] | pld [ r3, #A_PRE ] | ||||
| fldmiad r3, { d4 - d7 } | |||||
| vldmia.f64 r3, { d4 - d7 } | |||||
| fstmiad BO1, { d0 - d7 } | |||||
| vstmia.f64 BO1, { d0 - d7 } | |||||
| add AO1, AO1, #32 | add AO1, AO1, #32 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -153,12 +153,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY2x2 | .macro COPY2x2 | ||||
| fldmiad AO1, { d0 - d1 } | |||||
| vldmia.f64 AO1, { d0 - d1 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmiad r3, { d2 - d3 } | |||||
| vldmia.f64 r3, { d2 - d3 } | |||||
| fstmiad BO2, { d0 - d3 } | |||||
| vstmia.f64 BO2, { d0 - d3 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO2, BO2, #32 | add BO2, BO2, #32 | ||||
| @@ -166,12 +166,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x2 | .macro COPY1x2 | ||||
| fldmiad AO1, { d0 } | |||||
| vldmia.f64 AO1, { d0 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmiad r3, { d1 } | |||||
| vldmia.f64 r3, { d1 } | |||||
| fstmiad BO3, { d0 - d1 } | |||||
| vstmia.f64 BO3, { d0 - d1 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| add BO3, BO3, #16 | add BO3, BO3, #16 | ||||
| @@ -182,9 +182,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY4x1 | .macro COPY4x1 | ||||
| pld [ AO1, #A_PRE ] | pld [ AO1, #A_PRE ] | ||||
| fldmiad AO1, { d0 - d3 } | |||||
| vldmia.f64 AO1, { d0 - d3 } | |||||
| fstmiad BO1, { d0 - d3 } | |||||
| vstmia.f64 BO1, { d0 - d3 } | |||||
| add AO1, AO1, #32 | add AO1, AO1, #32 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -192,9 +192,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY2x1 | .macro COPY2x1 | ||||
| fldmiad AO1, { d0 - d1 } | |||||
| vldmia.f64 AO1, { d0 - d1 } | |||||
| fstmiad BO2, { d0 - d1 } | |||||
| vstmia.f64 BO2, { d0 - d1 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO2, BO2, #16 | add BO2, BO2, #16 | ||||
| @@ -202,9 +202,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x1 | .macro COPY1x1 | ||||
| fldmiad AO1, { d0 } | |||||
| vldmia.f64 AO1, { d0 } | |||||
| fstmiad BO3, { d0 } | |||||
| vstmia.f64 BO3, { d0 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| add BO3, BO3, #8 | add BO3, BO3, #8 | ||||
| @@ -128,10 +128,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d8 , [ BO ] | fldd d8 , [ BO ] | ||||
| pld [ AO , #A_PRE ] | pld [ AO , #A_PRE ] | ||||
| fldmiad AO!, { d0 - d1} | |||||
| vldmia.f64 AO!, { d0 - d1} | |||||
| fmuld d16 , d0, d8 | fmuld d16 , d0, d8 | ||||
| fldmiad AO!, { d2 - d3} | |||||
| vldmia.f64 AO!, { d2 - d3} | |||||
| fmuld d17 , d1, d8 | fmuld d17 , d1, d8 | ||||
| fldd d9 , [ BO, #8 ] | fldd d9 , [ BO, #8 ] | ||||
| fmuld d18 , d2, d8 | fmuld d18 , d2, d8 | ||||
| @@ -148,10 +148,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fmuld d23 , d3, d9 | fmuld d23 , d3, d9 | ||||
| fmuld d24 , d0, d10 | fmuld d24 , d0, d10 | ||||
| fldmiad AO!, { d4 - d5 } | |||||
| vldmia.f64 AO!, { d4 - d5 } | |||||
| fmuld d25 , d1, d10 | fmuld d25 , d1, d10 | ||||
| fmuld d26 , d2, d10 | fmuld d26 , d2, d10 | ||||
| fldmiad AO!, { d6 - d7 } | |||||
| vldmia.f64 AO!, { d6 - d7 } | |||||
| fmuld d27 , d3, d10 | fmuld d27 , d3, d10 | ||||
| fldd d13, [ BO, #8 ] | fldd d13, [ BO, #8 ] | ||||
| @@ -173,10 +173,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d8 , [ BO ] | fldd d8 , [ BO ] | ||||
| pld [ AO , #A_PRE ] | pld [ AO , #A_PRE ] | ||||
| fldmiad AO!, { d0 - d1} | |||||
| vldmia.f64 AO!, { d0 - d1} | |||||
| fmacd d16 , d0, d8 | fmacd d16 , d0, d8 | ||||
| fldmiad AO!, { d2 - d3} | |||||
| vldmia.f64 AO!, { d2 - d3} | |||||
| fmacd d17 , d1, d8 | fmacd d17 , d1, d8 | ||||
| fldd d9 , [ BO, #8 ] | fldd d9 , [ BO, #8 ] | ||||
| fmacd d18 , d2, d8 | fmacd d18 , d2, d8 | ||||
| @@ -193,10 +193,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fmacd d23 , d3, d9 | fmacd d23 , d3, d9 | ||||
| fmacd d24 , d0, d10 | fmacd d24 , d0, d10 | ||||
| fldmiad AO!, { d4 - d5 } | |||||
| vldmia.f64 AO!, { d4 - d5 } | |||||
| fmacd d25 , d1, d10 | fmacd d25 , d1, d10 | ||||
| fmacd d26 , d2, d10 | fmacd d26 , d2, d10 | ||||
| fldmiad AO!, { d6 - d7 } | |||||
| vldmia.f64 AO!, { d6 - d7 } | |||||
| fmacd d27 , d3, d10 | fmacd d27 , d3, d10 | ||||
| fldd d13, [ BO, #8 ] | fldd d13, [ BO, #8 ] | ||||
| @@ -225,11 +225,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d8 , [ BO ] | fldd d8 , [ BO ] | ||||
| fmacd d21 , d5, d13 | fmacd d21 , d5, d13 | ||||
| fmacd d22 , d6, d13 | fmacd d22 , d6, d13 | ||||
| fldmiad AO!, { d0 - d1 } | |||||
| vldmia.f64 AO!, { d0 - d1 } | |||||
| fmacd d23 , d7, d13 | fmacd d23 , d7, d13 | ||||
| fmacd d24 , d4, d14 | fmacd d24 , d4, d14 | ||||
| fldmiad AO!, { d2 - d3 } | |||||
| vldmia.f64 AO!, { d2 - d3 } | |||||
| fmacd d25 , d5, d14 | fmacd d25 , d5, d14 | ||||
| fldd d9 , [ BO, #8 ] | fldd d9 , [ BO, #8 ] | ||||
| fmacd d26 , d6, d14 | fmacd d26 , d6, d14 | ||||
| @@ -257,10 +257,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fmacd d19 , d3, d8 | fmacd d19 , d3, d8 | ||||
| fmacd d20 , d0, d9 | fmacd d20 , d0, d9 | ||||
| fldmiad AO!, { d4 - d5 } | |||||
| vldmia.f64 AO!, { d4 - d5 } | |||||
| fmacd d21 , d1, d9 | fmacd d21 , d1, d9 | ||||
| fmacd d22 , d2, d9 | fmacd d22 , d2, d9 | ||||
| fldmiad AO!, { d6 - d7 } | |||||
| vldmia.f64 AO!, { d6 - d7 } | |||||
| fmacd d23 , d3, d9 | fmacd d23 , d3, d9 | ||||
| fmacd d24 , d0, d10 | fmacd d24 , d0, d10 | ||||
| @@ -390,7 +390,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fstd d11, [r4 , #24 ] | fstd d11, [r4 , #24 ] | ||||
| fmuld d15, d0 , d31 | fmuld d15, d0 , d31 | ||||
| fstmiad CO2, { d12 - d15 } | |||||
| vstmia.f64 CO2, { d12 - d15 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -139,8 +139,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F8X1 | .macro KERNEL_F8X1 | ||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| fldmiad XO! , { d2 } | |||||
| fldmiad AO1 , { d4 - d7 } | |||||
| vldmia.f64 XO! , { d2 } | |||||
| vldmia.f64 AO1 , { d4 - d7 } | |||||
| vmla.f64 d8 , d2 , d4 | vmla.f64 d8 , d2 , d4 | ||||
| pld [ AO2 , #4*SIZE ] | pld [ AO2 , #4*SIZE ] | ||||
| @@ -150,7 +150,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vmla.f64 d11 , d2 , d7 | vmla.f64 d11 , d2 , d7 | ||||
| fldmiad r3 , { d4 - d7 } | |||||
| vldmia.f64 r3 , { d4 - d7 } | |||||
| vmla.f64 d12 , d2 , d4 | vmla.f64 d12 , d2 , d4 | ||||
| vmla.f64 d13 , d2 , d5 | vmla.f64 d13 , d2 , d5 | ||||
| @@ -164,23 +164,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F8 | .macro SAVE_F8 | ||||
| fldmiad YO, { d4 - d7 } | |||||
| vldmia.f64 YO, { d4 - d7 } | |||||
| vmla.f64 d4 , d0, d8 | vmla.f64 d4 , d0, d8 | ||||
| vmla.f64 d5 , d0, d9 | vmla.f64 d5 , d0, d9 | ||||
| vmla.f64 d6 , d0, d10 | vmla.f64 d6 , d0, d10 | ||||
| vmla.f64 d7 , d0, d11 | vmla.f64 d7 , d0, d11 | ||||
| fstmiad YO!, { d4 - d7 } | |||||
| vstmia.f64 YO!, { d4 - d7 } | |||||
| fldmiad YO, { d4 - d7 } | |||||
| vldmia.f64 YO, { d4 - d7 } | |||||
| vmla.f64 d4 , d0, d12 | vmla.f64 d4 , d0, d12 | ||||
| vmla.f64 d5 , d0, d13 | vmla.f64 d5 , d0, d13 | ||||
| vmla.f64 d6 , d0, d14 | vmla.f64 d6 , d0, d14 | ||||
| vmla.f64 d7 , d0, d15 | vmla.f64 d7 , d0, d15 | ||||
| fstmiad YO!, { d4 - d7 } | |||||
| vstmia.f64 YO!, { d4 - d7 } | |||||
| .endm | .endm | ||||
| @@ -195,8 +195,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmiad XO! , { d2 } | |||||
| fldmiad AO1 , { d8 } | |||||
| vldmia.f64 XO! , { d2 } | |||||
| vldmia.f64 AO1 , { d8 } | |||||
| vmla.f64 d12 , d2 , d8 | vmla.f64 d12 , d2 , d8 | ||||
| add AO1, AO1, LDA | add AO1, AO1, LDA | ||||
| @@ -204,9 +204,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmiad YO, { d4 } | |||||
| vldmia.f64 YO, { d4 } | |||||
| vmla.f64 d4, d0, d12 | vmla.f64 d4, d0, d12 | ||||
| fstmiad YO!, { d4 } | |||||
| vstmia.f64 YO!, { d4 } | |||||
| .endm | .endm | ||||
| @@ -234,8 +234,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S4X1 | .macro KERNEL_S4X1 | ||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| fldmiad XO , { d2 } | |||||
| fldmiad AO1 , { d8 - d11 } | |||||
| vldmia.f64 XO , { d2 } | |||||
| vldmia.f64 AO1 , { d8 - d11 } | |||||
| vmla.f64 d12 , d2 , d8 | vmla.f64 d12 , d2 , d8 | ||||
| add AO1, AO1, LDA | add AO1, AO1, LDA | ||||
| @@ -249,24 +249,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S4 | .macro SAVE_S4 | ||||
| fldmiad YO, { d4 } | |||||
| vldmia.f64 YO, { d4 } | |||||
| vmla.f64 d4 , d0, d12 | vmla.f64 d4 , d0, d12 | ||||
| fstmiad YO, { d4 } | |||||
| vstmia.f64 YO, { d4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d5 } | |||||
| vldmia.f64 YO, { d5 } | |||||
| vmla.f64 d5 , d0, d13 | vmla.f64 d5 , d0, d13 | ||||
| fstmiad YO, { d5 } | |||||
| vstmia.f64 YO, { d5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d4 } | |||||
| vldmia.f64 YO, { d4 } | |||||
| vmla.f64 d4 , d0, d14 | vmla.f64 d4 , d0, d14 | ||||
| fstmiad YO, { d4 } | |||||
| vstmia.f64 YO, { d4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d5 } | |||||
| vldmia.f64 YO, { d5 } | |||||
| vmla.f64 d5 , d0, d15 | vmla.f64 d5 , d0, d15 | ||||
| fstmiad YO, { d5 } | |||||
| vstmia.f64 YO, { d5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmiad XO , { d2 } | |||||
| fldmiad AO1 , { d8 } | |||||
| vldmia.f64 XO , { d2 } | |||||
| vldmia.f64 AO1 , { d8 } | |||||
| vmla.f64 d12 , d2 , d8 | vmla.f64 d12 , d2 , d8 | ||||
| add AO1, AO1, LDA | add AO1, AO1, LDA | ||||
| add XO, XO , INC_X | add XO, XO , INC_X | ||||
| @@ -292,9 +292,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmiad YO, { d4 } | |||||
| vldmia.f64 YO, { d4 } | |||||
| vmla.f64 d4, d0, d12 | vmla.f64 d4, d0, d12 | ||||
| fstmiad YO , { d4 } | |||||
| vstmia.f64 YO , { d4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -338,8 +338,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F8X1 | .macro KERNEL_F8X1 | ||||
| pld [ AO2, #A_PRE ] | pld [ AO2, #A_PRE ] | ||||
| fldmias XO! , { s2 } | |||||
| fldmias AO1 , { s4 - s7 } | |||||
| vldmia.f32 XO! , { s2 } | |||||
| vldmia.f32 AO1 , { s4 - s7 } | |||||
| vmla.f32 s8 , s2 , s4 | vmla.f32 s8 , s2 , s4 | ||||
| vmla.f32 s9 , s2 , s5 | vmla.f32 s9 , s2 , s5 | ||||
| @@ -348,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add r3, AO1, #4*SIZE | add r3, AO1, #4*SIZE | ||||
| fldmias r3 , { s4 - s7 } | |||||
| vldmia.f32 r3 , { s4 - s7 } | |||||
| vmla.f32 s12 , s2 , s4 | vmla.f32 s12 , s2 , s4 | ||||
| vmla.f32 s13 , s2 , s5 | vmla.f32 s13 , s2 , s5 | ||||
| @@ -362,24 +362,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F8 | .macro SAVE_F8 | ||||
| fldmias YO, { s4 - s7 } | |||||
| vldmia.f32 YO, { s4 - s7 } | |||||
| vmla.f32 s4 , s0, s8 | vmla.f32 s4 , s0, s8 | ||||
| vmla.f32 s5 , s0, s9 | vmla.f32 s5 , s0, s9 | ||||
| vmla.f32 s6 , s0, s10 | vmla.f32 s6 , s0, s10 | ||||
| vmla.f32 s7 , s0, s11 | vmla.f32 s7 , s0, s11 | ||||
| fstmias YO!, { s4 - s7 } | |||||
| vstmia.f32 YO!, { s4 - s7 } | |||||
| fldmias YO, { s4 - s7 } | |||||
| vldmia.f32 YO, { s4 - s7 } | |||||
| vmla.f32 s4 , s0, s12 | vmla.f32 s4 , s0, s12 | ||||
| vmla.f32 s5 , s0, s13 | vmla.f32 s5 , s0, s13 | ||||
| vmla.f32 s6 , s0, s14 | vmla.f32 s6 , s0, s14 | ||||
| vmla.f32 s7 , s0, s15 | vmla.f32 s7 , s0, s15 | ||||
| fstmias YO!, { s4 - s7 } | |||||
| vstmia.f32 YO!, { s4 - s7 } | |||||
| .endm | .endm | ||||
| @@ -394,8 +394,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmias XO! , { s2 } | |||||
| fldmias AO1 , { s8 } | |||||
| vldmia.f32 XO! , { s2 } | |||||
| vldmia.f32 AO1 , { s8 } | |||||
| vmla.f32 s12 , s2 , s8 | vmla.f32 s12 , s2 , s8 | ||||
| add AO1, AO1, LDA | add AO1, AO1, LDA | ||||
| @@ -403,9 +403,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmias YO, { s4 } | |||||
| vldmia.f32 YO, { s4 } | |||||
| vmla.f32 s4, s0, s12 | vmla.f32 s4, s0, s12 | ||||
| fstmias YO!, { s4 } | |||||
| vstmia.f32 YO!, { s4 } | |||||
| .endm | .endm | ||||
| @@ -434,8 +434,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S4X1 | .macro KERNEL_S4X1 | ||||
| fldmias XO , { s2 } | |||||
| fldmias AO1 , { s8 - s11 } | |||||
| vldmia.f32 XO , { s2 } | |||||
| vldmia.f32 AO1 , { s8 - s11 } | |||||
| vmla.f32 s12 , s2 , s8 | vmla.f32 s12 , s2 , s8 | ||||
| vmla.f32 s13 , s2 , s9 | vmla.f32 s13 , s2 , s9 | ||||
| @@ -449,24 +449,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S4 | .macro SAVE_S4 | ||||
| fldmias YO, { s4 } | |||||
| vldmia.f32 YO, { s4 } | |||||
| vmla.f32 s4 , s0, s12 | vmla.f32 s4 , s0, s12 | ||||
| fstmias YO, { s4 } | |||||
| vstmia.f32 YO, { s4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s5 } | |||||
| vldmia.f32 YO, { s5 } | |||||
| vmla.f32 s5 , s0, s13 | vmla.f32 s5 , s0, s13 | ||||
| fstmias YO, { s5 } | |||||
| vstmia.f32 YO, { s5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s4 } | |||||
| vldmia.f32 YO, { s4 } | |||||
| vmla.f32 s4 , s0, s14 | vmla.f32 s4 , s0, s14 | ||||
| fstmias YO, { s4 } | |||||
| vstmia.f32 YO, { s4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s5 } | |||||
| vldmia.f32 YO, { s5 } | |||||
| vmla.f32 s5 , s0, s15 | vmla.f32 s5 , s0, s15 | ||||
| fstmias YO, { s5 } | |||||
| vstmia.f32 YO, { s5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -482,8 +482,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmias XO , { s2 } | |||||
| fldmias AO1 , { s8 } | |||||
| vldmia.f32 XO , { s2 } | |||||
| vldmia.f32 AO1 , { s8 } | |||||
| vmla.f32 s12 , s2 , s8 | vmla.f32 s12 , s2 , s8 | ||||
| add AO1, AO1, LDA | add AO1, AO1, LDA | ||||
| add XO, XO , INC_X | add XO, XO , INC_X | ||||
| @@ -492,9 +492,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmias YO, { s4 } | |||||
| vldmia.f32 YO, { s4 } | |||||
| vmla.f32 s4, s0, s12 | vmla.f32 s4, s0, s12 | ||||
| fstmias YO , { s4 } | |||||
| vstmia.f32 YO , { s4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -138,8 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F8X1 | .macro KERNEL_F8X1 | ||||
| fldmiad XO! , { d4 } | |||||
| fldmiad AO1 , { d8 - d15 } | |||||
| vldmia.f64 XO! , { d4 } | |||||
| vldmia.f64 AO1 , { d8 - d15 } | |||||
| vmla.f64 d24 , d4 , d8 | vmla.f64 d24 , d4 , d8 | ||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| @@ -158,7 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F8 | .macro SAVE_F8 | ||||
| fldmiad YO, { d16 - d23 } | |||||
| vldmia.f64 YO, { d16 - d23 } | |||||
| vmla.f64 d16, d0, d24 | vmla.f64 d16, d0, d24 | ||||
| vmla.f64 d17, d0, d25 | vmla.f64 d17, d0, d25 | ||||
| @@ -169,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vmla.f64 d22, d0, d30 | vmla.f64 d22, d0, d30 | ||||
| vmla.f64 d23, d0, d31 | vmla.f64 d23, d0, d31 | ||||
| fstmiad YO!, { d16 - d23 } | |||||
| vstmia.f64 YO!, { d16 - d23 } | |||||
| .endm | .endm | ||||
| @@ -184,8 +184,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmiad XO! , { d4 } | |||||
| fldmiad AO1 , { d8 } | |||||
| vldmia.f64 XO! , { d4 } | |||||
| vldmia.f64 AO1 , { d8 } | |||||
| vmla.f64 d24 , d4 , d8 | vmla.f64 d24 , d4 , d8 | ||||
| add AO1, AO1, LDA | add AO1, AO1, LDA | ||||
| @@ -193,9 +193,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmiad YO, { d16 } | |||||
| vldmia.f64 YO, { d16 } | |||||
| vmla.f64 d16, d0, d24 | vmla.f64 d16, d0, d24 | ||||
| fstmiad YO!, { d16 } | |||||
| vstmia.f64 YO!, { d16 } | |||||
| .endm | .endm | ||||
| @@ -234,8 +234,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| pld [ AO2 , #A_PRE+32 ] | pld [ AO2 , #A_PRE+32 ] | ||||
| fldmiad XO , { d4 } | |||||
| fldmiad AO1 , { d8 - d15 } | |||||
| vldmia.f64 XO , { d4 } | |||||
| vldmia.f64 AO1 , { d8 - d15 } | |||||
| vmla.f64 d24 , d4 , d8 | vmla.f64 d24 , d4 , d8 | ||||
| vmla.f64 d25 , d4 , d9 | vmla.f64 d25 , d4 , d9 | ||||
| @@ -253,44 +253,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S8 | .macro SAVE_S8 | ||||
| fldmiad YO, { d16 } | |||||
| vldmia.f64 YO, { d16 } | |||||
| vmla.f64 d16, d0, d24 | vmla.f64 d16, d0, d24 | ||||
| fstmiad YO, { d16 } | |||||
| vstmia.f64 YO, { d16 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d17 } | |||||
| vldmia.f64 YO, { d17 } | |||||
| vmla.f64 d17, d0, d25 | vmla.f64 d17, d0, d25 | ||||
| fstmiad YO, { d17 } | |||||
| vstmia.f64 YO, { d17 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d18 } | |||||
| vldmia.f64 YO, { d18 } | |||||
| vmla.f64 d18, d0, d26 | vmla.f64 d18, d0, d26 | ||||
| fstmiad YO, { d18 } | |||||
| vstmia.f64 YO, { d18 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d19 } | |||||
| vldmia.f64 YO, { d19 } | |||||
| vmla.f64 d19, d0, d27 | vmla.f64 d19, d0, d27 | ||||
| fstmiad YO, { d19 } | |||||
| vstmia.f64 YO, { d19 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d20 } | |||||
| vldmia.f64 YO, { d20 } | |||||
| vmla.f64 d20, d0, d28 | vmla.f64 d20, d0, d28 | ||||
| fstmiad YO, { d20 } | |||||
| vstmia.f64 YO, { d20 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d21 } | |||||
| vldmia.f64 YO, { d21 } | |||||
| vmla.f64 d21, d0, d29 | vmla.f64 d21, d0, d29 | ||||
| fstmiad YO, { d21 } | |||||
| vstmia.f64 YO, { d21 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d22 } | |||||
| vldmia.f64 YO, { d22 } | |||||
| vmla.f64 d22, d0, d30 | vmla.f64 d22, d0, d30 | ||||
| fstmiad YO, { d22 } | |||||
| vstmia.f64 YO, { d22 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d23 } | |||||
| vldmia.f64 YO, { d23 } | |||||
| vmla.f64 d23, d0, d31 | vmla.f64 d23, d0, d31 | ||||
| fstmiad YO, { d23 } | |||||
| vstmia.f64 YO, { d23 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -306,8 +306,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmiad XO , { d4 } | |||||
| fldmiad AO1 , { d8 } | |||||
| vldmia.f64 XO , { d4 } | |||||
| vldmia.f64 AO1 , { d8 } | |||||
| vmla.f64 d24 , d4 , d8 | vmla.f64 d24 , d4 , d8 | ||||
| add AO1, AO1, LDA | add AO1, AO1, LDA | ||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| @@ -316,9 +316,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmiad YO, { d16 } | |||||
| vldmia.f64 YO, { d16 } | |||||
| vmla.f64 d16, d0, d24 | vmla.f64 d16, d0, d24 | ||||
| fstmiad YO, { d16 } | |||||
| vstmia.f64 YO, { d16 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -361,8 +361,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F8X1 | .macro KERNEL_F8X1 | ||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| fldmias XO! , { s4 } | |||||
| fldmias AO1 , { s8 - s15 } | |||||
| vldmia.f32 XO! , { s4 } | |||||
| vldmia.f32 AO1 , { s8 - s15 } | |||||
| vmla.f32 s24 , s4 , s8 | vmla.f32 s24 , s4 , s8 | ||||
| vmla.f32 s25 , s4 , s9 | vmla.f32 s25 , s4 , s9 | ||||
| @@ -379,7 +379,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F8 | .macro SAVE_F8 | ||||
| fldmias YO, { s16 - s23 } | |||||
| vldmia.f32 YO, { s16 - s23 } | |||||
| vmla.f32 s16, s0, s24 | vmla.f32 s16, s0, s24 | ||||
| vmla.f32 s17, s0, s25 | vmla.f32 s17, s0, s25 | ||||
| @@ -390,7 +390,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| vmla.f32 s22, s0, s30 | vmla.f32 s22, s0, s30 | ||||
| vmla.f32 s23, s0, s31 | vmla.f32 s23, s0, s31 | ||||
| fstmias YO!, { s16 - s23 } | |||||
| vstmia.f32 YO!, { s16 - s23 } | |||||
| .endm | .endm | ||||
| @@ -405,8 +405,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmias XO! , { s4 } | |||||
| fldmias AO1 , { s8 } | |||||
| vldmia.f32 XO! , { s4 } | |||||
| vldmia.f32 AO1 , { s8 } | |||||
| vmla.f32 s24 , s4 , s8 | vmla.f32 s24 , s4 , s8 | ||||
| add AO1, AO1, LDA | add AO1, AO1, LDA | ||||
| @@ -414,9 +414,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmias YO, { s16 } | |||||
| vldmia.f32 YO, { s16 } | |||||
| vmla.f32 s16, s0, s24 | vmla.f32 s16, s0, s24 | ||||
| fstmias YO!, { s16 } | |||||
| vstmia.f32 YO!, { s16 } | |||||
| .endm | .endm | ||||
| @@ -454,8 +454,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S8X1 | .macro KERNEL_S8X1 | ||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| fldmias XO , { s4 } | |||||
| fldmias AO1 , { s8 - s15 } | |||||
| vldmia.f32 XO , { s4 } | |||||
| vldmia.f32 AO1 , { s8 - s15 } | |||||
| vmla.f32 s24 , s4 , s8 | vmla.f32 s24 , s4 , s8 | ||||
| vmla.f32 s25 , s4 , s9 | vmla.f32 s25 , s4 , s9 | ||||
| @@ -473,44 +473,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S8 | .macro SAVE_S8 | ||||
| fldmias YO, { s16 } | |||||
| vldmia.f32 YO, { s16 } | |||||
| vmla.f32 s16, s0, s24 | vmla.f32 s16, s0, s24 | ||||
| fstmias YO, { s16 } | |||||
| vstmia.f32 YO, { s16 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s17 } | |||||
| vldmia.f32 YO, { s17 } | |||||
| vmla.f32 s17, s0, s25 | vmla.f32 s17, s0, s25 | ||||
| fstmias YO, { s17 } | |||||
| vstmia.f32 YO, { s17 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s18 } | |||||
| vldmia.f32 YO, { s18 } | |||||
| vmla.f32 s18, s0, s26 | vmla.f32 s18, s0, s26 | ||||
| fstmias YO, { s18 } | |||||
| vstmia.f32 YO, { s18 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s19 } | |||||
| vldmia.f32 YO, { s19 } | |||||
| vmla.f32 s19, s0, s27 | vmla.f32 s19, s0, s27 | ||||
| fstmias YO, { s19 } | |||||
| vstmia.f32 YO, { s19 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s20 } | |||||
| vldmia.f32 YO, { s20 } | |||||
| vmla.f32 s20, s0, s28 | vmla.f32 s20, s0, s28 | ||||
| fstmias YO, { s20 } | |||||
| vstmia.f32 YO, { s20 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s21 } | |||||
| vldmia.f32 YO, { s21 } | |||||
| vmla.f32 s21, s0, s29 | vmla.f32 s21, s0, s29 | ||||
| fstmias YO, { s21 } | |||||
| vstmia.f32 YO, { s21 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s22 } | |||||
| vldmia.f32 YO, { s22 } | |||||
| vmla.f32 s22, s0, s30 | vmla.f32 s22, s0, s30 | ||||
| fstmias YO, { s22 } | |||||
| vstmia.f32 YO, { s22 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s23 } | |||||
| vldmia.f32 YO, { s23 } | |||||
| vmla.f32 s23, s0, s31 | vmla.f32 s23, s0, s31 | ||||
| fstmias YO, { s23 } | |||||
| vstmia.f32 YO, { s23 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -526,8 +526,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmias XO , { s4 } | |||||
| fldmias AO1 , { s8 } | |||||
| vldmia.f32 XO , { s4 } | |||||
| vldmia.f32 AO1 , { s8 } | |||||
| vmla.f32 s24 , s4 , s8 | vmla.f32 s24 , s4 , s8 | ||||
| add AO1, AO1, LDA | add AO1, AO1, LDA | ||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| @@ -536,9 +536,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmias YO, { s16 } | |||||
| vldmia.f32 YO, { s16 } | |||||
| vmla.f32 s16, s0, s24 | vmla.f32 s16, s0, s24 | ||||
| fstmias YO, { s16 } | |||||
| vstmia.f32 YO, { s16 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -112,13 +112,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X4 | .macro KERNEL_F2X4 | ||||
| pld [ XO , #X_PRE ] | pld [ XO , #X_PRE ] | ||||
| fldmiad XO! , { d12 - d15 } | |||||
| vldmia.f64 XO! , { d12 - d15 } | |||||
| pld [ AO1 , #A_PRE ] | pld [ AO1 , #A_PRE ] | ||||
| fldmiad AO1!, { d8 - d9 } | |||||
| vldmia.f64 AO1!, { d8 - d9 } | |||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| fldmiad AO2!, { d4 - d5 } | |||||
| fldmiad AO1!, { d10 - d11 } | |||||
| fldmiad AO2!, { d6 - d7 } | |||||
| vldmia.f64 AO2!, { d4 - d5 } | |||||
| vldmia.f64 AO1!, { d10 - d11 } | |||||
| vldmia.f64 AO2!, { d6 - d7 } | |||||
| vmla.f64 d2 , d12 , d8 | vmla.f64 d2 , d12 , d8 | ||||
| vmla.f64 d3 , d12 , d4 | vmla.f64 d3 , d12 , d4 | ||||
| @@ -133,9 +133,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X1 | .macro KERNEL_F2X1 | ||||
| fldmiad XO! , { d1 } | |||||
| fldmiad AO1!, { d8 } | |||||
| fldmiad AO2!, { d4 } | |||||
| vldmia.f64 XO! , { d1 } | |||||
| vldmia.f64 AO1!, { d8 } | |||||
| vldmia.f64 AO2!, { d4 } | |||||
| vmla.f64 d2 , d1 , d8 | vmla.f64 d2 , d1 , d8 | ||||
| vmla.f64 d3 , d1 , d4 | vmla.f64 d3 , d1 , d4 | ||||
| @@ -143,10 +143,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F2 | .macro SAVE_F2 | ||||
| fldmiad YO, { d4 - d5 } | |||||
| vldmia.f64 YO, { d4 - d5 } | |||||
| vmla.f64 d4, d0, d2 | vmla.f64 d4, d0, d2 | ||||
| vmla.f64 d5, d0, d3 | vmla.f64 d5, d0, d3 | ||||
| fstmiad YO!, { d4 - d5 } | |||||
| vstmia.f64 YO!, { d4 - d5 } | |||||
| .endm | .endm | ||||
| @@ -160,10 +160,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X4 | .macro KERNEL_F1X4 | ||||
| pld [ XO , #X_PRE ] | pld [ XO , #X_PRE ] | ||||
| fldmiad XO! , { d12 - d15 } | |||||
| vldmia.f64 XO! , { d12 - d15 } | |||||
| pld [ AO1 , #A_PRE ] | pld [ AO1 , #A_PRE ] | ||||
| fldmiad AO1!, { d8 - d9 } | |||||
| fldmiad AO1!, { d10 - d11 } | |||||
| vldmia.f64 AO1!, { d8 - d9 } | |||||
| vldmia.f64 AO1!, { d10 - d11 } | |||||
| vmla.f64 d2 , d12 , d8 | vmla.f64 d2 , d12 , d8 | ||||
| vmla.f64 d2 , d13 , d9 | vmla.f64 d2 , d13 , d9 | ||||
| vmla.f64 d2 , d14, d10 | vmla.f64 d2 , d14, d10 | ||||
| @@ -173,17 +173,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmiad XO! , { d1 } | |||||
| fldmiad AO1!, { d8 } | |||||
| vldmia.f64 XO! , { d1 } | |||||
| vldmia.f64 AO1!, { d8 } | |||||
| vmla.f64 d2 , d1 , d8 | vmla.f64 d2 , d1 , d8 | ||||
| .endm | .endm | ||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmiad YO, { d4 } | |||||
| vldmia.f64 YO, { d4 } | |||||
| vmla.f64 d4, d0, d2 | vmla.f64 d4, d0, d2 | ||||
| fstmiad YO!, { d4 } | |||||
| vstmia.f64 YO!, { d4 } | |||||
| .endm | .endm | ||||
| @@ -197,23 +197,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X4 | .macro KERNEL_S2X4 | ||||
| fldmiad XO , { d12 } | |||||
| vldmia.f64 XO , { d12 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| pld [ AO1 , #A_PRE ] | pld [ AO1 , #A_PRE ] | ||||
| fldmiad AO1!, { d8 - d9 } | |||||
| vldmia.f64 AO1!, { d8 - d9 } | |||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| fldmiad AO2!, { d4 - d5 } | |||||
| vldmia.f64 AO2!, { d4 - d5 } | |||||
| fldmiad XO , { d13 } | |||||
| vldmia.f64 XO , { d13 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmiad AO1!, { d10 - d11 } | |||||
| fldmiad AO2!, { d6 - d7 } | |||||
| vldmia.f64 AO1!, { d10 - d11 } | |||||
| vldmia.f64 AO2!, { d6 - d7 } | |||||
| fldmiad XO , { d14 } | |||||
| vldmia.f64 XO , { d14 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmiad XO , { d15 } | |||||
| vldmia.f64 XO , { d15 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f64 d2 , d12 , d8 | vmla.f64 d2 , d12 , d8 | ||||
| @@ -229,9 +229,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X1 | .macro KERNEL_S2X1 | ||||
| fldmiad XO , { d1 } | |||||
| fldmiad AO1!, { d8 } | |||||
| fldmiad AO2!, { d4 } | |||||
| vldmia.f64 XO , { d1 } | |||||
| vldmia.f64 AO1!, { d8 } | |||||
| vldmia.f64 AO2!, { d4 } | |||||
| vmla.f64 d2 , d1 , d8 | vmla.f64 d2 , d1 , d8 | ||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f64 d3 , d1 , d4 | vmla.f64 d3 , d1 , d4 | ||||
| @@ -240,14 +240,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S2 | .macro SAVE_S2 | ||||
| fldmiad YO, { d4 } | |||||
| vldmia.f64 YO, { d4 } | |||||
| vmla.f64 d4, d0, d2 | vmla.f64 d4, d0, d2 | ||||
| fstmiad YO, { d4 } | |||||
| vstmia.f64 YO, { d4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d5 } | |||||
| vldmia.f64 YO, { d5 } | |||||
| vmla.f64 d5, d0, d3 | vmla.f64 d5, d0, d3 | ||||
| fstmiad YO, { d5 } | |||||
| vstmia.f64 YO, { d5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -261,20 +261,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X4 | .macro KERNEL_S1X4 | ||||
| fldmiad XO , { d12 } | |||||
| vldmia.f64 XO , { d12 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| pld [ AO1 , #A_PRE ] | pld [ AO1 , #A_PRE ] | ||||
| fldmiad AO1!, { d8 - d9 } | |||||
| vldmia.f64 AO1!, { d8 - d9 } | |||||
| fldmiad XO , { d13 } | |||||
| vldmia.f64 XO , { d13 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmiad AO1!, { d10 - d11 } | |||||
| vldmia.f64 AO1!, { d10 - d11 } | |||||
| fldmiad XO , { d14 } | |||||
| vldmia.f64 XO , { d14 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmiad XO , { d15 } | |||||
| vldmia.f64 XO , { d15 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f64 d2 , d12 , d8 | vmla.f64 d2 , d12 , d8 | ||||
| @@ -286,8 +286,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmiad XO , { d1 } | |||||
| fldmiad AO1!, { d8 } | |||||
| vldmia.f64 XO , { d1 } | |||||
| vldmia.f64 AO1!, { d8 } | |||||
| vmla.f64 d2 , d1 , d8 | vmla.f64 d2 , d1 , d8 | ||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| @@ -295,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmiad YO, { d4 } | |||||
| vldmia.f64 YO, { d4 } | |||||
| vmla.f64 d4, d0, d2 | vmla.f64 d4, d0, d2 | ||||
| fstmiad YO, { d4 } | |||||
| vstmia.f64 YO, { d4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -315,11 +315,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X4 | .macro KERNEL_F2X4 | ||||
| fldmias XO! , { s12 - s15 } | |||||
| fldmias AO1!, { s8 - s9 } | |||||
| fldmias AO2!, { s4 - s5 } | |||||
| fldmias AO1!, { s10 - s11 } | |||||
| fldmias AO2!, { s6 - s7 } | |||||
| vldmia.f32 XO! , { s12 - s15 } | |||||
| vldmia.f32 AO1!, { s8 - s9 } | |||||
| vldmia.f32 AO2!, { s4 - s5 } | |||||
| vldmia.f32 AO1!, { s10 - s11 } | |||||
| vldmia.f32 AO2!, { s6 - s7 } | |||||
| vmla.f32 s2 , s12 , s8 | vmla.f32 s2 , s12 , s8 | ||||
| vmla.f32 s3 , s12 , s4 | vmla.f32 s3 , s12 , s4 | ||||
| @@ -334,9 +334,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X1 | .macro KERNEL_F2X1 | ||||
| fldmias XO! , { s1 } | |||||
| fldmias AO1!, { s8 } | |||||
| fldmias AO2!, { s4 } | |||||
| vldmia.f32 XO! , { s1 } | |||||
| vldmia.f32 AO1!, { s8 } | |||||
| vldmia.f32 AO2!, { s4 } | |||||
| vmla.f32 s2 , s1 , s8 | vmla.f32 s2 , s1 , s8 | ||||
| vmla.f32 s3 , s1 , s4 | vmla.f32 s3 , s1 , s4 | ||||
| @@ -344,10 +344,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F2 | .macro SAVE_F2 | ||||
| fldmias YO, { s4 - s5 } | |||||
| vldmia.f32 YO, { s4 - s5 } | |||||
| vmla.f32 s4, s0, s2 | vmla.f32 s4, s0, s2 | ||||
| vmla.f32 s5, s0, s3 | vmla.f32 s5, s0, s3 | ||||
| fstmias YO!, { s4 - s5 } | |||||
| vstmia.f32 YO!, { s4 - s5 } | |||||
| .endm | .endm | ||||
| @@ -359,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X4 | .macro KERNEL_F1X4 | ||||
| fldmias XO! , { s12 - s15 } | |||||
| fldmias AO1!, { s8 - s9 } | |||||
| fldmias AO1!, { s10 - s11 } | |||||
| vldmia.f32 XO! , { s12 - s15 } | |||||
| vldmia.f32 AO1!, { s8 - s9 } | |||||
| vldmia.f32 AO1!, { s10 - s11 } | |||||
| vmla.f32 s2 , s12 , s8 | vmla.f32 s2 , s12 , s8 | ||||
| vmla.f32 s2 , s13 , s9 | vmla.f32 s2 , s13 , s9 | ||||
| vmla.f32 s2 , s14, s10 | vmla.f32 s2 , s14, s10 | ||||
| @@ -371,17 +371,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmias XO! , { s1 } | |||||
| fldmias AO1!, { s8 } | |||||
| vldmia.f32 XO! , { s1 } | |||||
| vldmia.f32 AO1!, { s8 } | |||||
| vmla.f32 s2 , s1 , s8 | vmla.f32 s2 , s1 , s8 | ||||
| .endm | .endm | ||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmias YO, { s4 } | |||||
| vldmia.f32 YO, { s4 } | |||||
| vmla.f32 s4, s0, s2 | vmla.f32 s4, s0, s2 | ||||
| fstmias YO!, { s4 } | |||||
| vstmia.f32 YO!, { s4 } | |||||
| .endm | .endm | ||||
| @@ -395,21 +395,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X4 | .macro KERNEL_S2X4 | ||||
| fldmias XO , { s12 } | |||||
| vldmia.f32 XO , { s12 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmias AO1!, { s8 - s9 } | |||||
| fldmias AO2!, { s4 - s5 } | |||||
| vldmia.f32 AO1!, { s8 - s9 } | |||||
| vldmia.f32 AO2!, { s4 - s5 } | |||||
| fldmias XO , { s13 } | |||||
| vldmia.f32 XO , { s13 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmias AO1!, { s10 - s11 } | |||||
| fldmias AO2!, { s6 - s7 } | |||||
| vldmia.f32 AO1!, { s10 - s11 } | |||||
| vldmia.f32 AO2!, { s6 - s7 } | |||||
| fldmias XO , { s14 } | |||||
| vldmia.f32 XO , { s14 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmias XO , { s15 } | |||||
| vldmia.f32 XO , { s15 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f32 s2 , s12 , s8 | vmla.f32 s2 , s12 , s8 | ||||
| @@ -425,9 +425,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X1 | .macro KERNEL_S2X1 | ||||
| fldmias XO , { s1 } | |||||
| fldmias AO1!, { s8 } | |||||
| fldmias AO2!, { s4 } | |||||
| vldmia.f32 XO , { s1 } | |||||
| vldmia.f32 AO1!, { s8 } | |||||
| vldmia.f32 AO2!, { s4 } | |||||
| vmla.f32 s2 , s1 , s8 | vmla.f32 s2 , s1 , s8 | ||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f32 s3 , s1 , s4 | vmla.f32 s3 , s1 , s4 | ||||
| @@ -436,14 +436,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S2 | .macro SAVE_S2 | ||||
| fldmias YO, { s4 } | |||||
| vldmia.f32 YO, { s4 } | |||||
| vmla.f32 s4, s0, s2 | vmla.f32 s4, s0, s2 | ||||
| fstmias YO, { s4 } | |||||
| vstmia.f32 YO, { s4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s5 } | |||||
| vldmia.f32 YO, { s5 } | |||||
| vmla.f32 s5, s0, s3 | vmla.f32 s5, s0, s3 | ||||
| fstmias YO, { s5 } | |||||
| vstmia.f32 YO, { s5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -456,20 +456,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X4 | .macro KERNEL_S1X4 | ||||
| fldmias XO , { s12 } | |||||
| vldmia.f32 XO , { s12 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| pld [ AO1 , #A_PRE ] | pld [ AO1 , #A_PRE ] | ||||
| fldmias AO1!, { s8 - s9 } | |||||
| vldmia.f32 AO1!, { s8 - s9 } | |||||
| fldmias XO , { s13 } | |||||
| vldmia.f32 XO , { s13 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmias AO1!, { s10 - s11 } | |||||
| vldmia.f32 AO1!, { s10 - s11 } | |||||
| fldmias XO , { s14 } | |||||
| vldmia.f32 XO , { s14 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmias XO , { s15 } | |||||
| vldmia.f32 XO , { s15 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f32 s2 , s12 , s8 | vmla.f32 s2 , s12 , s8 | ||||
| @@ -481,8 +481,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmias XO , { s1 } | |||||
| fldmias AO1!, { s8 } | |||||
| vldmia.f32 XO , { s1 } | |||||
| vldmia.f32 AO1!, { s8 } | |||||
| vmla.f32 s2 , s1 , s8 | vmla.f32 s2 , s1 , s8 | ||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| @@ -490,9 +490,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmias YO, { s4 } | |||||
| vldmia.f32 YO, { s4 } | |||||
| vmla.f32 s4, s0, s2 | vmla.f32 s4, s0, s2 | ||||
| fstmias YO, { s4 } | |||||
| vstmia.f32 YO, { s4 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -108,17 +108,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X4 | .macro KERNEL_F2X4 | ||||
| pld [ XO , #X_PRE ] | pld [ XO , #X_PRE ] | ||||
| fldmiad XO! , { d28 - d31 } | |||||
| vldmia.f64 XO! , { d28 - d31 } | |||||
| pld [ AO1 , #A_PRE ] | pld [ AO1 , #A_PRE ] | ||||
| fldmiad AO1!, { d8 - d9 } | |||||
| vldmia.f64 AO1!, { d8 - d9 } | |||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| fldmiad AO2!, { d16 - d17 } | |||||
| vldmia.f64 AO2!, { d16 - d17 } | |||||
| vmla.f64 d4 , d28 , d8 | vmla.f64 d4 , d28 , d8 | ||||
| vmla.f64 d5 , d28 , d16 | vmla.f64 d5 , d28 , d16 | ||||
| fldmiad AO1!, { d10 - d11 } | |||||
| vldmia.f64 AO1!, { d10 - d11 } | |||||
| vmla.f64 d4 , d29 , d9 | vmla.f64 d4 , d29 , d9 | ||||
| vmla.f64 d5 , d29 , d17 | vmla.f64 d5 , d29 , d17 | ||||
| fldmiad AO2!, { d18 - d19 } | |||||
| vldmia.f64 AO2!, { d18 - d19 } | |||||
| vmla.f64 d4 , d30, d10 | vmla.f64 d4 , d30, d10 | ||||
| vmla.f64 d5 , d30, d18 | vmla.f64 d5 , d30, d18 | ||||
| vmla.f64 d4 , d31, d11 | vmla.f64 d4 , d31, d11 | ||||
| @@ -129,9 +129,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X1 | .macro KERNEL_F2X1 | ||||
| fldmiad XO! , { d2 } | |||||
| fldmiad AO1!, { d8 } | |||||
| fldmiad AO2!, { d16 } | |||||
| vldmia.f64 XO! , { d2 } | |||||
| vldmia.f64 AO1!, { d8 } | |||||
| vldmia.f64 AO2!, { d16 } | |||||
| vmla.f64 d4 , d2 , d8 | vmla.f64 d4 , d2 , d8 | ||||
| vmla.f64 d5 , d2 , d16 | vmla.f64 d5 , d2 , d16 | ||||
| @@ -139,10 +139,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F2 | .macro SAVE_F2 | ||||
| fldmiad YO, { d24 - d25 } | |||||
| vldmia.f64 YO, { d24 - d25 } | |||||
| vmla.f64 d24, d0, d4 | vmla.f64 d24, d0, d4 | ||||
| vmla.f64 d25, d0, d5 | vmla.f64 d25, d0, d5 | ||||
| fstmiad YO!, { d24 - d25 } | |||||
| vstmia.f64 YO!, { d24 - d25 } | |||||
| .endm | .endm | ||||
| @@ -156,23 +156,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X4 | .macro KERNEL_S2X4 | ||||
| pld [ AO1 , #A_PRE ] | pld [ AO1 , #A_PRE ] | ||||
| fldmiad XO , { d28 } | |||||
| vldmia.f64 XO , { d28 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmiad AO1!, { d8 - d9 } | |||||
| vldmia.f64 AO1!, { d8 - d9 } | |||||
| pld [ AO2 , #A_PRE ] | pld [ AO2 , #A_PRE ] | ||||
| fldmiad AO2!, { d16 - d17 } | |||||
| vldmia.f64 AO2!, { d16 - d17 } | |||||
| vmla.f64 d4 , d28 , d8 | vmla.f64 d4 , d28 , d8 | ||||
| fldmiad XO , { d29 } | |||||
| vldmia.f64 XO , { d29 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f64 d5 , d28 , d16 | vmla.f64 d5 , d28 , d16 | ||||
| fldmiad AO1!, { d10 - d11 } | |||||
| vldmia.f64 AO1!, { d10 - d11 } | |||||
| vmla.f64 d4 , d29 , d9 | vmla.f64 d4 , d29 , d9 | ||||
| fldmiad XO , { d30 } | |||||
| vldmia.f64 XO , { d30 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f64 d5 , d29 , d17 | vmla.f64 d5 , d29 , d17 | ||||
| fldmiad AO2!, { d18 - d19 } | |||||
| vldmia.f64 AO2!, { d18 - d19 } | |||||
| vmla.f64 d4 , d30, d10 | vmla.f64 d4 , d30, d10 | ||||
| fldmiad XO , { d31 } | |||||
| vldmia.f64 XO , { d31 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f64 d5 , d30, d18 | vmla.f64 d5 , d30, d18 | ||||
| vmla.f64 d4 , d31, d11 | vmla.f64 d4 , d31, d11 | ||||
| @@ -183,10 +183,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X1 | .macro KERNEL_S2X1 | ||||
| fldmiad XO , { d2 } | |||||
| fldmiad AO1!, { d8 } | |||||
| vldmia.f64 XO , { d2 } | |||||
| vldmia.f64 AO1!, { d8 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmiad AO2!, { d16 } | |||||
| vldmia.f64 AO2!, { d16 } | |||||
| vmla.f64 d4 , d2 , d8 | vmla.f64 d4 , d2 , d8 | ||||
| vmla.f64 d5 , d2 , d16 | vmla.f64 d5 , d2 , d16 | ||||
| @@ -194,14 +194,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S2 | .macro SAVE_S2 | ||||
| fldmiad YO, { d24 } | |||||
| vldmia.f64 YO, { d24 } | |||||
| vmla.f64 d24, d0, d4 | vmla.f64 d24, d0, d4 | ||||
| fstmiad YO, { d24 } | |||||
| vstmia.f64 YO, { d24 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d24 } | |||||
| vldmia.f64 YO, { d24 } | |||||
| vmla.f64 d24, d0, d5 | vmla.f64 d24, d0, d5 | ||||
| fstmiad YO, { d24 } | |||||
| vstmia.f64 YO, { d24 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -215,11 +215,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X4 | .macro KERNEL_F1X4 | ||||
| pld [ XO , #X_PRE ] | pld [ XO , #X_PRE ] | ||||
| fldmiad XO! , { d28 - d31 } | |||||
| vldmia.f64 XO! , { d28 - d31 } | |||||
| pld [ AO1 , #A_PRE ] | pld [ AO1 , #A_PRE ] | ||||
| fldmiad AO1!, { d8 - d9 } | |||||
| vldmia.f64 AO1!, { d8 - d9 } | |||||
| vmla.f64 d4 , d28 , d8 | vmla.f64 d4 , d28 , d8 | ||||
| fldmiad AO1!, { d10 - d11 } | |||||
| vldmia.f64 AO1!, { d10 - d11 } | |||||
| vmla.f64 d4 , d29 , d9 | vmla.f64 d4 , d29 , d9 | ||||
| vmla.f64 d4 , d30, d10 | vmla.f64 d4 , d30, d10 | ||||
| vmla.f64 d4 , d31, d11 | vmla.f64 d4 , d31, d11 | ||||
| @@ -229,17 +229,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmiad XO! , { d2 } | |||||
| fldmiad AO1!, { d8 } | |||||
| vldmia.f64 XO! , { d2 } | |||||
| vldmia.f64 AO1!, { d8 } | |||||
| vmla.f64 d4 , d2 , d8 | vmla.f64 d4 , d2 , d8 | ||||
| .endm | .endm | ||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmiad YO, { d24 } | |||||
| vldmia.f64 YO, { d24 } | |||||
| vmla.f64 d24, d0, d4 | vmla.f64 d24, d0, d4 | ||||
| fstmiad YO!, { d24 } | |||||
| vstmia.f64 YO!, { d24 } | |||||
| .endm | .endm | ||||
| @@ -252,18 +252,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X4 | .macro KERNEL_S1X4 | ||||
| pld [ AO1 , #A_PRE ] | pld [ AO1 , #A_PRE ] | ||||
| fldmiad XO , { d28 } | |||||
| vldmia.f64 XO , { d28 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmiad AO1!, { d8 - d9 } | |||||
| vldmia.f64 AO1!, { d8 - d9 } | |||||
| vmla.f64 d4 , d28 , d8 | vmla.f64 d4 , d28 , d8 | ||||
| fldmiad XO , { d29 } | |||||
| vldmia.f64 XO , { d29 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmiad AO1!, { d10 - d11 } | |||||
| vldmia.f64 AO1!, { d10 - d11 } | |||||
| vmla.f64 d4 , d29 , d9 | vmla.f64 d4 , d29 , d9 | ||||
| fldmiad XO , { d30 } | |||||
| vldmia.f64 XO , { d30 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f64 d4 , d30, d10 | vmla.f64 d4 , d30, d10 | ||||
| fldmiad XO , { d31 } | |||||
| vldmia.f64 XO , { d31 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f64 d4 , d31, d11 | vmla.f64 d4 , d31, d11 | ||||
| @@ -272,8 +272,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmiad XO , { d2 } | |||||
| fldmiad AO1!, { d8 } | |||||
| vldmia.f64 XO , { d2 } | |||||
| vldmia.f64 AO1!, { d8 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f64 d4 , d2 , d8 | vmla.f64 d4 , d2 , d8 | ||||
| @@ -281,9 +281,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmiad YO, { d24 } | |||||
| vldmia.f64 YO, { d24 } | |||||
| vmla.f64 d24, d0, d4 | vmla.f64 d24, d0, d4 | ||||
| fstmiad YO, { d24 } | |||||
| vstmia.f64 YO, { d24 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -300,15 +300,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X4 | .macro KERNEL_F2X4 | ||||
| fldmias XO! , { s28 - s31 } | |||||
| fldmias AO1!, { s8 - s9 } | |||||
| fldmias AO2!, { s16 - s17 } | |||||
| vldmia.f32 XO! , { s28 - s31 } | |||||
| vldmia.f32 AO1!, { s8 - s9 } | |||||
| vldmia.f32 AO2!, { s16 - s17 } | |||||
| vmla.f32 s4 , s28 , s8 | vmla.f32 s4 , s28 , s8 | ||||
| vmla.f32 s5 , s28 , s16 | vmla.f32 s5 , s28 , s16 | ||||
| fldmias AO1!, { s10 - s11 } | |||||
| vldmia.f32 AO1!, { s10 - s11 } | |||||
| vmla.f32 s4 , s29 , s9 | vmla.f32 s4 , s29 , s9 | ||||
| vmla.f32 s5 , s29 , s17 | vmla.f32 s5 , s29 , s17 | ||||
| fldmias AO2!, { s18 - s19 } | |||||
| vldmia.f32 AO2!, { s18 - s19 } | |||||
| vmla.f32 s4 , s30, s10 | vmla.f32 s4 , s30, s10 | ||||
| vmla.f32 s5 , s30, s18 | vmla.f32 s5 , s30, s18 | ||||
| vmla.f32 s4 , s31, s11 | vmla.f32 s4 , s31, s11 | ||||
| @@ -319,9 +319,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X1 | .macro KERNEL_F2X1 | ||||
| fldmias XO! , { s2 } | |||||
| fldmias AO1!, { s8 } | |||||
| fldmias AO2!, { s16 } | |||||
| vldmia.f32 XO! , { s2 } | |||||
| vldmia.f32 AO1!, { s8 } | |||||
| vldmia.f32 AO2!, { s16 } | |||||
| vmla.f32 s4 , s2 , s8 | vmla.f32 s4 , s2 , s8 | ||||
| vmla.f32 s5 , s2 , s16 | vmla.f32 s5 , s2 , s16 | ||||
| @@ -329,10 +329,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F2 | .macro SAVE_F2 | ||||
| fldmias YO, { s24 - s25 } | |||||
| vldmia.f32 YO, { s24 - s25 } | |||||
| vmla.f32 s24, s0, s4 | vmla.f32 s24, s0, s4 | ||||
| vmla.f32 s25, s0, s5 | vmla.f32 s25, s0, s5 | ||||
| fstmias YO!, { s24 - s25 } | |||||
| vstmia.f32 YO!, { s24 - s25 } | |||||
| .endm | .endm | ||||
| @@ -345,22 +345,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X4 | .macro KERNEL_S2X4 | ||||
| fldmias XO , { s28 } | |||||
| vldmia.f32 XO , { s28 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmias AO1!, { s8 - s9 } | |||||
| fldmias AO2!, { s16 - s17 } | |||||
| vldmia.f32 AO1!, { s8 - s9 } | |||||
| vldmia.f32 AO2!, { s16 - s17 } | |||||
| vmla.f32 s4 , s28 , s8 | vmla.f32 s4 , s28 , s8 | ||||
| fldmias XO , { s29 } | |||||
| vldmia.f32 XO , { s29 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f32 s5 , s28 , s16 | vmla.f32 s5 , s28 , s16 | ||||
| fldmias AO1!, { s10 - s11 } | |||||
| vldmia.f32 AO1!, { s10 - s11 } | |||||
| vmla.f32 s4 , s29 , s9 | vmla.f32 s4 , s29 , s9 | ||||
| fldmias XO , { s30 } | |||||
| vldmia.f32 XO , { s30 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f32 s5 , s29 , s17 | vmla.f32 s5 , s29 , s17 | ||||
| fldmias AO2!, { s18 - s19 } | |||||
| vldmia.f32 AO2!, { s18 - s19 } | |||||
| vmla.f32 s4 , s30, s10 | vmla.f32 s4 , s30, s10 | ||||
| fldmias XO , { s31 } | |||||
| vldmia.f32 XO , { s31 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f32 s5 , s30, s18 | vmla.f32 s5 , s30, s18 | ||||
| vmla.f32 s4 , s31, s11 | vmla.f32 s4 , s31, s11 | ||||
| @@ -371,10 +371,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X1 | .macro KERNEL_S2X1 | ||||
| fldmias XO , { s2 } | |||||
| fldmias AO1!, { s8 } | |||||
| vldmia.f32 XO , { s2 } | |||||
| vldmia.f32 AO1!, { s8 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmias AO2!, { s16 } | |||||
| vldmia.f32 AO2!, { s16 } | |||||
| vmla.f32 s4 , s2 , s8 | vmla.f32 s4 , s2 , s8 | ||||
| vmla.f32 s5 , s2 , s16 | vmla.f32 s5 , s2 , s16 | ||||
| @@ -382,14 +382,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S2 | .macro SAVE_S2 | ||||
| fldmias YO, { s24 } | |||||
| vldmia.f32 YO, { s24 } | |||||
| vmla.f32 s24, s0, s4 | vmla.f32 s24, s0, s4 | ||||
| fstmias YO, { s24 } | |||||
| vstmia.f32 YO, { s24 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmias YO, { s24 } | |||||
| vldmia.f32 YO, { s24 } | |||||
| vmla.f32 s24, s0, s5 | vmla.f32 s24, s0, s5 | ||||
| fstmias YO, { s24 } | |||||
| vstmia.f32 YO, { s24 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -402,10 +402,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X4 | .macro KERNEL_F1X4 | ||||
| fldmias XO! , { s28 - s31 } | |||||
| fldmias AO1!, { s8 - s9 } | |||||
| vldmia.f32 XO! , { s28 - s31 } | |||||
| vldmia.f32 AO1!, { s8 - s9 } | |||||
| vmla.f32 s4 , s28 , s8 | vmla.f32 s4 , s28 , s8 | ||||
| fldmias AO1!, { s10 - s11 } | |||||
| vldmia.f32 AO1!, { s10 - s11 } | |||||
| vmla.f32 s4 , s29 , s9 | vmla.f32 s4 , s29 , s9 | ||||
| vmla.f32 s4 , s30, s10 | vmla.f32 s4 , s30, s10 | ||||
| vmla.f32 s4 , s31, s11 | vmla.f32 s4 , s31, s11 | ||||
| @@ -415,17 +415,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmias XO! , { s2 } | |||||
| fldmias AO1!, { s8 } | |||||
| vldmia.f32 XO! , { s2 } | |||||
| vldmia.f32 AO1!, { s8 } | |||||
| vmla.f32 s4 , s2 , s8 | vmla.f32 s4 , s2 , s8 | ||||
| .endm | .endm | ||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmias YO, { s24 } | |||||
| vldmia.f32 YO, { s24 } | |||||
| vmla.f32 s24, s0, s4 | vmla.f32 s24, s0, s4 | ||||
| fstmias YO!, { s24 } | |||||
| vstmia.f32 YO!, { s24 } | |||||
| .endm | .endm | ||||
| @@ -437,18 +437,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X4 | .macro KERNEL_S1X4 | ||||
| fldmias XO , { s28 } | |||||
| vldmia.f32 XO , { s28 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmias AO1!, { s8 - s9 } | |||||
| vldmia.f32 AO1!, { s8 - s9 } | |||||
| vmla.f32 s4 , s28 , s8 | vmla.f32 s4 , s28 , s8 | ||||
| fldmias XO , { s29 } | |||||
| vldmia.f32 XO , { s29 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| fldmias AO1!, { s10 - s11 } | |||||
| vldmia.f32 AO1!, { s10 - s11 } | |||||
| vmla.f32 s4 , s29 , s9 | vmla.f32 s4 , s29 , s9 | ||||
| fldmias XO , { s30 } | |||||
| vldmia.f32 XO , { s30 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f32 s4 , s30, s10 | vmla.f32 s4 , s30, s10 | ||||
| fldmias XO , { s31 } | |||||
| vldmia.f32 XO , { s31 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f32 s4 , s31, s11 | vmla.f32 s4 , s31, s11 | ||||
| @@ -457,8 +457,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmias XO , { s2 } | |||||
| fldmias AO1!, { s8 } | |||||
| vldmia.f32 XO , { s2 } | |||||
| vldmia.f32 AO1!, { s8 } | |||||
| add XO, XO, INC_X | add XO, XO, INC_X | ||||
| vmla.f32 s4 , s2 , s8 | vmla.f32 s4 , s2 , s8 | ||||
| @@ -466,9 +466,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmias YO, { s24 } | |||||
| vldmia.f32 YO, { s24 } | |||||
| vmla.f32 s24, s0, s4 | vmla.f32 s24, s0, s4 | ||||
| fstmias YO, { s24 } | |||||
| vstmia.f32 YO, { s24 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| .endm | .endm | ||||
| @@ -114,7 +114,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F | .macro INIT_F | ||||
| fldmiad X!, { d0 } | |||||
| vldmia.f64 X!, { d0 } | |||||
| VABS( d0, d0 ) | VABS( d0, d0 ) | ||||
| mov Z, #1 | mov Z, #1 | ||||
| mov INDEX, Z | mov INDEX, Z | ||||
| @@ -123,7 +123,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 } | |||||
| vldmia.f64 X!, { d4 } | |||||
| add Z, Z, #1 | add Z, Z, #1 | ||||
| VABS( d4, d4 ) | VABS( d4, d4 ) | ||||
| vcmpe.f64 d4, d0 | vcmpe.f64 d4, d0 | ||||
| @@ -135,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S | .macro INIT_S | ||||
| fldmiad X, { d0 } | |||||
| vldmia.f64 X, { d0 } | |||||
| VABS( d0, d0 ) | VABS( d0, d0 ) | ||||
| mov Z, #1 | mov Z, #1 | ||||
| mov INDEX, Z | mov INDEX, Z | ||||
| @@ -146,7 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| add Z, Z, #1 | add Z, Z, #1 | ||||
| VABS( d4, d4 ) | VABS( d4, d4 ) | ||||
| vcmpe.f64 d4, d0 | vcmpe.f64 d4, d0 | ||||
| @@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F | .macro INIT_F | ||||
| fldmias X!, { s0 } | |||||
| vldmia.f32 X!, { s0 } | |||||
| VABS( s0, s0 ) | VABS( s0, s0 ) | ||||
| mov Z, #1 | mov Z, #1 | ||||
| mov INDEX, Z | mov INDEX, Z | ||||
| @@ -170,7 +170,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 } | |||||
| vldmia.f32 X!, { s4 } | |||||
| add Z, Z, #1 | add Z, Z, #1 | ||||
| VABS( s4, s4 ) | VABS( s4, s4 ) | ||||
| vcmpe.f32 s4, s0 | vcmpe.f32 s4, s0 | ||||
| @@ -182,7 +182,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S | .macro INIT_S | ||||
| fldmias X, { s0 } | |||||
| vldmia.f32 X, { s0 } | |||||
| VABS( s0, s0 ) | VABS( s0, s0 ) | ||||
| mov Z, #1 | mov Z, #1 | ||||
| mov INDEX, Z | mov INDEX, Z | ||||
| @@ -193,7 +193,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| add Z, Z, #1 | add Z, Z, #1 | ||||
| VABS( s4, s4 ) | VABS( s4, s4 ) | ||||
| vcmpe.f32 s4, s0 | vcmpe.f32 s4, s0 | ||||
| @@ -215,7 +215,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F | .macro INIT_F | ||||
| fldmiad X!, { d0 -d1 } | |||||
| vldmia.f64 X!, { d0 -d1 } | |||||
| vabs.f64 d0, d0 | vabs.f64 d0, d0 | ||||
| vabs.f64 d1, d1 | vabs.f64 d1, d1 | ||||
| vadd.f64 d0 , d0, d1 | vadd.f64 d0 , d0, d1 | ||||
| @@ -227,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 - d5 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| add Z, Z, #1 | add Z, Z, #1 | ||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| @@ -241,7 +241,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S | .macro INIT_S | ||||
| fldmiad X, { d0 -d1 } | |||||
| vldmia.f64 X, { d0 -d1 } | |||||
| vabs.f64 d0, d0 | vabs.f64 d0, d0 | ||||
| vabs.f64 d1, d1 | vabs.f64 d1, d1 | ||||
| vadd.f64 d0 , d0, d1 | vadd.f64 d0 , d0, d1 | ||||
| @@ -255,7 +255,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| add Z, Z, #1 | add Z, Z, #1 | ||||
| vabs.f64 d4, d4 | vabs.f64 d4, d4 | ||||
| vabs.f64 d5, d5 | vabs.f64 d5, d5 | ||||
| @@ -272,7 +272,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_F | .macro INIT_F | ||||
| fldmias X!, { s0 -s1 } | |||||
| vldmia.f32 X!, { s0 -s1 } | |||||
| vabs.f32 s0, s0 | vabs.f32 s0, s0 | ||||
| vabs.f32 s1, s1 | vabs.f32 s1, s1 | ||||
| vadd.f32 s0 , s0, s1 | vadd.f32 s0 , s0, s1 | ||||
| @@ -284,7 +284,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 - s5 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| add Z, Z, #1 | add Z, Z, #1 | ||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| @@ -298,7 +298,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro INIT_S | .macro INIT_S | ||||
| fldmias X, { s0 -s1 } | |||||
| vldmia.f32 X, { s0 -s1 } | |||||
| vabs.f32 s0, s0 | vabs.f32 s0, s0 | ||||
| vabs.f32 s1, s1 | vabs.f32 s1, s1 | ||||
| vadd.f32 s0 , s0, s1 | vadd.f32 s0 , s0, s1 | ||||
| @@ -312,7 +312,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| add Z, Z, #1 | add Z, Z, #1 | ||||
| vabs.f32 s4, s4 | vabs.f32 s4, s4 | ||||
| vabs.f32 s5, s5 | vabs.f32 s5, s5 | ||||
| @@ -58,7 +58,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 } | |||||
| vldmia.f64 X!, { d4 } | |||||
| vcmpe.f64 d4, d6 // compare with 0.0 | vcmpe.f64 d4, d6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| beq KERNEL_F1_NEXT_\@ | beq KERNEL_F1_NEXT_\@ | ||||
| @@ -95,7 +95,7 @@ KERNEL_F1_NEXT_\@: | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vcmpe.f64 d4, d6 // compare with 0.0 | vcmpe.f64 d4, d6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| beq KERNEL_S1_NEXT | beq KERNEL_S1_NEXT | ||||
| @@ -121,7 +121,7 @@ KERNEL_S1_NEXT: | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 } | |||||
| vldmia.f32 X!, { s4 } | |||||
| vcmpe.f32 s4, s6 // compare with 0.0 | vcmpe.f32 s4, s6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| beq KERNEL_F1_NEXT_\@ | beq KERNEL_F1_NEXT_\@ | ||||
| @@ -158,7 +158,7 @@ KERNEL_F1_NEXT_\@: | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vcmpe.f32 s4, s6 // compare with 0.0 | vcmpe.f32 s4, s6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| beq KERNEL_S1_NEXT | beq KERNEL_S1_NEXT | ||||
| @@ -191,7 +191,7 @@ KERNEL_S1_NEXT: | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 - d5 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| vcmpe.f64 d4, d6 // compare with 0.0 | vcmpe.f64 d4, d6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| @@ -249,7 +249,7 @@ KERNEL_F1_END_\@: | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vcmpe.f64 d4, d6 // compare with 0.0 | vcmpe.f64 d4, d6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| @@ -294,7 +294,7 @@ KERNEL_S1_END_\@: | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 - s5 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| vcmpe.f32 s4, s6 // compare with 0.0 | vcmpe.f32 s4, s6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| @@ -350,7 +350,7 @@ KERNEL_F1_END_\@: | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vcmpe.f32 s4, s6 // compare with 0.0 | vcmpe.f32 s4, s6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| @@ -58,7 +58,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 } | |||||
| vldmia.f64 X!, { d4 } | |||||
| vcmpe.f64 d4, d6 // compare with 0.0 | vcmpe.f64 d4, d6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| beq KERNEL_F1_NEXT_\@ | beq KERNEL_F1_NEXT_\@ | ||||
| @@ -95,7 +95,7 @@ KERNEL_F1_NEXT_\@: | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vcmpe.f64 d4, d6 // compare with 0.0 | vcmpe.f64 d4, d6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| beq KERNEL_S1_NEXT | beq KERNEL_S1_NEXT | ||||
| @@ -121,7 +121,7 @@ KERNEL_S1_NEXT: | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 } | |||||
| vldmia.f32 X!, { s4 } | |||||
| vcmpe.f32 s4, s6 // compare with 0.0 | vcmpe.f32 s4, s6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| beq KERNEL_F1_NEXT_\@ | beq KERNEL_F1_NEXT_\@ | ||||
| @@ -158,7 +158,7 @@ KERNEL_F1_NEXT_\@: | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vcmpe.f32 s4, s6 // compare with 0.0 | vcmpe.f32 s4, s6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| beq KERNEL_S1_NEXT | beq KERNEL_S1_NEXT | ||||
| @@ -191,7 +191,7 @@ KERNEL_S1_NEXT: | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 - d5 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| vcmpe.f64 d4, d6 // compare with 0.0 | vcmpe.f64 d4, d6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| @@ -249,7 +249,7 @@ KERNEL_F1_END_\@: | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vcmpe.f64 d4, d6 // compare with 0.0 | vcmpe.f64 d4, d6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| @@ -294,7 +294,7 @@ KERNEL_S1_END_\@: | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 - s5 } | |||||
| vldmia.f32 X!, { s4 - s5 } | |||||
| vcmpe.f32 s4, s6 // compare with 0.0 | vcmpe.f32 s4, s6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| @@ -350,7 +350,7 @@ KERNEL_F1_END_\@: | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vcmpe.f32 s4, s6 // compare with 0.0 | vcmpe.f32 s4, s6 // compare with 0.0 | ||||
| vmrs APSR_nzcv, fpscr | vmrs APSR_nzcv, fpscr | ||||
| @@ -77,68 +77,68 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad X, { d4 } | |||||
| fldmiad Y, { d5 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vldmia.f64 Y, { d5 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d5 | fmacd d2 , d1, d5 | ||||
| vmul.f64 d3 , d0, d5 | vmul.f64 d3 , d0, d5 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| fldmiad X, { d4 } | |||||
| fldmiad Y, { d5 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vldmia.f64 Y, { d5 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d5 | fmacd d2 , d1, d5 | ||||
| vmul.f64 d3 , d0, d5 | vmul.f64 d3 , d0, d5 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| fldmiad X, { d4 } | |||||
| fldmiad Y, { d5 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vldmia.f64 Y, { d5 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d5 | fmacd d2 , d1, d5 | ||||
| vmul.f64 d3 , d0, d5 | vmul.f64 d3 , d0, d5 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| fldmiad X, { d4 } | |||||
| fldmiad Y, { d5 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vldmia.f64 Y, { d5 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d5 | fmacd d2 , d1, d5 | ||||
| vmul.f64 d3 , d0, d5 | vmul.f64 d3 , d0, d5 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X, { d4 } | |||||
| fldmiad Y, { d5 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vldmia.f64 Y, { d5 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d5 | fmacd d2 , d1, d5 | ||||
| vmul.f64 d3 , d0, d5 | vmul.f64 d3 , d0, d5 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 } | |||||
| fldmiad Y, { d5 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vldmia.f64 Y, { d5 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d5 | fmacd d2 , d1, d5 | ||||
| vmul.f64 d3 , d0, d5 | vmul.f64 d3 , d0, d5 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X, { d2 } | |||||
| fstmiad Y, { d3 } | |||||
| vstmia.f64 X, { d2 } | |||||
| vstmia.f64 Y, { d3 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -149,68 +149,68 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| fldmias X, { s4 } | |||||
| fldmias Y, { s5 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vldmia.f32 Y, { s5 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s5 | fmacs s2 , s1, s5 | ||||
| vmul.f32 s3 , s0, s5 | vmul.f32 s3 , s0, s5 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| fldmias X, { s4 } | |||||
| fldmias Y, { s5 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vldmia.f32 Y, { s5 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s5 | fmacs s2 , s1, s5 | ||||
| vmul.f32 s3 , s0, s5 | vmul.f32 s3 , s0, s5 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| fldmias X, { s4 } | |||||
| fldmias Y, { s5 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vldmia.f32 Y, { s5 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s5 | fmacs s2 , s1, s5 | ||||
| vmul.f32 s3 , s0, s5 | vmul.f32 s3 , s0, s5 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| fldmias X, { s4 } | |||||
| fldmias Y, { s5 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vldmia.f32 Y, { s5 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s5 | fmacs s2 , s1, s5 | ||||
| vmul.f32 s3 , s0, s5 | vmul.f32 s3 , s0, s5 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X, { s4 } | |||||
| fldmias Y, { s5 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vldmia.f32 Y, { s5 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s5 | fmacs s2 , s1, s5 | ||||
| vmul.f32 s3 , s0, s5 | vmul.f32 s3 , s0, s5 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 } | |||||
| fldmias Y, { s5 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vldmia.f32 Y, { s5 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s5 | fmacs s2 , s1, s5 | ||||
| vmul.f32 s3 , s0, s5 | vmul.f32 s3 , s0, s5 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X, { s2 } | |||||
| fstmias Y, { s3 } | |||||
| vstmia.f32 X, { s2 } | |||||
| vstmia.f32 Y, { s3 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -230,96 +230,96 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d6 - d7 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d6 - d7 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d6 | fmacd d2 , d1, d6 | ||||
| vmul.f64 d3 , d0, d6 | vmul.f64 d3 , d0, d6 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| vmul.f64 d2 , d0, d5 | vmul.f64 d2 , d0, d5 | ||||
| fmacd d2 , d1, d7 | fmacd d2 , d1, d7 | ||||
| vmul.f64 d3 , d0, d7 | vmul.f64 d3 , d0, d7 | ||||
| vmls.f64 d3 , d1, d5 | vmls.f64 d3 , d1, d5 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d6 - d7 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d6 - d7 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d6 | fmacd d2 , d1, d6 | ||||
| vmul.f64 d3 , d0, d6 | vmul.f64 d3 , d0, d6 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| vmul.f64 d2 , d0, d5 | vmul.f64 d2 , d0, d5 | ||||
| fmacd d2 , d1, d7 | fmacd d2 , d1, d7 | ||||
| vmul.f64 d3 , d0, d7 | vmul.f64 d3 , d0, d7 | ||||
| vmls.f64 d3 , d1, d5 | vmls.f64 d3 , d1, d5 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d6 - d7 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d6 - d7 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d6 | fmacd d2 , d1, d6 | ||||
| vmul.f64 d3 , d0, d6 | vmul.f64 d3 , d0, d6 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| vmul.f64 d2 , d0, d5 | vmul.f64 d2 , d0, d5 | ||||
| fmacd d2 , d1, d7 | fmacd d2 , d1, d7 | ||||
| vmul.f64 d3 , d0, d7 | vmul.f64 d3 , d0, d7 | ||||
| vmls.f64 d3 , d1, d5 | vmls.f64 d3 , d1, d5 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d6 - d7 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d6 - d7 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d6 | fmacd d2 , d1, d6 | ||||
| vmul.f64 d3 , d0, d6 | vmul.f64 d3 , d0, d6 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| vmul.f64 d2 , d0, d5 | vmul.f64 d2 , d0, d5 | ||||
| fmacd d2 , d1, d7 | fmacd d2 , d1, d7 | ||||
| vmul.f64 d3 , d0, d7 | vmul.f64 d3 , d0, d7 | ||||
| vmls.f64 d3 , d1, d5 | vmls.f64 d3 , d1, d5 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d6 - d7 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d6 - d7 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d6 | fmacd d2 , d1, d6 | ||||
| vmul.f64 d3 , d0, d6 | vmul.f64 d3 , d0, d6 | ||||
| vmls.f64 d3 , d1, d4 | vmls.f64 d3 , d1, d4 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| vmul.f64 d2 , d0, d5 | vmul.f64 d2 , d0, d5 | ||||
| fmacd d2 , d1, d7 | fmacd d2 , d1, d7 | ||||
| vmul.f64 d3 , d0, d7 | vmul.f64 d3 , d0, d7 | ||||
| vmls.f64 d3 , d1, d5 | vmls.f64 d3 , d1, d5 | ||||
| fstmiad X!, { d2 } | |||||
| fstmiad Y!, { d3 } | |||||
| vstmia.f64 X!, { d2 } | |||||
| vstmia.f64 Y!, { d3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d6 - d7 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d6 - d7 } | |||||
| vmul.f64 d2 , d0, d4 | vmul.f64 d2 , d0, d4 | ||||
| fmacd d2 , d1, d6 | fmacd d2 , d1, d6 | ||||
| vmul.f64 d3 , d0, d6 | vmul.f64 d3 , d0, d6 | ||||
| @@ -347,96 +347,96 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s6 - s7 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s6 - s7 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s6 | fmacs s2 , s1, s6 | ||||
| vmul.f32 s3 , s0, s6 | vmul.f32 s3 , s0, s6 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| vmul.f32 s2 , s0, s5 | vmul.f32 s2 , s0, s5 | ||||
| fmacs s2 , s1, s7 | fmacs s2 , s1, s7 | ||||
| vmul.f32 s3 , s0, s7 | vmul.f32 s3 , s0, s7 | ||||
| vmls.f32 s3 , s1, s5 | vmls.f32 s3 , s1, s5 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s6 - s7 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s6 - s7 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s6 | fmacs s2 , s1, s6 | ||||
| vmul.f32 s3 , s0, s6 | vmul.f32 s3 , s0, s6 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| vmul.f32 s2 , s0, s5 | vmul.f32 s2 , s0, s5 | ||||
| fmacs s2 , s1, s7 | fmacs s2 , s1, s7 | ||||
| vmul.f32 s3 , s0, s7 | vmul.f32 s3 , s0, s7 | ||||
| vmls.f32 s3 , s1, s5 | vmls.f32 s3 , s1, s5 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s6 - s7 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s6 - s7 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s6 | fmacs s2 , s1, s6 | ||||
| vmul.f32 s3 , s0, s6 | vmul.f32 s3 , s0, s6 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| vmul.f32 s2 , s0, s5 | vmul.f32 s2 , s0, s5 | ||||
| fmacs s2 , s1, s7 | fmacs s2 , s1, s7 | ||||
| vmul.f32 s3 , s0, s7 | vmul.f32 s3 , s0, s7 | ||||
| vmls.f32 s3 , s1, s5 | vmls.f32 s3 , s1, s5 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s6 - s7 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s6 - s7 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s6 | fmacs s2 , s1, s6 | ||||
| vmul.f32 s3 , s0, s6 | vmul.f32 s3 , s0, s6 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| vmul.f32 s2 , s0, s5 | vmul.f32 s2 , s0, s5 | ||||
| fmacs s2 , s1, s7 | fmacs s2 , s1, s7 | ||||
| vmul.f32 s3 , s0, s7 | vmul.f32 s3 , s0, s7 | ||||
| vmls.f32 s3 , s1, s5 | vmls.f32 s3 , s1, s5 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s6 - s7 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s6 - s7 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s6 | fmacs s2 , s1, s6 | ||||
| vmul.f32 s3 , s0, s6 | vmul.f32 s3 , s0, s6 | ||||
| vmls.f32 s3 , s1, s4 | vmls.f32 s3 , s1, s4 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| vmul.f32 s2 , s0, s5 | vmul.f32 s2 , s0, s5 | ||||
| fmacs s2 , s1, s7 | fmacs s2 , s1, s7 | ||||
| vmul.f32 s3 , s0, s7 | vmul.f32 s3 , s0, s7 | ||||
| vmls.f32 s3 , s1, s5 | vmls.f32 s3 , s1, s5 | ||||
| fstmias X!, { s2 } | |||||
| fstmias Y!, { s3 } | |||||
| vstmia.f32 X!, { s2 } | |||||
| vstmia.f32 Y!, { s3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 - s5 } | |||||
| fldmias Y, { s6 - s7 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vldmia.f32 Y, { s6 - s7 } | |||||
| vmul.f32 s2 , s0, s4 | vmul.f32 s2 , s0, s4 | ||||
| fmacs s2 , s1, s6 | fmacs s2 , s1, s6 | ||||
| vmul.f32 s3 , s0, s6 | vmul.f32 s3 , s0, s6 | ||||
| @@ -64,30 +64,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X, { d4 - d7 } | |||||
| vldmia.f64 X, { d4 - d7 } | |||||
| vmul.f64 d4, d4, d0 | vmul.f64 d4, d4, d0 | ||||
| vmul.f64 d5, d5, d0 | vmul.f64 d5, d5, d0 | ||||
| vmul.f64 d6, d6, d0 | vmul.f64 d6, d6, d0 | ||||
| fstmiad X!, { d4 - d5 } | |||||
| vstmia.f64 X!, { d4 - d5 } | |||||
| vmul.f64 d7, d7, d0 | vmul.f64 d7, d7, d0 | ||||
| fstmiad X!, { d6 - d7 } | |||||
| vstmia.f64 X!, { d6 - d7 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vmul.f64 d4, d4, d0 | vmul.f64 d4, d4, d0 | ||||
| fstmiad X!, { d4 } | |||||
| vstmia.f64 X!, { d4 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 } | |||||
| vldmia.f64 X, { d4 } | |||||
| vmul.f64 d4, d4, d0 | vmul.f64 d4, d4, d0 | ||||
| fstmiad X, { d4 } | |||||
| vstmia.f64 X, { d4 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| .endm | .endm | ||||
| @@ -96,30 +96,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| fldmias X, { s4 - s7 } | |||||
| vldmia.f32 X, { s4 - s7 } | |||||
| vmul.f32 s4, s4, s0 | vmul.f32 s4, s4, s0 | ||||
| vmul.f32 s5, s5, s0 | vmul.f32 s5, s5, s0 | ||||
| vmul.f32 s6, s6, s0 | vmul.f32 s6, s6, s0 | ||||
| fstmias X!, { s4 - s5 } | |||||
| vstmia.f32 X!, { s4 - s5 } | |||||
| vmul.f32 s7, s7, s0 | vmul.f32 s7, s7, s0 | ||||
| fstmias X!, { s6 - s7 } | |||||
| vstmia.f32 X!, { s6 - s7 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vmul.f32 s4, s4, s0 | vmul.f32 s4, s4, s0 | ||||
| fstmias X!, { s4 } | |||||
| vstmia.f32 X!, { s4 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vmul.f32 s4, s4, s0 | vmul.f32 s4, s4, s0 | ||||
| fstmias X, { s4 } | |||||
| vstmia.f32 X, { s4 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| .endm | .endm | ||||
| @@ -136,58 +136,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vmul.f64 d2, d0, d4 | vmul.f64 d2, d0, d4 | ||||
| vmls.f64 d2, d1, d5 | vmls.f64 d2, d1, d5 | ||||
| vmul.f64 d3, d0, d5 | vmul.f64 d3, d0, d5 | ||||
| fmacd d3, d1, d4 | fmacd d3, d1, d4 | ||||
| fstmiad X!, { d2 - d3 } | |||||
| vstmia.f64 X!, { d2 - d3 } | |||||
| fldmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vmul.f64 d2, d0, d4 | vmul.f64 d2, d0, d4 | ||||
| vmls.f64 d2, d1, d5 | vmls.f64 d2, d1, d5 | ||||
| vmul.f64 d3, d0, d5 | vmul.f64 d3, d0, d5 | ||||
| fmacd d3, d1, d4 | fmacd d3, d1, d4 | ||||
| fstmiad X!, { d2 - d3 } | |||||
| vstmia.f64 X!, { d2 - d3 } | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vmul.f64 d2, d0, d4 | vmul.f64 d2, d0, d4 | ||||
| vmls.f64 d2, d1, d5 | vmls.f64 d2, d1, d5 | ||||
| vmul.f64 d3, d0, d5 | vmul.f64 d3, d0, d5 | ||||
| fmacd d3, d1, d4 | fmacd d3, d1, d4 | ||||
| fstmiad X!, { d2 - d3 } | |||||
| vstmia.f64 X!, { d2 - d3 } | |||||
| fldmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vmul.f64 d2, d0, d4 | vmul.f64 d2, d0, d4 | ||||
| vmls.f64 d2, d1, d5 | vmls.f64 d2, d1, d5 | ||||
| vmul.f64 d3, d0, d5 | vmul.f64 d3, d0, d5 | ||||
| fmacd d3, d1, d4 | fmacd d3, d1, d4 | ||||
| fstmiad X!, { d2 - d3 } | |||||
| vstmia.f64 X!, { d2 - d3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vmul.f64 d2, d0, d4 | vmul.f64 d2, d0, d4 | ||||
| vmls.f64 d2, d1, d5 | vmls.f64 d2, d1, d5 | ||||
| vmul.f64 d3, d0, d5 | vmul.f64 d3, d0, d5 | ||||
| fmacd d3, d1, d4 | fmacd d3, d1, d4 | ||||
| fstmiad X!, { d2 - d3 } | |||||
| vstmia.f64 X!, { d2 - d3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vmul.f64 d2, d0, d4 | vmul.f64 d2, d0, d4 | ||||
| vmls.f64 d2, d1, d5 | vmls.f64 d2, d1, d5 | ||||
| vmul.f64 d3, d0, d5 | vmul.f64 d3, d0, d5 | ||||
| fmacd d3, d1, d4 | fmacd d3, d1, d4 | ||||
| fstmiad X, { d2 - d3 } | |||||
| vstmia.f64 X, { d2 - d3 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| .endm | .endm | ||||
| @@ -199,56 +199,56 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vmul.f32 s2, s0, s4 | vmul.f32 s2, s0, s4 | ||||
| vmls.f32 s2, s1, s5 | vmls.f32 s2, s1, s5 | ||||
| vmul.f32 s3, s0, s5 | vmul.f32 s3, s0, s5 | ||||
| fmacs s3, s1, s4 | fmacs s3, s1, s4 | ||||
| fstmias X!, { s2 - s3 } | |||||
| vstmia.f32 X!, { s2 - s3 } | |||||
| fldmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vmul.f32 s2, s0, s4 | vmul.f32 s2, s0, s4 | ||||
| vmls.f32 s2, s1, s5 | vmls.f32 s2, s1, s5 | ||||
| vmul.f32 s3, s0, s5 | vmul.f32 s3, s0, s5 | ||||
| fmacs s3, s1, s4 | fmacs s3, s1, s4 | ||||
| fstmias X!, { s2 - s3 } | |||||
| vstmia.f32 X!, { s2 - s3 } | |||||
| fldmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vmul.f32 s2, s0, s4 | vmul.f32 s2, s0, s4 | ||||
| vmls.f32 s2, s1, s5 | vmls.f32 s2, s1, s5 | ||||
| vmul.f32 s3, s0, s5 | vmul.f32 s3, s0, s5 | ||||
| fmacs s3, s1, s4 | fmacs s3, s1, s4 | ||||
| fstmias X!, { s2 - s3 } | |||||
| vstmia.f32 X!, { s2 - s3 } | |||||
| fldmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vmul.f32 s2, s0, s4 | vmul.f32 s2, s0, s4 | ||||
| vmls.f32 s2, s1, s5 | vmls.f32 s2, s1, s5 | ||||
| vmul.f32 s3, s0, s5 | vmul.f32 s3, s0, s5 | ||||
| fmacs s3, s1, s4 | fmacs s3, s1, s4 | ||||
| fstmias X!, { s2 - s3 } | |||||
| vstmia.f32 X!, { s2 - s3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vmul.f32 s2, s0, s4 | vmul.f32 s2, s0, s4 | ||||
| vmls.f32 s2, s1, s5 | vmls.f32 s2, s1, s5 | ||||
| vmul.f32 s3, s0, s5 | vmul.f32 s3, s0, s5 | ||||
| fmacs s3, s1, s4 | fmacs s3, s1, s4 | ||||
| fstmias X!, { s2 - s3 } | |||||
| vstmia.f32 X!, { s2 - s3 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s4 - s5 } | |||||
| vmul.f32 s2, s0, s4 | vmul.f32 s2, s0, s4 | ||||
| vmls.f32 s2, s1, s5 | vmls.f32 s2, s1, s5 | ||||
| vmul.f32 s3, s0, s5 | vmul.f32 s3, s0, s5 | ||||
| fmacs s3, s1, s4 | fmacs s3, s1, s4 | ||||
| fstmias X, { s2 - s3 } | |||||
| vstmia.f32 X, { s2 - s3 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| .endm | .endm | ||||
| @@ -65,17 +65,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_F8 | .macro COPY_F8 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| fldmias X!, { s0 - s3 } | |||||
| fldmias X!, { s4 - s7 } | |||||
| fstmias Y!, { s0 - s3 } | |||||
| fstmias Y!, { s4 - s7 } | |||||
| vldmia.f32 X!, { s0 - s3 } | |||||
| vldmia.f32 X!, { s4 - s7 } | |||||
| vstmia.f32 Y!, { s0 - s3 } | |||||
| vstmia.f32 Y!, { s4 - s7 } | |||||
| .endm | .endm | ||||
| .macro COPY_F1 | .macro COPY_F1 | ||||
| fldmias X!, { s0 } | |||||
| fstmias Y!, { s0 } | |||||
| vldmia.f32 X!, { s0 } | |||||
| vstmia.f32 Y!, { s0 } | |||||
| .endm | .endm | ||||
| @@ -85,23 +85,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_S4 | .macro COPY_S4 | ||||
| nop | nop | ||||
| fldmias X, { s0 } | |||||
| fstmias Y, { s0 } | |||||
| vldmia.f32 X, { s0 } | |||||
| vstmia.f32 Y, { s0 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s1 } | |||||
| fstmias Y, { s1 } | |||||
| vldmia.f32 X, { s1 } | |||||
| vstmia.f32 Y, { s1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s0 } | |||||
| fstmias Y, { s0 } | |||||
| vldmia.f32 X, { s0 } | |||||
| vstmia.f32 Y, { s0 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s1 } | |||||
| fstmias Y, { s1 } | |||||
| vldmia.f32 X, { s1 } | |||||
| vstmia.f32 Y, { s1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -110,8 +110,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_S1 | .macro COPY_S1 | ||||
| fldmias X, { s0 } | |||||
| fstmias Y, { s0 } | |||||
| vldmia.f32 X, { s0 } | |||||
| vstmia.f32 Y, { s0 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -68,26 +68,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| fldmias X!, { s14 } | |||||
| fldmias Y!, { s15 } | |||||
| vldmia.f32 X!, { s14 } | |||||
| vldmia.f32 Y!, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| fldmias X!, { s14 } | |||||
| fldmias Y!, { s15 } | |||||
| vldmia.f32 X!, { s14 } | |||||
| vldmia.f32 Y!, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| fldmias X!, { s14 } | |||||
| fldmias Y!, { s15 } | |||||
| vldmia.f32 X!, { s14 } | |||||
| vldmia.f32 Y!, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| fldmias X!, { s14 } | |||||
| fldmias Y!, { s15 } | |||||
| vldmia.f32 X!, { s14 } | |||||
| vldmia.f32 Y!, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| @@ -96,8 +96,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s14 } | |||||
| fldmias Y!, { s15 } | |||||
| vldmia.f32 X!, { s14 } | |||||
| vldmia.f32 Y!, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| @@ -109,32 +109,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| nop | nop | ||||
| fldmias X, { s14 } | |||||
| fldmias Y, { s15 } | |||||
| vldmia.f32 X, { s14 } | |||||
| vldmia.f32 Y, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s14 } | |||||
| fldmias Y, { s15 } | |||||
| vldmia.f32 X, { s14 } | |||||
| vldmia.f32 Y, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s14 } | |||||
| fldmias Y, { s15 } | |||||
| vldmia.f32 X, { s14 } | |||||
| vldmia.f32 Y, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmias X, { s14 } | |||||
| fldmias Y, { s15 } | |||||
| vldmia.f32 X, { s14 } | |||||
| vldmia.f32 Y, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| @@ -146,8 +146,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s14 } | |||||
| fldmias Y, { s15 } | |||||
| vldmia.f32 X, { s14 } | |||||
| vldmia.f32 Y, { s15 } | |||||
| vmul.f32 s15, s14, s15 | vmul.f32 s15, s14, s15 | ||||
| vcvt.f64.f32 d4, s15 | vcvt.f64.f32 d4, s15 | ||||
| vadd.f64 d0 , d0, d4 | vadd.f64 d0 , d0, d4 | ||||
| @@ -162,12 +162,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| fldmias X!, { s8 - s9 } | |||||
| fldmias Y!, { s4 - s5} | |||||
| vldmia.f32 X!, { s8 - s9 } | |||||
| vldmia.f32 Y!, { s4 - s5} | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fldmias X!, { s10 - s11 } | |||||
| vldmia.f32 X!, { s10 - s11 } | |||||
| fmacs s1 , s5, s9 | fmacs s1 , s5, s9 | ||||
| fldmias Y!, { s6 - s7 } | |||||
| vldmia.f32 Y!, { s6 - s7 } | |||||
| fmacs s0 , s6, s10 | fmacs s0 , s6, s10 | ||||
| fmacs s1 , s7, s11 | fmacs s1 , s7, s11 | ||||
| @@ -175,8 +175,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X!, { s4 } | |||||
| fldmias Y!, { s8 } | |||||
| vldmia.f32 X!, { s4 } | |||||
| vldmia.f32 Y!, { s8 } | |||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| .endm | .endm | ||||
| @@ -185,26 +185,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S4 | .macro KERNEL_S4 | ||||
| nop | nop | ||||
| fldmias X, { s4 } | |||||
| fldmias Y, { s8 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vldmia.f32 Y, { s8 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| fldmias X, { s5 } | |||||
| fldmias Y, { s9 } | |||||
| vldmia.f32 X, { s5 } | |||||
| vldmia.f32 Y, { s9 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fmacs s1 , s5, s9 | fmacs s1 , s5, s9 | ||||
| fldmias X, { s6 } | |||||
| fldmias Y, { s10 } | |||||
| vldmia.f32 X, { s6 } | |||||
| vldmia.f32 Y, { s10 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fmacs s0 , s6, s10 | fmacs s0 , s6, s10 | ||||
| fldmias X, { s7 } | |||||
| fldmias Y, { s11 } | |||||
| vldmia.f32 X, { s7 } | |||||
| vldmia.f32 Y, { s11 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fmacs s1 , s7, s11 | fmacs s1 , s7, s11 | ||||
| @@ -214,8 +214,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s4 } | |||||
| fldmias Y, { s8 } | |||||
| vldmia.f32 X, { s4 } | |||||
| vldmia.f32 Y, { s8 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| fmacs s0 , s4, s8 | fmacs s0 , s4, s8 | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -112,8 +112,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL4x2_SUB | .macro KERNEL4x2_SUB | ||||
| fldmias AO! , { s0 - s3 } | |||||
| fldmias BO! , { s4 - s5 } | |||||
| vldmia.f32 AO! , { s0 - s3 } | |||||
| vldmia.f32 BO! , { s4 - s5 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s1, s4 | fmacs s9 , s1, s4 | ||||
| @@ -136,29 +136,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL4x4_I | .macro KERNEL4x4_I | ||||
| pld [ AO , #A_PRE ] | pld [ AO , #A_PRE ] | ||||
| fldmias AO!, { s0 - s1 } | |||||
| vldmia.f32 AO!, { s0 - s1 } | |||||
| pld [ BO , #B_PRE ] | pld [ BO , #B_PRE ] | ||||
| fldmias BO!, { s8 - s9 } | |||||
| vldmia.f32 BO!, { s8 - s9 } | |||||
| fmuls s16 , s0, s8 | fmuls s16 , s0, s8 | ||||
| fldmias AO!, { s2 - s3 } | |||||
| vldmia.f32 AO!, { s2 - s3 } | |||||
| fmuls s17 , s1, s8 | fmuls s17 , s1, s8 | ||||
| fmuls s18 , s2, s8 | fmuls s18 , s2, s8 | ||||
| fldmias BO!, { s10 - s11 } | |||||
| vldmia.f32 BO!, { s10 - s11 } | |||||
| fmuls s19 , s3, s8 | fmuls s19 , s3, s8 | ||||
| fmuls s20 , s0, s9 | fmuls s20 , s0, s9 | ||||
| fldmias AO!, { s4 - s5 } | |||||
| vldmia.f32 AO!, { s4 - s5 } | |||||
| fmuls s21 , s1, s9 | fmuls s21 , s1, s9 | ||||
| fmuls s22 , s2, s9 | fmuls s22 , s2, s9 | ||||
| fldmias AO!, { s6 - s7 } | |||||
| vldmia.f32 AO!, { s6 - s7 } | |||||
| fmuls s23 , s3, s9 | fmuls s23 , s3, s9 | ||||
| fmuls s24 , s0, s10 | fmuls s24 , s0, s10 | ||||
| fldmias BO!, { s12 - s13 } | |||||
| vldmia.f32 BO!, { s12 - s13 } | |||||
| fmuls s25 , s1, s10 | fmuls s25 , s1, s10 | ||||
| fmuls s26 , s2, s10 | fmuls s26 , s2, s10 | ||||
| fldmias BO!, { s14 - s15 } | |||||
| vldmia.f32 BO!, { s14 - s15 } | |||||
| fmuls s27 , s3, s10 | fmuls s27 , s3, s10 | ||||
| fmuls s28 , s0, s11 | fmuls s28 , s0, s11 | ||||
| @@ -174,20 +174,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ AO , #A_PRE ] | pld [ AO , #A_PRE ] | ||||
| fmacs s16 , s4, s12 | fmacs s16 , s4, s12 | ||||
| fmacs s17 , s5, s12 | fmacs s17 , s5, s12 | ||||
| fldmias AO!, { s0 - s3 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| fmacs s18 , s6, s12 | fmacs s18 , s6, s12 | ||||
| pld [ BO , #B_PRE ] | pld [ BO , #B_PRE ] | ||||
| fmacs s19 , s7, s12 | fmacs s19 , s7, s12 | ||||
| fmacs s20 , s4, s13 | fmacs s20 , s4, s13 | ||||
| fldmias BO!, { s8 - s11 } | |||||
| vldmia.f32 BO!, { s8 - s11 } | |||||
| fmacs s21 , s5, s13 | fmacs s21 , s5, s13 | ||||
| fmacs s22 , s6, s13 | fmacs s22 , s6, s13 | ||||
| //fldmias AO!, { s2 - s3 } | |||||
| //vldmia.f32 AO!, { s2 - s3 } | |||||
| fmacs s23 , s7, s13 | fmacs s23 , s7, s13 | ||||
| fmacs s24 , s4, s14 | fmacs s24 , s4, s14 | ||||
| //fldmias BO!, { s10 - s11 } | |||||
| //vldmia.f32 BO!, { s10 - s11 } | |||||
| fmacs s25 , s5, s14 | fmacs s25 , s5, s14 | ||||
| fmacs s26 , s6, s14 | fmacs s26 , s6, s14 | ||||
| fmacs s27 , s7, s14 | fmacs s27 , s7, s14 | ||||
| @@ -203,17 +203,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL4x4_M1 | .macro KERNEL4x4_M1 | ||||
| fmacs s16 , s0, s8 | fmacs s16 , s0, s8 | ||||
| fldmias AO!, { s4 - s7 } | |||||
| vldmia.f32 AO!, { s4 - s7 } | |||||
| fmacs s17 , s1, s8 | fmacs s17 , s1, s8 | ||||
| fmacs s18 , s2, s8 | fmacs s18 , s2, s8 | ||||
| fldmias BO!, { s12 - s15 } | |||||
| //fldmias AO!, { s6 - s7 } | |||||
| vldmia.f32 BO!, { s12 - s15 } | |||||
| //vldmia.f32 AO!, { s6 - s7 } | |||||
| fmacs s19 , s3, s8 | fmacs s19 , s3, s8 | ||||
| fmacs s20 , s0, s9 | fmacs s20 , s0, s9 | ||||
| fmacs s21 , s1, s9 | fmacs s21 , s1, s9 | ||||
| fmacs s22 , s2, s9 | fmacs s22 , s2, s9 | ||||
| //fldmias BO!, { s14 - s15 } | |||||
| //vldmia.f32 BO!, { s14 - s15 } | |||||
| fmacs s23 , s3, s9 | fmacs s23 , s3, s9 | ||||
| fmacs s24 , s0, s10 | fmacs s24 , s0, s10 | ||||
| @@ -300,7 +300,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0, ALPHA | flds s0, ALPHA | ||||
| add r4 , CO2, r3 | add r4 , CO2, r3 | ||||
| fldmias CO1, { s8 - s11 } | |||||
| vldmia.f32 CO1, { s8 - s11 } | |||||
| fmacs s8 , s0 , s16 | fmacs s8 , s0 , s16 | ||||
| flds s12, [CO2] | flds s12, [CO2] | ||||
| @@ -322,7 +322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ CO1 , #C_PRE ] | pld [ CO1 , #C_PRE ] | ||||
| fldmias r4, { s8 - s11 } | |||||
| vldmia.f32 r4, { s8 - s11 } | |||||
| fmacs s8 , s0 , s24 | fmacs s8 , s0 , s24 | ||||
| fsts s12, [CO2] | fsts s12, [CO2] | ||||
| @@ -338,7 +338,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add CO2, r4 , r3 | add CO2, r4 , r3 | ||||
| fldmias CO2, { s12 - s15 } | |||||
| vldmia.f32 CO2, { s12 - s15 } | |||||
| fsts s8 , [r4 ] | fsts s8 , [r4 ] | ||||
| fmacs s12, s0 , s28 | fmacs s12, s0 , s28 | ||||
| @@ -350,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fmacs s15, s0 , s31 | fmacs s15, s0 , s31 | ||||
| pld [ r4 , #C_PRE ] | pld [ r4 , #C_PRE ] | ||||
| fstmias CO2, { s12 - s15 } | |||||
| vstmia.f32 CO2, { s12 - s15 } | |||||
| pld [ CO2 , #C_PRE ] | pld [ CO2 , #C_PRE ] | ||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -73,7 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s3 , [ AO2, #4 ] | flds s3 , [ AO2, #4 ] | ||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| fstmias BO!, { s0 - s3 } | |||||
| vstmia.f32 BO!, { s0 - s3 } | |||||
| add AO2, AO2, #8 | add AO2, AO2, #8 | ||||
| .endm | .endm | ||||
| @@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s1 , [ AO2, #0 ] | flds s1 , [ AO2, #0 ] | ||||
| add AO1, AO1, #4 | add AO1, AO1, #4 | ||||
| fstmias BO!, { s0 - s1 } | |||||
| vstmia.f32 BO!, { s0 - s1 } | |||||
| add AO2, AO2, #4 | add AO2, AO2, #4 | ||||
| .endm | .endm | ||||
| @@ -95,7 +95,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0 , [ AO1, #0 ] | flds s0 , [ AO1, #0 ] | ||||
| flds s1 , [ AO1, #4 ] | flds s1 , [ AO1, #4 ] | ||||
| fstmias BO!, { s0 - s1 } | |||||
| vstmia.f32 BO!, { s0 - s1 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| .endm | .endm | ||||
| @@ -105,7 +105,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0 , [ AO1, #0 ] | flds s0 , [ AO1, #0 ] | ||||
| fstmias BO!, { s0 } | |||||
| vstmia.f32 BO!, { s0 } | |||||
| add AO1, AO1, #4 | add AO1, AO1, #4 | ||||
| .endm | .endm | ||||
| @@ -100,10 +100,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s11, [ AO4, #8 ] | flds s11, [ AO4, #8 ] | ||||
| flds s15, [ AO4, #12 ] | flds s15, [ AO4, #12 ] | ||||
| fstmias BO!, { s0 - s3 } | |||||
| vstmia.f32 BO!, { s0 - s3 } | |||||
| add AO4, AO4, #16 | add AO4, AO4, #16 | ||||
| fstmias BO!, { s4 - s7 } | |||||
| fstmias BO!, { s8 - s15 } | |||||
| vstmia.f32 BO!, { s4 - s7 } | |||||
| vstmia.f32 BO!, { s8 - s15 } | |||||
| .endm | .endm | ||||
| @@ -117,7 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s3 , [ AO4, #0 ] | flds s3 , [ AO4, #0 ] | ||||
| add AO3, AO3, #4 | add AO3, AO3, #4 | ||||
| fstmias BO!, { s0 - s3 } | |||||
| vstmia.f32 BO!, { s0 - s3 } | |||||
| add AO4, AO4, #4 | add AO4, AO4, #4 | ||||
| .endm | .endm | ||||
| @@ -135,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s5 , [ AO2, #8 ] | flds s5 , [ AO2, #8 ] | ||||
| flds s7 , [ AO2, #12 ] | flds s7 , [ AO2, #12 ] | ||||
| fstmias BO!, { s0 - s7 } | |||||
| vstmia.f32 BO!, { s0 - s7 } | |||||
| add AO2, AO2, #16 | add AO2, AO2, #16 | ||||
| .endm | .endm | ||||
| @@ -147,7 +147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s1 , [ AO2, #0 ] | flds s1 , [ AO2, #0 ] | ||||
| add AO1, AO1, #4 | add AO1, AO1, #4 | ||||
| fstmias BO!, { s0 - s1 } | |||||
| vstmia.f32 BO!, { s0 - s1 } | |||||
| add AO2, AO2, #4 | add AO2, AO2, #4 | ||||
| .endm | .endm | ||||
| @@ -159,7 +159,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s2 , [ AO1, #8 ] | flds s2 , [ AO1, #8 ] | ||||
| flds s3 , [ AO1, #12 ] | flds s3 , [ AO1, #12 ] | ||||
| fstmias BO!, { s0 - s3 } | |||||
| vstmia.f32 BO!, { s0 - s3 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| .endm | .endm | ||||
| @@ -169,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| flds s0 , [ AO1, #0 ] | flds s0 , [ AO1, #0 ] | ||||
| fstmias BO!, { s0 } | |||||
| vstmia.f32 BO!, { s0 } | |||||
| add AO1, AO1, #4 | add AO1, AO1, #4 | ||||
| .endm | .endm | ||||
| @@ -76,21 +76,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY4x4_1 | .macro COPY4x4_1 | ||||
| pld [ AO1, #A_PRE ] | pld [ AO1, #A_PRE ] | ||||
| fldmias AO1, { s0 - s3 } | |||||
| vldmia.f32 AO1, { s0 - s3 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| pld [ r3, #A_PRE ] | pld [ r3, #A_PRE ] | ||||
| fldmias r3, { s4 - s7 } | |||||
| vldmia.f32 r3, { s4 - s7 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| pld [ r3, #A_PRE ] | pld [ r3, #A_PRE ] | ||||
| fldmias r3, { s8 - s11 } | |||||
| vldmia.f32 r3, { s8 - s11 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| pld [ r3, #A_PRE ] | pld [ r3, #A_PRE ] | ||||
| fldmias r3, { s12 - s15 } | |||||
| vldmia.f32 r3, { s12 - s15 } | |||||
| fstmias BO1, { s0 - s15 } | |||||
| vstmia.f32 BO1, { s0 - s15 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -98,18 +98,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY4x4_2 | .macro COPY4x4_2 | ||||
| fldmias AO1, { s0 - s3 } | |||||
| vldmia.f32 AO1, { s0 - s3 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmias r3, { s4 - s7 } | |||||
| vldmia.f32 r3, { s4 - s7 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmias r3, { s8 - s11 } | |||||
| vldmia.f32 r3, { s8 - s11 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmias r3, { s12 - s15 } | |||||
| vldmia.f32 r3, { s12 - s15 } | |||||
| fstmias BO1, { s0 - s15 } | |||||
| vstmia.f32 BO1, { s0 - s15 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -118,18 +118,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY2x4 | .macro COPY2x4 | ||||
| fldmias AO1, { s0 - s1 } | |||||
| vldmia.f32 AO1, { s0 - s1 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmias r3, { s2 - s3 } | |||||
| vldmia.f32 r3, { s2 - s3 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmias r3, { s4 - s5 } | |||||
| vldmia.f32 r3, { s4 - s5 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmias r3, { s6 - s7 } | |||||
| vldmia.f32 r3, { s6 - s7 } | |||||
| fstmias BO2, { s0 - s7 } | |||||
| vstmia.f32 BO2, { s0 - s7 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| add BO2, BO2, #32 | add BO2, BO2, #32 | ||||
| @@ -137,18 +137,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x4 | .macro COPY1x4 | ||||
| fldmias AO1, { s0 } | |||||
| vldmia.f32 AO1, { s0 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmias r3, { s1 } | |||||
| vldmia.f32 r3, { s1 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmias r3, { s2 } | |||||
| vldmia.f32 r3, { s2 } | |||||
| add r3, r3, LDA | add r3, r3, LDA | ||||
| fldmias r3, { s3 } | |||||
| vldmia.f32 r3, { s3 } | |||||
| fstmias BO3, { s0 - s3 } | |||||
| vstmia.f32 BO3, { s0 - s3 } | |||||
| add AO1, AO1, #4 | add AO1, AO1, #4 | ||||
| add BO3, BO3, #16 | add BO3, BO3, #16 | ||||
| @@ -158,12 +158,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY4x2 | .macro COPY4x2 | ||||
| fldmias AO1, { s0 - s3 } | |||||
| vldmia.f32 AO1, { s0 - s3 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmias r3, { s4 - s7 } | |||||
| vldmia.f32 r3, { s4 - s7 } | |||||
| fstmias BO1, { s0 - s7 } | |||||
| vstmia.f32 BO1, { s0 - s7 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -171,12 +171,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY2x2 | .macro COPY2x2 | ||||
| fldmias AO1, { s0 - s1 } | |||||
| vldmia.f32 AO1, { s0 - s1 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmias r3, { s2 - s3 } | |||||
| vldmia.f32 r3, { s2 - s3 } | |||||
| fstmias BO2, { s0 - s3 } | |||||
| vstmia.f32 BO2, { s0 - s3 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| add BO2, BO2, #16 | add BO2, BO2, #16 | ||||
| @@ -184,12 +184,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x2 | .macro COPY1x2 | ||||
| fldmias AO1, { s0 } | |||||
| vldmia.f32 AO1, { s0 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmias r3, { s1 } | |||||
| vldmia.f32 r3, { s1 } | |||||
| fstmias BO3, { s0 - s1 } | |||||
| vstmia.f32 BO3, { s0 - s1 } | |||||
| add AO1, AO1, #4 | add AO1, AO1, #4 | ||||
| add BO3, BO3, #8 | add BO3, BO3, #8 | ||||
| @@ -199,9 +199,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY4x1 | .macro COPY4x1 | ||||
| fldmias AO1, { s0 - s3 } | |||||
| vldmia.f32 AO1, { s0 - s3 } | |||||
| fstmias BO1, { s0 - s3 } | |||||
| vstmia.f32 BO1, { s0 - s3 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -209,9 +209,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY2x1 | .macro COPY2x1 | ||||
| fldmias AO1, { s0 - s1 } | |||||
| vldmia.f32 AO1, { s0 - s1 } | |||||
| fstmias BO2, { s0 - s1 } | |||||
| vstmia.f32 BO2, { s0 - s1 } | |||||
| add AO1, AO1, #8 | add AO1, AO1, #8 | ||||
| add BO2, BO2, #8 | add BO2, BO2, #8 | ||||
| @@ -219,9 +219,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x1 | .macro COPY1x1 | ||||
| fldmias AO1, { s0 } | |||||
| vldmia.f32 AO1, { s0 } | |||||
| fstmias BO3, { s0 } | |||||
| vstmia.f32 BO3, { s0 } | |||||
| add AO1, AO1, #4 | add AO1, AO1, #4 | ||||
| add BO3, BO3, #4 | add BO3, BO3, #4 | ||||
| @@ -118,8 +118,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL4x2_SUB | .macro KERNEL4x2_SUB | ||||
| fldmias AO!, { s0 - s3 } | |||||
| fldmias BO!, { s4 - s5 } | |||||
| vldmia.f32 AO!, { s0 - s3 } | |||||
| vldmia.f32 BO!, { s4 - s5 } | |||||
| fmacs s8 , s0, s4 | fmacs s8 , s0, s4 | ||||
| fmacs s9 , s1, s4 | fmacs s9 , s1, s4 | ||||
| @@ -122,30 +122,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL4x4_I | .macro KERNEL4x4_I | ||||
| fldmias AO!, { s0 - s1 } | |||||
| vldmia.f32 AO!, { s0 - s1 } | |||||
| pld [ AO , #A_PRE-8 ] | pld [ AO , #A_PRE-8 ] | ||||
| fldmias BO!, { s8 - s9 } | |||||
| vldmia.f32 BO!, { s8 - s9 } | |||||
| pld [ BO , #B_PRE-8 ] | pld [ BO , #B_PRE-8 ] | ||||
| fmuls s16 , s0, s8 | fmuls s16 , s0, s8 | ||||
| fldmias AO!, { s2 - s3 } | |||||
| vldmia.f32 AO!, { s2 - s3 } | |||||
| fmuls s17 , s1, s8 | fmuls s17 , s1, s8 | ||||
| fmuls s18 , s2, s8 | fmuls s18 , s2, s8 | ||||
| fldmias BO!, { s10 - s11 } | |||||
| vldmia.f32 BO!, { s10 - s11 } | |||||
| fmuls s19 , s3, s8 | fmuls s19 , s3, s8 | ||||
| fmuls s20 , s0, s9 | fmuls s20 , s0, s9 | ||||
| fldmias AO!, { s4 - s5 } | |||||
| vldmia.f32 AO!, { s4 - s5 } | |||||
| fmuls s21 , s1, s9 | fmuls s21 , s1, s9 | ||||
| fmuls s22 , s2, s9 | fmuls s22 , s2, s9 | ||||
| fldmias AO!, { s6 - s7 } | |||||
| vldmia.f32 AO!, { s6 - s7 } | |||||
| fmuls s23 , s3, s9 | fmuls s23 , s3, s9 | ||||
| fmuls s24 , s0, s10 | fmuls s24 , s0, s10 | ||||
| fldmias BO!, { s12 - s13 } | |||||
| vldmia.f32 BO!, { s12 - s13 } | |||||
| fmuls s25 , s1, s10 | fmuls s25 , s1, s10 | ||||
| fmuls s26 , s2, s10 | fmuls s26 , s2, s10 | ||||
| fldmias BO!, { s14 - s15 } | |||||
| vldmia.f32 BO!, { s14 - s15 } | |||||
| fmuls s27 , s3, s10 | fmuls s27 , s3, s10 | ||||
| fmuls s28 , s0, s11 | fmuls s28 , s0, s11 | ||||
| @@ -161,20 +161,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ AO , #A_PRE ] | pld [ AO , #A_PRE ] | ||||
| fmacs s16 , s4, s12 | fmacs s16 , s4, s12 | ||||
| fmacs s17 , s5, s12 | fmacs s17 , s5, s12 | ||||
| fldmias AO!, { s0 - s1 } | |||||
| vldmia.f32 AO!, { s0 - s1 } | |||||
| fmacs s18 , s6, s12 | fmacs s18 , s6, s12 | ||||
| pld [ BO , #B_PRE ] | pld [ BO , #B_PRE ] | ||||
| fmacs s19 , s7, s12 | fmacs s19 , s7, s12 | ||||
| fmacs s20 , s4, s13 | fmacs s20 , s4, s13 | ||||
| fldmias AO!, { s2 - s3 } | |||||
| vldmia.f32 AO!, { s2 - s3 } | |||||
| fmacs s21 , s5, s13 | fmacs s21 , s5, s13 | ||||
| fmacs s22 , s6, s13 | fmacs s22 , s6, s13 | ||||
| fldmias BO!, { s8 - s9 } | |||||
| vldmia.f32 BO!, { s8 - s9 } | |||||
| fmacs s23 , s7, s13 | fmacs s23 , s7, s13 | ||||
| fmacs s24 , s4, s14 | fmacs s24 , s4, s14 | ||||
| fldmias BO!, { s10 - s11 } | |||||
| vldmia.f32 BO!, { s10 - s11 } | |||||
| fmacs s25 , s5, s14 | fmacs s25 , s5, s14 | ||||
| fmacs s26 , s6, s14 | fmacs s26 , s6, s14 | ||||
| fmacs s27 , s7, s14 | fmacs s27 , s7, s14 | ||||
| @@ -190,17 +190,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL4x4_M1 | .macro KERNEL4x4_M1 | ||||
| fmacs s16 , s0, s8 | fmacs s16 , s0, s8 | ||||
| fldmias AO!, { s4 - s5 } | |||||
| vldmia.f32 AO!, { s4 - s5 } | |||||
| fmacs s17 , s1, s8 | fmacs s17 , s1, s8 | ||||
| fmacs s18 , s2, s8 | fmacs s18 , s2, s8 | ||||
| fldmias AO!, { s6 - s7 } | |||||
| vldmia.f32 AO!, { s6 - s7 } | |||||
| fmacs s19 , s3, s8 | fmacs s19 , s3, s8 | ||||
| fmacs s20 , s0, s9 | fmacs s20 , s0, s9 | ||||
| fldmias BO!, { s12 - s13 } | |||||
| vldmia.f32 BO!, { s12 - s13 } | |||||
| fmacs s21 , s1, s9 | fmacs s21 , s1, s9 | ||||
| fmacs s22 , s2, s9 | fmacs s22 , s2, s9 | ||||
| fldmias BO!, { s14 - s15 } | |||||
| vldmia.f32 BO!, { s14 - s15 } | |||||
| fmacs s23 , s3, s9 | fmacs s23 , s3, s9 | ||||
| fmacs s24 , s0, s10 | fmacs s24 , s0, s10 | ||||
| @@ -325,7 +325,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fsts s11, [r4 , #12 ] | fsts s11, [r4 , #12 ] | ||||
| fmuls s15, s0 , s31 | fmuls s15, s0 , s31 | ||||
| fstmias CO2, { s12 - s15 } | |||||
| vstmia.f32 CO2, { s12 - s15 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -103,29 +103,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad X, { d0 - d3 } | |||||
| fldmiad Y, { d4 - d7 } | |||||
| fstmiad Y!, { d0 - d3 } | |||||
| fstmiad X!, { d4 - d7} | |||||
| vldmia.f64 X, { d0 - d3 } | |||||
| vldmia.f64 Y, { d4 - d7 } | |||||
| vstmia.f64 Y!, { d0 - d3 } | |||||
| vstmia.f64 X!, { d4 - d7} | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X, { d0 } | |||||
| fldmiad Y, { d4 } | |||||
| fstmiad Y!, { d0 } | |||||
| fstmiad X!, { d4 } | |||||
| vldmia.f64 X, { d0 } | |||||
| vldmia.f64 Y, { d4 } | |||||
| vstmia.f64 Y!, { d0 } | |||||
| vstmia.f64 X!, { d4 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d0 } | |||||
| fldmiad Y, { d4 } | |||||
| fstmiad Y, { d0 } | |||||
| fstmiad X, { d4 } | |||||
| vldmia.f64 X, { d0 } | |||||
| vldmia.f64 Y, { d4 } | |||||
| vstmia.f64 Y, { d0 } | |||||
| vstmia.f64 X, { d4 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -135,29 +135,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F4 | .macro KERNEL_F4 | ||||
| fldmias X, { s0 - s3 } | |||||
| fldmias Y, { s4 - s7 } | |||||
| fstmias Y!, { s0 - s3 } | |||||
| fstmias X!, { s4 - s7} | |||||
| vldmia.f32 X, { s0 - s3 } | |||||
| vldmia.f32 Y, { s4 - s7 } | |||||
| vstmia.f32 Y!, { s0 - s3 } | |||||
| vstmia.f32 X!, { s4 - s7} | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X, { s0 } | |||||
| fldmias Y, { s4 } | |||||
| fstmias Y!, { s0 } | |||||
| fstmias X!, { s4 } | |||||
| vldmia.f32 X, { s0 } | |||||
| vldmia.f32 Y, { s4 } | |||||
| vstmia.f32 Y!, { s0 } | |||||
| vstmia.f32 X!, { s4 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s0 } | |||||
| fldmias Y, { s4 } | |||||
| fstmias Y, { s0 } | |||||
| fstmias X, { s4 } | |||||
| vldmia.f32 X, { s0 } | |||||
| vldmia.f32 Y, { s4 } | |||||
| vstmia.f32 Y, { s0 } | |||||
| vstmia.f32 X, { s4 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -174,35 +174,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad X, { d0 - d3 } | |||||
| fldmiad Y, { d4 - d7 } | |||||
| fstmiad Y!, { d0 - d3 } | |||||
| fstmiad X!, { d4 - d7} | |||||
| vldmia.f64 X, { d0 - d3 } | |||||
| vldmia.f64 Y, { d4 - d7 } | |||||
| vstmia.f64 Y!, { d0 - d3 } | |||||
| vstmia.f64 X!, { d4 - d7} | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad X, { d0 - d3 } | |||||
| fldmiad Y, { d4 - d7 } | |||||
| fstmiad Y!, { d0 - d3 } | |||||
| fstmiad X!, { d4 - d7} | |||||
| vldmia.f64 X, { d0 - d3 } | |||||
| vldmia.f64 Y, { d4 - d7 } | |||||
| vstmia.f64 Y!, { d0 - d3 } | |||||
| vstmia.f64 X!, { d4 - d7} | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X, { d0 - d1 } | |||||
| fldmiad Y, { d4 - d5 } | |||||
| fstmiad Y!, { d0 - d1 } | |||||
| fstmiad X!, { d4 - d5 } | |||||
| vldmia.f64 X, { d0 - d1 } | |||||
| vldmia.f64 Y, { d4 - d5 } | |||||
| vstmia.f64 Y!, { d0 - d1 } | |||||
| vstmia.f64 X!, { d4 - d5 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d0 - d1 } | |||||
| fldmiad Y, { d4 - d5 } | |||||
| fstmiad Y, { d0 - d1 } | |||||
| fstmiad X, { d4 - d5 } | |||||
| vldmia.f64 X, { d0 - d1 } | |||||
| vldmia.f64 Y, { d4 - d5 } | |||||
| vstmia.f64 Y, { d0 - d1 } | |||||
| vstmia.f64 X, { d4 - d5 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -215,33 +215,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmias X, { s0 - s3 } | |||||
| fldmias Y, { s4 - s7 } | |||||
| fstmias Y!, { s0 - s3 } | |||||
| fstmias X!, { s4 - s7} | |||||
| vldmia.f32 X, { s0 - s3 } | |||||
| vldmia.f32 Y, { s4 - s7 } | |||||
| vstmia.f32 Y!, { s0 - s3 } | |||||
| vstmia.f32 X!, { s4 - s7} | |||||
| fldmias X, { s0 - s3 } | |||||
| fldmias Y, { s4 - s7 } | |||||
| fstmias Y!, { s0 - s3 } | |||||
| fstmias X!, { s4 - s7} | |||||
| vldmia.f32 X, { s0 - s3 } | |||||
| vldmia.f32 Y, { s4 - s7 } | |||||
| vstmia.f32 Y!, { s0 - s3 } | |||||
| vstmia.f32 X!, { s4 - s7} | |||||
| .endm | .endm | ||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmias X, { s0 - s1 } | |||||
| fldmias Y, { s4 - s5 } | |||||
| fstmias Y!, { s0 - s1 } | |||||
| fstmias X!, { s4 - s5 } | |||||
| vldmia.f32 X, { s0 - s1 } | |||||
| vldmia.f32 Y, { s4 - s5 } | |||||
| vstmia.f32 Y!, { s0 - s1 } | |||||
| vstmia.f32 X!, { s4 - s5 } | |||||
| .endm | .endm | ||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmias X, { s0 - s1 } | |||||
| fldmias Y, { s4 - s5 } | |||||
| fstmias Y, { s0 - s1 } | |||||
| fstmias X, { s4 - s5 } | |||||
| vldmia.f32 X, { s0 - s1 } | |||||
| vldmia.f32 Y, { s4 - s5 } | |||||
| vstmia.f32 Y, { s0 - s1 } | |||||
| vstmia.f32 X, { s4 - s5 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -66,15 +66,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ X, #X_PRE+32 ] | pld [ X, #X_PRE+32 ] | ||||
| fldmiad X!, { d0 - d7 } | |||||
| fstmiad Y!, { d0 - d7 } | |||||
| vldmia.f64 X!, { d0 - d7 } | |||||
| vstmia.f64 Y!, { d0 - d7 } | |||||
| .endm | .endm | ||||
| .macro COPY_F1 | .macro COPY_F1 | ||||
| fldmiad X!, { d0 - d1 } | |||||
| fstmiad Y!, { d0 - d1 } | |||||
| vldmia.f64 X!, { d0 - d1 } | |||||
| vstmia.f64 Y!, { d0 - d1 } | |||||
| .endm | .endm | ||||
| @@ -84,23 +84,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_S4 | .macro COPY_S4 | ||||
| nop | nop | ||||
| fldmiad X, { d0 - d1 } | |||||
| fstmiad Y, { d0 - d1 } | |||||
| vldmia.f64 X, { d0 - d1 } | |||||
| vstmia.f64 Y, { d0 - d1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmiad X, { d2 - d3 } | |||||
| fstmiad Y, { d2 - d3 } | |||||
| vldmia.f64 X, { d2 - d3 } | |||||
| vstmia.f64 Y, { d2 - d3 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmiad X, { d0 - d1 } | |||||
| fstmiad Y, { d0 - d1 } | |||||
| vldmia.f64 X, { d0 - d1 } | |||||
| vstmia.f64 Y, { d0 - d1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmiad X, { d2 - d3 } | |||||
| fstmiad Y, { d2 - d3 } | |||||
| vldmia.f64 X, { d2 - d3 } | |||||
| vstmia.f64 Y, { d2 - d3 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -109,8 +109,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY_S1 | .macro COPY_S1 | ||||
| fldmiad X, { d0 - d1 } | |||||
| fstmiad Y, { d0 - d1 } | |||||
| vldmia.f64 X, { d0 - d1 } | |||||
| vstmia.f64 Y, { d0 - d1 } | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| @@ -76,15 +76,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad X!, { d4 - d5 } | |||||
| fldmiad Y!, { d8 - d9 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| vldmia.f64 Y!, { d8 - d9 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fmacd d1 , d4, d9 | fmacd d1 , d4, d9 | ||||
| fldmiad X!, { d6 - d7 } | |||||
| vldmia.f64 X!, { d6 - d7 } | |||||
| fmacd d2 , d5, d9 | fmacd d2 , d5, d9 | ||||
| fmacd d3 , d5, d8 | fmacd d3 , d5, d8 | ||||
| fldmiad Y!, { d10 - d11 } | |||||
| vldmia.f64 Y!, { d10 - d11 } | |||||
| fmacd d0 , d6, d10 | fmacd d0 , d6, d10 | ||||
| fmacd d1 , d6, d11 | fmacd d1 , d6, d11 | ||||
| pld [ X, #X_PRE ] | pld [ X, #X_PRE ] | ||||
| @@ -93,15 +93,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| pld [ Y, #X_PRE ] | pld [ Y, #X_PRE ] | ||||
| fldmiad X!, { d4 - d5 } | |||||
| fldmiad Y!, { d8 - d9 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| vldmia.f64 Y!, { d8 - d9 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fmacd d1 , d4, d9 | fmacd d1 , d4, d9 | ||||
| fldmiad X!, { d6 - d7 } | |||||
| vldmia.f64 X!, { d6 - d7 } | |||||
| fmacd d2 , d5, d9 | fmacd d2 , d5, d9 | ||||
| fmacd d3 , d5, d8 | fmacd d3 , d5, d8 | ||||
| fldmiad Y!, { d10 - d11 } | |||||
| vldmia.f64 Y!, { d10 - d11 } | |||||
| fmacd d0 , d6, d10 | fmacd d0 , d6, d10 | ||||
| fmacd d1 , d6, d11 | fmacd d1 , d6, d11 | ||||
| fmacd d2 , d7, d11 | fmacd d2 , d7, d11 | ||||
| @@ -111,8 +111,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1 | .macro KERNEL_F1 | ||||
| fldmiad X!, { d4 - d5 } | |||||
| fldmiad Y!, { d8 - d9 } | |||||
| vldmia.f64 X!, { d4 - d5 } | |||||
| vldmia.f64 Y!, { d8 - d9 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fmacd d1 , d4, d9 | fmacd d1 , d4, d9 | ||||
| fmacd d2 , d5, d9 | fmacd d2 , d5, d9 | ||||
| @@ -127,8 +127,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| nop | nop | ||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d8 - d9 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d8 - d9 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fmacd d1 , d4, d9 | fmacd d1 , d4, d9 | ||||
| fmacd d2 , d5, d9 | fmacd d2 , d5, d9 | ||||
| @@ -136,8 +136,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d8 - d9 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d8 - d9 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fmacd d1 , d4, d9 | fmacd d1 , d4, d9 | ||||
| fmacd d2 , d5, d9 | fmacd d2 , d5, d9 | ||||
| @@ -145,8 +145,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d8 - d9 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d8 - d9 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fmacd d1 , d4, d9 | fmacd d1 , d4, d9 | ||||
| fmacd d2 , d5, d9 | fmacd d2 , d5, d9 | ||||
| @@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add X, X, INC_X | add X, X, INC_X | ||||
| add Y, Y, INC_Y | add Y, Y, INC_Y | ||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d8 - d9 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d8 - d9 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fmacd d1 , d4, d9 | fmacd d1 , d4, d9 | ||||
| fmacd d2 , d5, d9 | fmacd d2 , d5, d9 | ||||
| @@ -168,8 +168,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1 | .macro KERNEL_S1 | ||||
| fldmiad X, { d4 - d5 } | |||||
| fldmiad Y, { d8 - d9 } | |||||
| vldmia.f64 X, { d4 - d5 } | |||||
| vldmia.f64 Y, { d8 - d9 } | |||||
| fmacd d0 , d4, d8 | fmacd d0 , d4, d8 | ||||
| fmacd d1 , d4, d9 | fmacd d1 , d4, d9 | ||||
| fmacd d2 , d5, d9 | fmacd d2 , d5, d9 | ||||
| @@ -360,7 +360,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad CO1, { d4 - d7 } | |||||
| vldmia.f64 CO1, { d4 - d7 } | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| @@ -372,9 +372,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d11 | FMAC_R2 d6 , d1 , d11 | ||||
| FMAC_I2 d7 , d1 , d10 | FMAC_I2 d7 , d1 , d10 | ||||
| fstmiad CO1, { d4 - d7 } | |||||
| vstmia.f64 CO1, { d4 - d7 } | |||||
| fldmiad CO2, { d4 - d7 } | |||||
| vldmia.f64 CO2, { d4 - d7 } | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| @@ -386,7 +386,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d15 | FMAC_R2 d6 , d1 , d15 | ||||
| FMAC_I2 d7 , d1 , d14 | FMAC_I2 d7 , d1 , d14 | ||||
| fstmiad CO2, { d4 - d7 } | |||||
| vstmia.f64 CO2, { d4 - d7 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -543,23 +543,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad CO1, { d4 - d5 } | |||||
| vldmia.f64 CO1, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| FMAC_R2 d4 , d1 , d9 | FMAC_R2 d4 , d1 , d9 | ||||
| FMAC_I2 d5 , d1 , d8 | FMAC_I2 d5 , d1 , d8 | ||||
| fstmiad CO1, { d4 - d5 } | |||||
| vstmia.f64 CO1, { d4 - d5 } | |||||
| fldmiad CO2, { d4 - d5 } | |||||
| vldmia.f64 CO2, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| FMAC_R2 d4 , d1 , d13 | FMAC_R2 d4 , d1 , d13 | ||||
| FMAC_I2 d5 , d1 , d12 | FMAC_I2 d5 , d1 , d12 | ||||
| fstmiad CO2, { d4 - d5 } | |||||
| vstmia.f64 CO2, { d4 - d5 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -714,7 +714,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad CO1, { d4 - d7 } | |||||
| vldmia.f64 CO1, { d4 - d7 } | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| @@ -726,7 +726,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d11 | FMAC_R2 d6 , d1 , d11 | ||||
| FMAC_I2 d7 , d1 , d10 | FMAC_I2 d7 , d1 , d10 | ||||
| fstmiad CO1, { d4 - d7 } | |||||
| vstmia.f64 CO1, { d4 - d7 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -843,14 +843,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad CO1, { d4 - d5 } | |||||
| vldmia.f64 CO1, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| FMAC_R2 d4 , d1 , d9 | FMAC_R2 d4 , d1 , d9 | ||||
| FMAC_I2 d5 , d1 , d8 | FMAC_I2 d5 , d1 , d8 | ||||
| fstmiad CO1, { d4 - d5 } | |||||
| vstmia.f64 CO1, { d4 - d5 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -374,8 +374,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad CO1, { d4 - d7 } | |||||
| fldmiad CO2, { d8 - d11 } | |||||
| vldmia.f64 CO1, { d4 - d7 } | |||||
| vldmia.f64 CO2, { d8 - d11 } | |||||
| FADD_R d16, d24 , d16 | FADD_R d16, d24 , d16 | ||||
| FADD_I d17, d25 , d17 | FADD_I d17, d25 , d17 | ||||
| @@ -406,8 +406,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d10, d1 , d23 | FMAC_R2 d10, d1 , d23 | ||||
| FMAC_I2 d11, d1 , d22 | FMAC_I2 d11, d1 , d22 | ||||
| fstmiad CO1, { d4 - d7 } | |||||
| fstmiad CO2, { d8 - d11 } | |||||
| vstmia.f64 CO1, { d4 - d7 } | |||||
| vstmia.f64 CO2, { d8 - d11 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -570,8 +570,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad CO1, { d4 - d5 } | |||||
| fldmiad CO2, { d8 - d9 } | |||||
| vldmia.f64 CO1, { d4 - d5 } | |||||
| vldmia.f64 CO2, { d8 - d9 } | |||||
| FADD_R d16, d24 , d16 | FADD_R d16, d24 , d16 | ||||
| FADD_I d17, d25 , d17 | FADD_I d17, d25 , d17 | ||||
| @@ -588,8 +588,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d8 , d1 , d21 | FMAC_R2 d8 , d1 , d21 | ||||
| FMAC_I2 d9 , d1 , d20 | FMAC_I2 d9 , d1 , d20 | ||||
| fstmiad CO1, { d4 - d5 } | |||||
| fstmiad CO2, { d8 - d9 } | |||||
| vstmia.f64 CO1, { d4 - d5 } | |||||
| vstmia.f64 CO2, { d8 - d9 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -752,7 +752,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad CO1, { d4 - d7 } | |||||
| vldmia.f64 CO1, { d4 - d7 } | |||||
| FADD_R d16, d24 , d16 | FADD_R d16, d24 , d16 | ||||
| FADD_I d17, d25 , d17 | FADD_I d17, d25 , d17 | ||||
| @@ -769,7 +769,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d19 | FMAC_R2 d6 , d1 , d19 | ||||
| FMAC_I2 d7 , d1 , d18 | FMAC_I2 d7 , d1 , d18 | ||||
| fstmiad CO1, { d4 - d7 } | |||||
| vstmia.f64 CO1, { d4 - d7 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -887,7 +887,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad CO1, { d4 - d5 } | |||||
| vldmia.f64 CO1, { d4 - d5 } | |||||
| FADD_R d16, d24 , d16 | FADD_R d16, d24 , d16 | ||||
| FADD_I d17, d25 , d17 | FADD_I d17, d25 , d17 | ||||
| @@ -897,7 +897,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d4 , d1 , d17 | FMAC_R2 d4 , d1 , d17 | ||||
| FMAC_I2 d5 , d1 , d16 | FMAC_I2 d5 , d1 , d16 | ||||
| fstmiad CO1, { d4 - d5 } | |||||
| vstmia.f64 CO1, { d4 - d5 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -87,7 +87,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d6 , [ AO2, #16 ] | fldd d6 , [ AO2, #16 ] | ||||
| fldd d7 , [ AO2, #24 ] | fldd d7 , [ AO2, #24 ] | ||||
| fstmiad BO!, { d0 - d7 } | |||||
| vstmia.f64 BO!, { d0 - d7 } | |||||
| add AO2, AO2, #32 | add AO2, AO2, #32 | ||||
| .endm | .endm | ||||
| @@ -101,7 +101,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d3 , [ AO2, #8 ] | fldd d3 , [ AO2, #8 ] | ||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| fstmiad BO!, { d0 - d3 } | |||||
| vstmia.f64 BO!, { d0 - d3 } | |||||
| add AO2, AO2, #16 | add AO2, AO2, #16 | ||||
| .endm | .endm | ||||
| @@ -113,7 +113,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d2 , [ AO1, #16 ] | fldd d2 , [ AO1, #16 ] | ||||
| fldd d3 , [ AO1, #24 ] | fldd d3 , [ AO1, #24 ] | ||||
| fstmiad BO!, { d0 - d3 } | |||||
| vstmia.f64 BO!, { d0 - d3 } | |||||
| add AO1, AO1, #32 | add AO1, AO1, #32 | ||||
| .endm | .endm | ||||
| @@ -124,7 +124,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0 , [ AO1, #0 ] | fldd d0 , [ AO1, #0 ] | ||||
| fldd d1 , [ AO1, #8 ] | fldd d1 , [ AO1, #8 ] | ||||
| fstmiad BO!, { d0 - d1 } | |||||
| vstmia.f64 BO!, { d0 - d1 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| .endm | .endm | ||||
| @@ -74,13 +74,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY2x2 | .macro COPY2x2 | ||||
| pld [ AO1, #A_PRE ] | pld [ AO1, #A_PRE ] | ||||
| fldmiad AO1, { d0 - d3 } | |||||
| vldmia.f64 AO1, { d0 - d3 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| pld [ r3, #A_PRE ] | pld [ r3, #A_PRE ] | ||||
| fldmiad r3, { d4 - d7 } | |||||
| vldmia.f64 r3, { d4 - d7 } | |||||
| fstmiad BO1, { d0 - d7 } | |||||
| vstmia.f64 BO1, { d0 - d7 } | |||||
| add AO1, AO1, #32 | add AO1, AO1, #32 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -88,12 +88,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x2 | .macro COPY1x2 | ||||
| fldmiad AO1, { d0 -d1 } | |||||
| vldmia.f64 AO1, { d0 -d1 } | |||||
| add r3, AO1, LDA | add r3, AO1, LDA | ||||
| fldmiad r3, { d2 - d3 } | |||||
| vldmia.f64 r3, { d2 - d3 } | |||||
| fstmiad BO2, { d0 - d3 } | |||||
| vstmia.f64 BO2, { d0 - d3 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO2, BO2, #32 | add BO2, BO2, #32 | ||||
| @@ -102,9 +102,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /*************************************************************************************************************************/ | /*************************************************************************************************************************/ | ||||
| .macro COPY2x1 | .macro COPY2x1 | ||||
| fldmiad AO1, { d0 - d3 } | |||||
| vldmia.f64 AO1, { d0 - d3 } | |||||
| fstmiad BO1, { d0 - d3 } | |||||
| vstmia.f64 BO1, { d0 - d3 } | |||||
| add AO1, AO1, #32 | add AO1, AO1, #32 | ||||
| add BO1, BO1, M4 | add BO1, BO1, M4 | ||||
| @@ -112,9 +112,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro COPY1x1 | .macro COPY1x1 | ||||
| fldmiad AO1, { d0 - d1 } | |||||
| vldmia.f64 AO1, { d0 - d1 } | |||||
| fstmiad BO2, { d0 - d1 } | |||||
| vstmia.f64 BO2, { d0 - d1 } | |||||
| add AO1, AO1, #16 | add AO1, AO1, #16 | ||||
| add BO2, BO2, #16 | add BO2, BO2, #16 | ||||
| @@ -204,7 +204,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad YO, { d4 - d7 } | |||||
| vldmia.f64 YO, { d4 - d7 } | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| @@ -216,9 +216,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d11 | FMAC_R2 d6 , d1 , d11 | ||||
| FMAC_I2 d7 , d1 , d10 | FMAC_I2 d7 , d1 , d10 | ||||
| fstmiad YO!, { d4 - d7 } | |||||
| vstmia.f64 YO!, { d4 - d7 } | |||||
| fldmiad YO, { d4 - d7 } | |||||
| vldmia.f64 YO, { d4 - d7 } | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| @@ -230,7 +230,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d15 | FMAC_R2 d6 , d1 , d15 | ||||
| FMAC_I2 d7 , d1 , d14 | FMAC_I2 d7 , d1 , d14 | ||||
| fstmiad YO!, { d4 - d7 } | |||||
| vstmia.f64 YO!, { d4 - d7 } | |||||
| .endm | .endm | ||||
| @@ -269,14 +269,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad YO, { d4 - d5 } | |||||
| vldmia.f64 YO, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| FMAC_R2 d4 , d1 , d9 | FMAC_R2 d4 , d1 , d9 | ||||
| FMAC_I2 d5 , d1 , d8 | FMAC_I2 d5 , d1 , d8 | ||||
| fstmiad YO, { d4 - d5 } | |||||
| vstmia.f64 YO, { d4 - d5 } | |||||
| add YO, YO, #16 | add YO, YO, #16 | ||||
| @@ -352,47 +352,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad YO, { d4 - d5 } | |||||
| vldmia.f64 YO, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| FMAC_R2 d4 , d1 , d9 | FMAC_R2 d4 , d1 , d9 | ||||
| FMAC_I2 d5 , d1 , d8 | FMAC_I2 d5 , d1 , d8 | ||||
| fstmiad YO, { d4 - d5 } | |||||
| vstmia.f64 YO, { d4 - d5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d6 - d7 } | |||||
| vldmia.f64 YO, { d6 - d7 } | |||||
| FMAC_R1 d6 , d0 , d10 | FMAC_R1 d6 , d0 , d10 | ||||
| FMAC_I1 d7 , d0 , d11 | FMAC_I1 d7 , d0 , d11 | ||||
| FMAC_R2 d6 , d1 , d11 | FMAC_R2 d6 , d1 , d11 | ||||
| FMAC_I2 d7 , d1 , d10 | FMAC_I2 d7 , d1 , d10 | ||||
| fstmiad YO, { d6 - d7 } | |||||
| vstmia.f64 YO, { d6 - d7 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d4 - d5 } | |||||
| vldmia.f64 YO, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| FMAC_R2 d4 , d1 , d13 | FMAC_R2 d4 , d1 , d13 | ||||
| FMAC_I2 d5 , d1 , d12 | FMAC_I2 d5 , d1 , d12 | ||||
| fstmiad YO, { d4 - d5 } | |||||
| vstmia.f64 YO, { d4 - d5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d6 - d7 } | |||||
| vldmia.f64 YO, { d6 - d7 } | |||||
| FMAC_R1 d6 , d0 , d14 | FMAC_R1 d6 , d0 , d14 | ||||
| FMAC_I1 d7 , d0 , d15 | FMAC_I1 d7 , d0 , d15 | ||||
| FMAC_R2 d6 , d1 , d15 | FMAC_R2 d6 , d1 , d15 | ||||
| FMAC_I2 d7 , d1 , d14 | FMAC_I2 d7 , d1 , d14 | ||||
| fstmiad YO, { d6 - d7 } | |||||
| vstmia.f64 YO, { d6 - d7 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| @@ -433,14 +433,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| fldd d0, ALPHA_R | fldd d0, ALPHA_R | ||||
| fldd d1, ALPHA_I | fldd d1, ALPHA_I | ||||
| fldmiad YO, { d4 - d5 } | |||||
| vldmia.f64 YO, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d8 | FMAC_R1 d4 , d0 , d8 | ||||
| FMAC_I1 d5 , d0 , d9 | FMAC_I1 d5 , d0 , d9 | ||||
| FMAC_R2 d4 , d1 , d9 | FMAC_R2 d4 , d1 , d9 | ||||
| FMAC_I2 d5 , d1 , d8 | FMAC_I2 d5 , d1 , d8 | ||||
| fstmiad YO, { d4 - d5 } | |||||
| vstmia.f64 YO, { d4 - d5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| @@ -151,12 +151,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F2X1 | .macro KERNEL_F2X1 | ||||
| fldmiad XO! , { d2 - d3 } | |||||
| fldmiad AO1!, { d4 - d5 } | |||||
| vldmia.f64 XO! , { d2 - d3 } | |||||
| vldmia.f64 AO1!, { d4 - d5 } | |||||
| fmacd d12 , d4 , d2 | fmacd d12 , d4 , d2 | ||||
| fmacd d13 , d4 , d3 | fmacd d13 , d4 , d3 | ||||
| fldmiad AO2!, { d8 - d9 } | |||||
| vldmia.f64 AO2!, { d8 - d9 } | |||||
| KMAC_R d12 , d5 , d3 | KMAC_R d12 , d5 , d3 | ||||
| KMAC_I d13 , d5 , d2 | KMAC_I d13 , d5 , d2 | ||||
| @@ -169,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F2 | .macro SAVE_F2 | ||||
| fldmiad YO, { d4 - d7 } | |||||
| vldmia.f64 YO, { d4 - d7 } | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| @@ -181,7 +181,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d15 | FMAC_R2 d6 , d1 , d15 | ||||
| FMAC_I2 d7 , d1 , d14 | FMAC_I2 d7 , d1 , d14 | ||||
| fstmiad YO!, { d4 - d7 } | |||||
| vstmia.f64 YO!, { d4 - d7 } | |||||
| .endm | .endm | ||||
| @@ -205,8 +205,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_F1X1 | .macro KERNEL_F1X1 | ||||
| fldmiad XO! , { d2 - d3 } | |||||
| fldmiad AO1!, { d4 - d5 } | |||||
| vldmia.f64 XO! , { d2 - d3 } | |||||
| vldmia.f64 AO1!, { d4 - d5 } | |||||
| fmacd d12 , d4 , d2 | fmacd d12 , d4 , d2 | ||||
| fmacd d13 , d4 , d3 | fmacd d13 , d4 , d3 | ||||
| @@ -217,14 +217,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_F1 | .macro SAVE_F1 | ||||
| fldmiad YO, { d4 - d5 } | |||||
| vldmia.f64 YO, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| FMAC_R2 d4 , d1 , d13 | FMAC_R2 d4 , d1 , d13 | ||||
| FMAC_I2 d5 , d1 , d12 | FMAC_I2 d5 , d1 , d12 | ||||
| fstmiad YO!, { d4 - d5 } | |||||
| vstmia.f64 YO!, { d4 - d5 } | |||||
| .endm | .endm | ||||
| @@ -250,9 +250,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S2X1 | .macro KERNEL_S2X1 | ||||
| fldmiad XO , { d2 - d3 } | |||||
| fldmiad AO1!, { d4 - d5 } | |||||
| fldmiad AO2!, { d8 - d9 } | |||||
| vldmia.f64 XO , { d2 - d3 } | |||||
| vldmia.f64 AO1!, { d4 - d5 } | |||||
| vldmia.f64 AO2!, { d8 - d9 } | |||||
| fmacd d12 , d4 , d2 | fmacd d12 , d4 , d2 | ||||
| fmacd d13 , d4 , d3 | fmacd d13 , d4 , d3 | ||||
| @@ -270,25 +270,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S2 | .macro SAVE_S2 | ||||
| fldmiad YO, { d4 - d5 } | |||||
| vldmia.f64 YO, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| FMAC_R2 d4 , d1 , d13 | FMAC_R2 d4 , d1 , d13 | ||||
| FMAC_I2 d5 , d1 , d12 | FMAC_I2 d5 , d1 , d12 | ||||
| fstmiad YO, { d4 - d5 } | |||||
| vstmia.f64 YO, { d4 - d5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| fldmiad YO, { d6 - d7 } | |||||
| vldmia.f64 YO, { d6 - d7 } | |||||
| FMAC_R1 d6 , d0 , d14 | FMAC_R1 d6 , d0 , d14 | ||||
| FMAC_I1 d7 , d0 , d15 | FMAC_I1 d7 , d0 , d15 | ||||
| FMAC_R2 d6 , d1 , d15 | FMAC_R2 d6 , d1 , d15 | ||||
| FMAC_I2 d7 , d1 , d14 | FMAC_I2 d7 , d1 , d14 | ||||
| fstmiad YO, { d6 - d7 } | |||||
| vstmia.f64 YO, { d6 - d7 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| @@ -314,8 +314,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro KERNEL_S1X1 | .macro KERNEL_S1X1 | ||||
| fldmiad XO , { d2 - d3 } | |||||
| fldmiad AO1!, { d4 - d5 } | |||||
| vldmia.f64 XO , { d2 - d3 } | |||||
| vldmia.f64 AO1!, { d4 - d5 } | |||||
| fmacd d12 , d4 , d2 | fmacd d12 , d4 , d2 | ||||
| fmacd d13 , d4 , d3 | fmacd d13 , d4 , d3 | ||||
| @@ -328,14 +328,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| .macro SAVE_S1 | .macro SAVE_S1 | ||||
| fldmiad YO, { d4 - d5 } | |||||
| vldmia.f64 YO, { d4 - d5 } | |||||
| FMAC_R1 d4 , d0 , d12 | FMAC_R1 d4 , d0 , d12 | ||||
| FMAC_I1 d5 , d0 , d13 | FMAC_I1 d5 , d0 , d13 | ||||
| FMAC_R2 d4 , d1 , d13 | FMAC_R2 d4 , d1 , d13 | ||||
| FMAC_I2 d5 , d1 , d12 | FMAC_I2 d5 , d1 , d12 | ||||
| fstmiad YO, { d4 - d5 } | |||||
| vstmia.f64 YO, { d4 - d5 } | |||||
| add YO, YO, INC_Y | add YO, YO, INC_Y | ||||
| @@ -385,7 +385,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d11 | FMAC_R2 d6 , d1 , d11 | ||||
| FMAC_I2 d7 , d1 , d10 | FMAC_I2 d7 , d1 , d10 | ||||
| fstmiad CO1, { d4 - d7 } | |||||
| vstmia.f64 CO1, { d4 - d7 } | |||||
| fldd d4 , FP_ZERO | fldd d4 , FP_ZERO | ||||
| vmov.f64 d5 , d4 | vmov.f64 d5 , d4 | ||||
| @@ -402,7 +402,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d15 | FMAC_R2 d6 , d1 , d15 | ||||
| FMAC_I2 d7 , d1 , d14 | FMAC_I2 d7 , d1 , d14 | ||||
| fstmiad CO2, { d4 - d7 } | |||||
| vstmia.f64 CO2, { d4 - d7 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -567,7 +567,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d4 , d1 , d9 | FMAC_R2 d4 , d1 , d9 | ||||
| FMAC_I2 d5 , d1 , d8 | FMAC_I2 d5 , d1 , d8 | ||||
| fstmiad CO1, { d4 - d5 } | |||||
| vstmia.f64 CO1, { d4 - d5 } | |||||
| fldd d4 , FP_ZERO | fldd d4 , FP_ZERO | ||||
| vmov.f64 d5 , d4 | vmov.f64 d5 , d4 | ||||
| @@ -577,7 +577,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d4 , d1 , d13 | FMAC_R2 d4 , d1 , d13 | ||||
| FMAC_I2 d5 , d1 , d12 | FMAC_I2 d5 , d1 , d12 | ||||
| fstmiad CO2, { d4 - d5 } | |||||
| vstmia.f64 CO2, { d4 - d5 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -747,7 +747,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d11 | FMAC_R2 d6 , d1 , d11 | ||||
| FMAC_I2 d7 , d1 , d10 | FMAC_I2 d7 , d1 , d10 | ||||
| fstmiad CO1, { d4 - d7 } | |||||
| vstmia.f64 CO1, { d4 - d7 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -872,7 +872,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d4 , d1 , d9 | FMAC_R2 d4 , d1 , d9 | ||||
| FMAC_I2 d5 , d1 , d8 | FMAC_I2 d5 , d1 , d8 | ||||
| fstmiad CO1, { d4 - d5 } | |||||
| vstmia.f64 CO1, { d4 - d5 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -391,8 +391,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d10, d1 , d23 | FMAC_R2 d10, d1 , d23 | ||||
| FMAC_I2 d11, d1 , d22 | FMAC_I2 d11, d1 , d22 | ||||
| fstmiad CO1, { d4 - d7 } | |||||
| fstmiad CO2, { d8 - d11 } | |||||
| vstmia.f64 CO1, { d4 - d7 } | |||||
| vstmia.f64 CO2, { d8 - d11 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -569,8 +569,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d8 , d1 , d21 | FMAC_R2 d8 , d1 , d21 | ||||
| FMAC_I2 d9 , d1 , d20 | FMAC_I2 d9 , d1 , d20 | ||||
| fstmiad CO1, { d4 - d5 } | |||||
| fstmiad CO2, { d8 - d9 } | |||||
| vstmia.f64 CO1, { d4 - d5 } | |||||
| vstmia.f64 CO2, { d8 - d9 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -747,7 +747,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d6 , d1 , d19 | FMAC_R2 d6 , d1 , d19 | ||||
| FMAC_I2 d7 , d1 , d18 | FMAC_I2 d7 , d1 , d18 | ||||
| fstmiad CO1, { d4 - d7 } | |||||
| vstmia.f64 CO1, { d4 - d7 } | |||||
| add CO1, CO1, #32 | add CO1, CO1, #32 | ||||
| @@ -872,7 +872,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| FMAC_R2 d4 , d1 , d17 | FMAC_R2 d4 , d1 , d17 | ||||
| FMAC_I2 d5 , d1 , d16 | FMAC_I2 d5 , d1 , d16 | ||||
| fstmiad CO1, { d4 - d5 } | |||||
| vstmia.f64 CO1, { d4 - d5 } | |||||
| add CO1, CO1, #16 | add CO1, CO1, #16 | ||||
| @@ -1,17 +1,17 @@ | |||||
| ifndef SNRM2KERNEL | ifndef SNRM2KERNEL | ||||
| SNRM2KERNEL = nrm2.c | |||||
| SNRM2KERNEL = ../arm/nrm2.c | |||||
| endif | endif | ||||
| ifndef DNRM2KERNEL | ifndef DNRM2KERNEL | ||||
| DNRM2KERNEL = nrm2.c | |||||
| DNRM2KERNEL = ../arm/nrm2.c | |||||
| endif | endif | ||||
| ifndef CNRM2KERNEL | ifndef CNRM2KERNEL | ||||
| CNRM2KERNEL = znrm2.c | |||||
| CNRM2KERNEL = ../arm/znrm2.c | |||||
| endif | endif | ||||
| ifndef ZNRM2KERNEL | ifndef ZNRM2KERNEL | ||||
| ZNRM2KERNEL = znrm2.c | |||||
| ZNRM2KERNEL = ../arm/znrm2.c | |||||
| endif | endif | ||||
| ifndef SCABS_KERNEL | ifndef SCABS_KERNEL | ||||
| @@ -1,8 +1,3 @@ | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAMINKERNEL = ../arm/amin.c | SAMINKERNEL = ../arm/amin.c | ||||
| DAMINKERNEL = ../arm/amin.c | DAMINKERNEL = ../arm/amin.c | ||||
| CAMINKERNEL = ../arm/zamin.c | CAMINKERNEL = ../arm/zamin.c | ||||
| @@ -14,11 +9,6 @@ DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | SMINKERNEL = ../arm/min.c | ||||
| DMINKERNEL = ../arm/min.c | DMINKERNEL = ../arm/min.c | ||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| ISAMINKERNEL = ../arm/iamin.c | ISAMINKERNEL = ../arm/iamin.c | ||||
| IDAMINKERNEL = ../arm/iamin.c | IDAMINKERNEL = ../arm/iamin.c | ||||
| ICAMINKERNEL = ../arm/izamin.c | ICAMINKERNEL = ../arm/izamin.c | ||||
| @@ -30,32 +20,36 @@ IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | ISMINKERNEL = ../arm/imin.c | ||||
| IDMINKERNEL = ../arm/imin.c | IDMINKERNEL = ../arm/imin.c | ||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAXPYKERNEL = axpy.S | SAXPYKERNEL = axpy.S | ||||
| DAXPYKERNEL = axpy.S | DAXPYKERNEL = axpy.S | ||||
| CAXPYKERNEL = zaxpy.S | CAXPYKERNEL = zaxpy.S | ||||
| ZAXPYKERNEL = zaxpy.S | ZAXPYKERNEL = zaxpy.S | ||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SDOTKERNEL = dot.S | |||||
| DDOTKERNEL = dot.S | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| DSDOTKERNEL = dot.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| SROTKERNEL = rot.S | SROTKERNEL = rot.S | ||||
| DROTKERNEL = rot.S | DROTKERNEL = rot.S | ||||
| CROTKERNEL = zrot.S | CROTKERNEL = zrot.S | ||||
| @@ -66,11 +60,6 @@ DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | CSCALKERNEL = zscal.S | ||||
| ZSCALKERNEL = zscal.S | ZSCALKERNEL = zscal.S | ||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| SGEMVNKERNEL = gemv_n.S | SGEMVNKERNEL = gemv_n.S | ||||
| DGEMVNKERNEL = gemv_n.S | DGEMVNKERNEL = gemv_n.S | ||||
| CGEMVNKERNEL = zgemv_n.S | CGEMVNKERNEL = zgemv_n.S | ||||
| @@ -81,55 +70,138 @@ DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | CGEMVTKERNEL = zgemv_t.S | ||||
| ZGEMVTKERNEL = zgemv_t.S | ZGEMVTKERNEL = zgemv_t.S | ||||
| STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| # NRM2 kernels: assigned unless $(OS_DARWIN)$(CROSS) expands to "11". | |||||
| # Real precisions (S, D) use nrm2.S; complex precisions (C, Z) use znrm2.S. | |||||
| ifneq ($(OS_DARWIN)$(CROSS),11) | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| endif | |||||
| DDOTKERNEL = dot.S | |||||
| SDOTKERNEL = dot.S | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| DSDOTKERNEL = dot.S | |||||
| ifneq ($(OS_DARWIN)$(CROSS),11) | |||||
| SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
| STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||||
| DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||||
| ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) | |||||
| ifeq ($(DGEMM_UNROLL_M), 8) | |||||
| DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S | |||||
| DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S | |||||
| else | |||||
| DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||||
| endif | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| ifeq ($(DGEMM_UNROLL_N), 4) | |||||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| else | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||||
| endif | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||||
| CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||||
| ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||||
| ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||||
| ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| else | |||||
| STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | DTRMMKERNEL = ../generic/trmmkernel_2x2.c | ||||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | ||||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | ||||
| SGEMMKERNEL = sgemm_kernel_4x4.S | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | DGEMMKERNEL = ../generic/gemmkernel_2x2.c | ||||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | DGEMMONCOPY = ../generic/gemm_ncopy_2.c | ||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | ||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | ||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| @@ -0,0 +1,3 @@ | |||||
| include $(KERNELDIR)/KERNEL.ARMV8 | |||||
| @@ -1,4 +1,49 @@ | |||||
| include $(KERNELDIR)/KERNEL.ARMV8 | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| SAMAXKERNEL = amax.S | SAMAXKERNEL = amax.S | ||||
| DAMAXKERNEL = amax.S | DAMAXKERNEL = amax.S | ||||
| @@ -66,13 +111,13 @@ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | ||||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | ||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | ||||
| SGEMMINCOPYOBJ = sgemm_incopy.o | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy.o | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | ||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | ||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | ||||
| DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | ||||
| @@ -87,8 +132,8 @@ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | ||||
| endif | endif | ||||
| DGEMMINCOPYOBJ = dgemm_incopy.o | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy.o | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| ifeq ($(DGEMM_UNROLL_N), 4) | ifeq ($(DGEMM_UNROLL_N), 4) | ||||
| @@ -99,32 +144,32 @@ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | ||||
| endif | endif | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | ||||
| CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | ||||
| ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | ||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | ||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | ||||
| CGEMMINCOPYOBJ = cgemm_incopy.o | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy.o | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | ||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | ||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ||||
| ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | ||||
| ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | ||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | ||||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | ||||
| ZGEMMINCOPYOBJ = zgemm_incopy.o | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | endif | ||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | ||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | ||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| @@ -0,0 +1,3 @@ | |||||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
| @@ -0,0 +1,3 @@ | |||||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
| @@ -0,0 +1,3 @@ | |||||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
| @@ -1,6 +1,133 @@ | |||||
| include $(KERNELDIR)/KERNEL.ARMV8 | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMAXKERNEL = iamax.S | |||||
| IDAMAXKERNEL = iamax.S | |||||
| ICAMAXKERNEL = izamax.S | |||||
| IZAMAXKERNEL = izamax.S | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| SASUMKERNEL = asum.S | |||||
| DASUMKERNEL = asum.S | |||||
| CASUMKERNEL = casum.S | |||||
| ZASUMKERNEL = zasum.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = daxpy_thunderx.c | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SCOPYKERNEL = copy.S | |||||
| DCOPYKERNEL = copy.S | |||||
| CCOPYKERNEL = copy.S | |||||
| ZCOPYKERNEL = copy.S | |||||
| SDOTKERNEL = dot_thunderx.c | |||||
| DDOTKERNEL = ddot_thunderx.c | |||||
| CDOTKERNEL = zdot.S | |||||
| ZDOTKERNEL = zdot.S | |||||
| DSDOTKERNEL = dot.S | |||||
| SNRM2KERNEL = nrm2.S | |||||
| DNRM2KERNEL = nrm2.S | |||||
| CNRM2KERNEL = znrm2.S | |||||
| ZNRM2KERNEL = znrm2.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SSWAPKERNEL = swap.S | |||||
| DSWAPKERNEL = swap.S | |||||
| CSWAPKERNEL = swap.S | |||||
| ZSWAPKERNEL = swap.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| STRMMKERNEL = ../generic/trmmkernel_4x4.c | |||||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| SGEMMKERNEL = sgemm_kernel_4x4.S | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| SDOTKERNEL=dot_thunderx.c | |||||
| DDOTKERNEL=ddot_thunderx.c | |||||
| DAXPYKERNEL=daxpy_thunderx.c | |||||
| @@ -1,4 +1,137 @@ | |||||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
| SAMINKERNEL = ../arm/amin.c | |||||
| DAMINKERNEL = ../arm/amin.c | |||||
| CAMINKERNEL = ../arm/zamin.c | |||||
| ZAMINKERNEL = ../arm/zamin.c | |||||
| SMAXKERNEL = ../arm/max.c | |||||
| DMAXKERNEL = ../arm/max.c | |||||
| SMINKERNEL = ../arm/min.c | |||||
| DMINKERNEL = ../arm/min.c | |||||
| ISAMINKERNEL = ../arm/iamin.c | |||||
| IDAMINKERNEL = ../arm/iamin.c | |||||
| ICAMINKERNEL = ../arm/izamin.c | |||||
| IZAMINKERNEL = ../arm/izamin.c | |||||
| ISMAXKERNEL = ../arm/imax.c | |||||
| IDMAXKERNEL = ../arm/imax.c | |||||
| ISMINKERNEL = ../arm/imin.c | |||||
| IDMINKERNEL = ../arm/imin.c | |||||
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| SAMAXKERNEL = amax.S | |||||
| DAMAXKERNEL = amax.S | |||||
| CAMAXKERNEL = zamax.S | |||||
| ZAMAXKERNEL = zamax.S | |||||
| SAXPYKERNEL = axpy.S | |||||
| DAXPYKERNEL = daxpy_thunderx2t99.S | |||||
| CAXPYKERNEL = zaxpy.S | |||||
| ZAXPYKERNEL = zaxpy.S | |||||
| SROTKERNEL = rot.S | |||||
| DROTKERNEL = rot.S | |||||
| CROTKERNEL = zrot.S | |||||
| ZROTKERNEL = zrot.S | |||||
| SSCALKERNEL = scal.S | |||||
| DSCALKERNEL = scal.S | |||||
| CSCALKERNEL = zscal.S | |||||
| ZSCALKERNEL = zscal.S | |||||
| SGEMVNKERNEL = gemv_n.S | |||||
| DGEMVNKERNEL = gemv_n.S | |||||
| CGEMVNKERNEL = zgemv_n.S | |||||
| ZGEMVNKERNEL = zgemv_n.S | |||||
| SGEMVTKERNEL = gemv_t.S | |||||
| DGEMVTKERNEL = gemv_t.S | |||||
| CGEMVTKERNEL = zgemv_t.S | |||||
| ZGEMVTKERNEL = zgemv_t.S | |||||
| STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S | |||||
| ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) | |||||
| SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c | |||||
| SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c | |||||
| SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S | |||||
| ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) | |||||
| ifeq ($(DGEMM_UNROLL_M), 8) | |||||
| DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S | |||||
| DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S | |||||
| else | |||||
| DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c | |||||
| DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c | |||||
| endif | |||||
| DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| ifeq ($(DGEMM_UNROLL_N), 4) | |||||
| DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S | |||||
| DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S | |||||
| else | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c | |||||
| endif | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S | |||||
| ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) | |||||
| CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c | |||||
| CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c | |||||
| CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S | |||||
| ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) | |||||
| ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c | |||||
| ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c | |||||
| ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||||
| SASUMKERNEL = sasum_thunderx2t99.c | SASUMKERNEL = sasum_thunderx2t99.c | ||||
| DASUMKERNEL = dasum_thunderx2t99.c | DASUMKERNEL = dasum_thunderx2t99.c | ||||
| @@ -27,12 +160,12 @@ CNRM2KERNEL = scnrm2_thunderx2t99.c | |||||
| DNRM2KERNEL = dznrm2_thunderx2t99.c | DNRM2KERNEL = dznrm2_thunderx2t99.c | ||||
| ZNRM2KERNEL = dznrm2_thunderx2t99.c | ZNRM2KERNEL = dznrm2_thunderx2t99.c | ||||
| DAXPYKERNEL = daxpy_thunderx2t99.S | |||||
| DDOTKERNEL = dot_thunderx2t99.c | DDOTKERNEL = dot_thunderx2t99.c | ||||
| SDOTKERNEL = dot_thunderx2t99.c | SDOTKERNEL = dot_thunderx2t99.c | ||||
| CDOTKERNEL = zdot_thunderx2t99.c | CDOTKERNEL = zdot_thunderx2t99.c | ||||
| ZDOTKERNEL = zdot_thunderx2t99.c | ZDOTKERNEL = zdot_thunderx2t99.c | ||||
| DSDOTKERNEL = dot.S | |||||
| ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4) | ||||
| DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S | ||||
| @@ -1,3 +0,0 @@ | |||||
| include $(KERNELDIR)/KERNEL.THUNDERX2T99 | |||||
| @@ -1 +0,0 @@ | |||||
| include $(KERNELDIR)/KERNEL.ARMV8 | |||||
| @@ -943,13 +943,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| prfm PLDL1KEEP, [origPB] | prfm PLDL1KEEP, [origPB] | ||||
| prfm PLDL1KEEP, [origPA] | prfm PLDL1KEEP, [origPA] | ||||
| ldr A_PRE_SIZE, =dgemm_prefetch_size_a | |||||
| ldr A_PRE_SIZE, [A_PRE_SIZE] | |||||
| ldr B_PRE_SIZE, =dgemm_prefetch_size_b | |||||
| ldr B_PRE_SIZE, [B_PRE_SIZE] | |||||
| ldr C_PRE_SIZE, =dgemm_prefetch_size_c | |||||
| ldr C_PRE_SIZE, [C_PRE_SIZE] | |||||
| mov A_PRE_SIZE, #3584 | |||||
| mov B_PRE_SIZE, #512 | |||||
| mov C_PRE_SIZE, #128 | |||||
| add A_PRE_SIZE_64, A_PRE_SIZE, #64 | add A_PRE_SIZE_64, A_PRE_SIZE, #64 | ||||
| add B_PRE_SIZE_64, B_PRE_SIZE, #64 | add B_PRE_SIZE_64, B_PRE_SIZE, #64 | ||||