| @@ -1,27 +1,32 @@ | |||||
| # ifeq logical or | |||||
| #ifeq logical or | |||||
| ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15)) | ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15)) | ||||
| ifeq ($(OSNAME), Android) | ifeq ($(OSNAME), Android) | ||||
| CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
| FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
| CCOMMON_OPT += -mfpu=neon -march=armv7-a | |||||
| FCOMMON_OPT += -mfpu=neon -march=armv7-a | |||||
| else | else | ||||
| CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||||
| FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||||
| CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | |||||
| FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(CORE), ARMV7) | ifeq ($(CORE), ARMV7) | ||||
| ifeq ($(OSNAME), Android) | ifeq ($(OSNAME), Android) | ||||
| CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch | |||||
| FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch | |||||
| ifeq ($(ARM_SOFTFP_ABI), 1) | |||||
| CCOMMON_OPT += -mfpu=neon -march=armv7-a | |||||
| FCOMMON_OPT += -mfpu=neon -march=armv7-a | |||||
| else | else | ||||
| CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||||
| FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||||
| CCOMMON_OPT += -mfpu=neon -march=armv7-a -Wl,--no-warn-mismatch | |||||
| FCOMMON_OPT += -mfpu=neon -march=armv7-a -Wl,--no-warn-mismatch | |||||
| endif | |||||
| else | |||||
| CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | |||||
| FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(CORE), ARMV6) | ifeq ($(CORE), ARMV6) | ||||
| CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||||
| FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||||
| CCOMMON_OPT += -mfpu=vfp -march=armv6 | |||||
| FCOMMON_OPT += -mfpu=vfp -march=armv6 | |||||
| endif | endif | ||||
| @@ -482,6 +482,17 @@ endif | |||||
| ifeq ($(ARCH), arm) | ifeq ($(ARCH), arm) | ||||
| NO_BINARY_MODE = 1 | NO_BINARY_MODE = 1 | ||||
| BINARY_DEFINED = 1 | BINARY_DEFINED = 1 | ||||
| CCOMMON_OPT += -marm | |||||
| FCOMMON_OPT += -marm | |||||
| ifeq ($(ARM_SOFTFP_ABI), 1) | |||||
| CCOMMON_OPT += -mfloat-abi=softfp -DARM_SOFTFP_ABI | |||||
| FCOMMON_OPT += -mfloat-abi=softfp -DARM_SOFTFP_ABI | |||||
| else | |||||
| CCOMMON_OPT += -mfloat-abi=hard | |||||
| FCOMMON_OPT += -mfloat-abi=hard | |||||
| endif | |||||
| endif | endif | ||||
| ifeq ($(ARCH), arm64) | ifeq ($(ARCH), arm64) | ||||
| @@ -111,6 +111,11 @@ REALNAME: | |||||
| #define PROFCODE | #define PROFCODE | ||||
| #ifdef __ARM_PCS | |||||
| //-mfloat-abi=softfp | |||||
| #define SOFT_FLOAT_ABI | |||||
| #endif | |||||
| #endif | #endif | ||||
| @@ -38,11 +38,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define STACKSIZE 256 | #define STACKSIZE 256 | ||||
| #ifndef ARM_SOFTFP_ABI | |||||
| //hard | |||||
| #define OLD_INC_X [fp, #0 ] | #define OLD_INC_X [fp, #0 ] | ||||
| #define OLD_Y [fp, #4 ] | #define OLD_Y [fp, #4 ] | ||||
| #define OLD_INC_Y [fp, #8 ] | #define OLD_INC_Y [fp, #8 ] | ||||
| #else | |||||
| #define OLD_X [fp, #0 ] | |||||
| #define OLD_INC_X [fp, #4 ] | |||||
| #define OLD_Y [fp, #8 ] | |||||
| #define OLD_INC_Y [fp, #12 ] | |||||
| #endif | |||||
| #define N r0 | #define N r0 | ||||
| #define Y r1 | #define Y r1 | ||||
| #define INC_X r2 | #define INC_X r2 | ||||
| @@ -363,6 +370,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add fp, sp, #8 | add fp, sp, #8 | ||||
| sub sp, sp, #STACKSIZE // reserve stack | sub sp, sp, #STACKSIZE // reserve stack | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| #ifndef DOUBLE | |||||
| vmov s0, r3 //move alpha to s0 | |||||
| ldr X, OLD_X | |||||
| #endif | |||||
| #endif | |||||
| ldr INC_X , OLD_INC_X | ldr INC_X , OLD_INC_X | ||||
| ldr Y, OLD_Y | ldr Y, OLD_Y | ||||
| ldr INC_Y , OLD_INC_Y | ldr INC_Y , OLD_INC_Y | ||||
| @@ -38,12 +38,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define STACKSIZE 256 | #define STACKSIZE 256 | ||||
| #ifndef ARM_SOFTFP_ABI | |||||
| //hard | |||||
| #define OLD_LDA [fp, #0 ] | #define OLD_LDA [fp, #0 ] | ||||
| #define X [fp, #4 ] | #define X [fp, #4 ] | ||||
| #define OLD_INC_X [fp, #8 ] | #define OLD_INC_X [fp, #8 ] | ||||
| #define Y [fp, #12 ] | #define Y [fp, #12 ] | ||||
| #define OLD_INC_Y [fp, #16 ] | #define OLD_INC_Y [fp, #16 ] | ||||
| #define OLD_A r3 | #define OLD_A r3 | ||||
| #else | |||||
| #define OLD_A_SOFTFP [fp, #0 ] | |||||
| #define OLD_LDA [fp, #4 ] | |||||
| #define X [fp, #8 ] | |||||
| #define OLD_INC_X [fp, #12 ] | |||||
| #define Y [fp, #16 ] | |||||
| #define OLD_INC_Y [fp, #20 ] | |||||
| #define OLD_ALPHA r3 | |||||
| #define OLD_A r3 | |||||
| #endif | |||||
| #define OLD_M r0 | #define OLD_M r0 | ||||
| #define AO1 r0 | #define AO1 r0 | ||||
| @@ -552,9 +565,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| cmp N, #0 | cmp N, #0 | ||||
| ble gemvn_kernel_L999 | ble gemvn_kernel_L999 | ||||
| #ifndef DOUBLE | |||||
| #ifdef ARM_SOFTFP_ABI | |||||
| vmov s0, OLD_ALPHA | |||||
| ldr OLD_A, OLD_A_SOFTFP | |||||
| #endif | |||||
| #endif | |||||
| str OLD_A, A | str OLD_A, A | ||||
| str OLD_M, M | str OLD_M, M | ||||
| ldr INC_X , OLD_INC_X | ldr INC_X , OLD_INC_X | ||||
| ldr INC_Y , OLD_INC_Y | ldr INC_Y , OLD_INC_Y | ||||
| @@ -38,12 +38,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define STACKSIZE 256 | #define STACKSIZE 256 | ||||
| #ifndef ARM_SOFTFP_ABI | |||||
| //hard abi | |||||
| #define OLD_LDA [fp, #0 ] | #define OLD_LDA [fp, #0 ] | ||||
| #define X [fp, #4 ] | #define X [fp, #4 ] | ||||
| #define OLD_INC_X [fp, #8 ] | #define OLD_INC_X [fp, #8 ] | ||||
| #define Y [fp, #12 ] | #define Y [fp, #12 ] | ||||
| #define OLD_INC_Y [fp, #16 ] | #define OLD_INC_Y [fp, #16 ] | ||||
| #define OLD_A r3 | #define OLD_A r3 | ||||
| #else | |||||
| #define OLD_A_SOFTFP [fp, #0 ] | |||||
| #define OLD_LDA [fp, #4 ] | |||||
| #define X [fp, #8 ] | |||||
| #define OLD_INC_X [fp, #12 ] | |||||
| #define Y [fp, #16 ] | |||||
| #define OLD_INC_Y [fp, #20 ] | |||||
| #define OLD_ALPHA r3 | |||||
| #define OLD_A r3 | |||||
| #endif | |||||
| #define OLD_N r1 | #define OLD_N r1 | ||||
| #define M r0 | #define M r0 | ||||
| @@ -505,6 +518,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| cmp OLD_N, #0 | cmp OLD_N, #0 | ||||
| ble gemvt_kernel_L999 | ble gemvt_kernel_L999 | ||||
| #ifndef DOUBLE | |||||
| #ifdef ARM_SOFTFP_ABI | |||||
| vmov s0, OLD_ALPHA | |||||
| ldr OLD_A, OLD_A_SOFTFP | |||||
| #endif | |||||
| #endif | |||||
| str OLD_A, A | str OLD_A, A | ||||
| str OLD_N, N | str OLD_N, N | ||||
| @@ -503,6 +503,9 @@ nrm2_kernel_L999: | |||||
| #else | #else | ||||
| vsqrt.f32 s1, s1 | vsqrt.f32 s1, s1 | ||||
| vmul.f32 s0, s0, s1 | vmul.f32 s0, s0, s1 | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| vmov r0, s0 | |||||
| #endif | |||||
| #endif | #endif | ||||
| bx lr | bx lr | ||||
| @@ -332,10 +332,16 @@ sdot_kernel_L999: | |||||
| vadd.f64 d0 , d0, d1 // set return value | vadd.f64 d0 , d0, d1 // set return value | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| vmov r0, r1, d0 | |||||
| #endif | |||||
| #else | #else | ||||
| vadd.f32 s0 , s0, s1 // set return value | vadd.f32 s0 , s0, s1 // set return value | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| vmov r0, s0 | |||||
| #endif | |||||
| #endif | #endif | ||||
| sub sp, fp, #24 | sub sp, fp, #24 | ||||
| pop {r4 - r9, fp} | pop {r4 - r9, fp} | ||||
| @@ -58,8 +58,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define OLD_M r0 | #define OLD_M r0 | ||||
| #define OLD_N r1 | #define OLD_N r1 | ||||
| #define OLD_K r2 | #define OLD_K r2 | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| #define OLD_ALPHA r3 | |||||
| //#define OLD_A | |||||
| #else //hard | |||||
| #define OLD_A r3 | #define OLD_A r3 | ||||
| #define OLD_ALPHA s0 | #define OLD_ALPHA s0 | ||||
| #endif | |||||
| /****************************************************** | /****************************************************** | ||||
| * [fp, #-128] - [fp, #-64] is reserved | * [fp, #-128] - [fp, #-64] is reserved | ||||
| @@ -71,7 +77,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define M [fp, #-256 ] | #define M [fp, #-256 ] | ||||
| #define N [fp, #-260 ] | #define N [fp, #-260 ] | ||||
| #define K [fp, #-264 ] | #define K [fp, #-264 ] | ||||
| #ifndef ARM_SOFTFP_ABI | |||||
| #define A [fp, #-268 ] | #define A [fp, #-268 ] | ||||
| #endif | |||||
| #define FP_ZERO [fp, #-240] | #define FP_ZERO [fp, #-240] | ||||
| #define FP_ZERO_0 [fp, #-240] | #define FP_ZERO_0 [fp, #-240] | ||||
| @@ -79,10 +88,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ALPHA [fp, #-280] | #define ALPHA [fp, #-280] | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| #define A [fp, #4 ] | |||||
| #define B [fp, #8 ] | |||||
| #define C [fp, #12 ] | |||||
| #define OLD_LDC [fp, #16 ] | |||||
| #else //hard | |||||
| #define B [fp, #4 ] | #define B [fp, #4 ] | ||||
| #define C [fp, #8 ] | #define C [fp, #8 ] | ||||
| #define OLD_LDC [fp, #12 ] | #define OLD_LDC [fp, #12 ] | ||||
| #endif | |||||
| #define I r0 | #define I r0 | ||||
| #define J r1 | #define J r1 | ||||
| #define L r2 | #define L r2 | ||||
| @@ -854,9 +870,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| str OLD_M, M | str OLD_M, M | ||||
| str OLD_N, N | str OLD_N, N | ||||
| str OLD_K, K | str OLD_K, K | ||||
| #ifdef ARM_SOFTFP_ABI | |||||
| str OLD_ALPHA, ALPHA | |||||
| #else //hard | |||||
| str OLD_A, A | str OLD_A, A | ||||
| vstr OLD_ALPHA, ALPHA | vstr OLD_ALPHA, ALPHA | ||||
| #endif | |||||
| sub r3, fp, #128 | sub r3, fp, #128 | ||||
| vstm r3, { s8 - s31} // store floating point registers | vstm r3, { s8 - s31} // store floating point registers | ||||