| @@ -1,8 +1,3 @@ | |||
| SGEMVNKERNEL = ../arm/gemv_n.c | |||
| SGEMVTKERNEL = ../arm/gemv_t.c | |||
| CGEMVNKERNEL = ../arm/zgemv_n.c | |||
| CGEMVTKERNEL = ../arm/zgemv_t.c | |||
| ################################################################################# | |||
| SAMAXKERNEL = iamax_vfp.S | |||
| @@ -77,14 +72,14 @@ DSCALKERNEL = scal.c | |||
| CSCALKERNEL = zscal.c | |||
| ZSCALKERNEL = zscal.c | |||
| # BAD SGEMVNKERNEL = gemv_n_vfp.S | |||
| SGEMVNKERNEL = gemv_n_vfp.S | |||
| DGEMVNKERNEL = gemv_n_vfp.S | |||
| #CGEMVNKERNEL = cgemv_n_vfp.S | |||
| CGEMVNKERNEL = cgemv_n_vfp.S | |||
| ZGEMVNKERNEL = zgemv_n_vfp.S | |||
| # BAD SGEMVTKERNEL = gemv_t_vfp.S | |||
| SGEMVTKERNEL = gemv_t_vfp.S | |||
| DGEMVTKERNEL = gemv_t_vfp.S | |||
| #CGEMVTKERNEL = cgemv_t_vfp.S | |||
| CGEMVTKERNEL = cgemv_t_vfp.S | |||
| ZGEMVTKERNEL = zgemv_t_vfp.S | |||
| STRMMKERNEL = strmm_kernel_4x4_vfpv3.S | |||
| @@ -92,7 +87,6 @@ DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S | |||
| CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S | |||
| ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S | |||
| #SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S | |||
| SGEMMINCOPY = | |||
| SGEMMITCOPY = | |||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define N [fp, #-260 ] | |||
| #define K [fp, #-264 ] | |||
| // Stack-frame words used as an in-memory floating-point zero. | |||
| // FP_ZERO and FP_ZERO_0 name the same word; FP_ZERO_1 is the word 4 bytes above it. | |||
| // The "flds s16, FP_ZERO" in the INIT macros reads only the first word. | |||
| #define FP_ZERO [fp, #-236] | |||
| #define FP_ZERO_0 [fp, #-236] | |||
| #define FP_ZERO_1 [fp, #-232] | |||
| #define ALPHA_I [fp, #-272] | |||
| #define ALPHA_R [fp, #-280] | |||
| @@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT2x2 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds s16 , FP_ZERO | |||
| vmov.f32 s17, s16 | |||
| vmov.f32 s18, s16 | |||
| vmov.f32 s19, s16 | |||
| @@ -351,7 +355,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT1x2 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds s16 , FP_ZERO | |||
| vmov.f32 s17, s16 | |||
| vmov.f32 s20, s16 | |||
| vmov.f32 s21, s16 | |||
| @@ -529,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT2x1 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds s16 , FP_ZERO | |||
| vmov.f32 s17, s16 | |||
| vmov.f32 s18, s16 | |||
| vmov.f32 s19, s16 | |||
| @@ -706,7 +710,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT1x1 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds s16 , FP_ZERO | |||
| vmov.f32 s17, s16 | |||
| vmov.f32 s24, s16 | |||
| vmov.f32 s25, s16 | |||
| @@ -852,6 +856,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| sub r3, fp, #128 | |||
| vstm r3, { s8 - s31} // store floating point registers | |||
| movs r4, #0 | |||
| str r4, FP_ZERO | |||
| str r4, FP_ZERO_1 | |||
| ldr r3, OLD_LDC | |||
| lsl r3, r3, #3 // ldc = ldc * 4 * 2 | |||
| str r3, LDC | |||
| @@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define K [fp, #-264 ] | |||
| #define A [fp, #-268 ] | |||
| // Stack-frame words used as an in-memory floating-point zero. | |||
| // FP_ZERO and FP_ZERO_0 name the same word; FP_ZERO_1 is the word 4 bytes above it. | |||
| // "fldd d16, FP_ZERO" reads 8 bytes starting at FP_ZERO, i.e. both words, | |||
| // so both must hold 0 for the load to produce +0.0. | |||
| #define FP_ZERO [fp, #-236] | |||
| #define FP_ZERO_0 [fp, #-236] | |||
| #define FP_ZERO_1 [fp, #-232] | |||
| #define ALPHA [fp, #-276 ] | |||
| #define B [fp, #4 ] | |||
| @@ -89,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT4x4 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16, FP_ZERO | |||
| vmov.f64 d17, d16 | |||
| vmov.f64 d18, d16 | |||
| vmov.f64 d19, d16 | |||
| @@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT2x4 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16, FP_ZERO | |||
| vmov.f64 d17, d16 | |||
| vmov.f64 d20, d16 | |||
| vmov.f64 d21, d16 | |||
| @@ -468,7 +473,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT1x4 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16, FP_ZERO | |||
| vmov.f64 d20, d16 | |||
| vmov.f64 d24, d16 | |||
| vmov.f64 d28, d16 | |||
| @@ -527,7 +532,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT4x2 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16, FP_ZERO | |||
| vmov.f64 d17, d16 | |||
| vmov.f64 d18, d16 | |||
| vmov.f64 d19, d16 | |||
| @@ -601,7 +606,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT2x2 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16, FP_ZERO | |||
| vmov.f64 d17, d16 | |||
| vmov.f64 d20, d16 | |||
| vmov.f64 d21, d16 | |||
| @@ -656,7 +661,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| // INIT1x2: set d16 and d20 to +0.0. | |||
| // The zero is loaded from the FP_ZERO stack slot instead of being computed as | |||
| // d16 - d16: that subtraction yields NaN when d16 holds NaN or Inf, and its | |||
| // result was overwritten by the load below in any case. | |||
| .macro INIT1x2 | |||
| fldd d16, FP_ZERO // 8 bytes at FP_ZERO/FP_ZERO_1, written with 0 before use | |||
| vmov.f64 d20, d16 | |||
| .endm | |||
| @@ -699,7 +704,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT4x1 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16, FP_ZERO | |||
| vmov.f64 d17, d16 | |||
| vmov.f64 d18, d16 | |||
| vmov.f64 d19, d16 | |||
| @@ -753,7 +758,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| // INIT2x1: set d16 and d17 to +0.0. | |||
| // The zero is loaded from the FP_ZERO stack slot instead of being computed as | |||
| // d16 - d16: that subtraction yields NaN when d16 holds NaN or Inf, and its | |||
| // result was overwritten by the load below in any case. | |||
| .macro INIT2x1 | |||
| fldd d16, FP_ZERO // 8 bytes at FP_ZERO/FP_ZERO_1, written with 0 before use | |||
| vmov.f64 d17, d16 | |||
| .endm | |||
| @@ -794,7 +799,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| // INIT1x1: set d16 to +0.0. | |||
| // The zero is loaded from the FP_ZERO stack slot instead of being computed as | |||
| // d16 - d16: that subtraction yields NaN when d16 holds NaN or Inf, and its | |||
| // result was overwritten by the load below in any case. | |||
| .macro INIT1x1 | |||
| fldd d16, FP_ZERO // 8 bytes at FP_ZERO/FP_ZERO_1, written with 0 before use | |||
| .endm | |||
| @@ -850,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| sub r3, fp, #128 | |||
| vstm r3, { d8 - d15} // store floating point registers | |||
| movs r4, #0 | |||
| str r4, FP_ZERO | |||
| str r4, FP_ZERO_1 | |||
| ldr r3, OLD_LDC | |||
| lsl r3, r3, #3 // ldc = ldc * 8 | |||
| str r3, LDC | |||
| @@ -58,6 +58,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define K [fp, #-264 ] | |||
| #define A [fp, #-268 ] | |||
| // Stack-frame words used as an in-memory floating-point zero. | |||
| // FP_ZERO and FP_ZERO_0 name the same word; FP_ZERO_1 is the word 4 bytes above it. | |||
| // The "flds s16, FP_ZERO" in the INIT macros reads only the first word. | |||
| // Immediates are written without a space after '#', matching FP_ZERO. | |||
| #define FP_ZERO [fp, #-240] | |||
| #define FP_ZERO_0 [fp, #-240] | |||
| #define FP_ZERO_1 [fp, #-236] | |||
| #define ALPHA [fp, #-280] | |||
| #define B [fp, #4 ] | |||
| @@ -88,7 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT4x4 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds S16, FP_ZERO | |||
| vmov.f32 s17, s16 | |||
| vmov.f32 s18, s16 | |||
| vmov.f32 s19, s16 | |||
| @@ -322,7 +326,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT2x4 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds S16, FP_ZERO | |||
| vmov.f32 s17, s16 | |||
| vmov.f32 s20, s16 | |||
| vmov.f32 s21, s16 | |||
| @@ -405,7 +409,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT1x4 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds S16, FP_ZERO | |||
| vmov.f32 s20, s16 | |||
| vmov.f32 s24, s16 | |||
| vmov.f32 s28, s16 | |||
| @@ -464,7 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT4x2 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds S16, FP_ZERO | |||
| vmov.f32 s17, s16 | |||
| vmov.f32 s18, s16 | |||
| vmov.f32 s19, s16 | |||
| @@ -538,7 +542,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT2x2 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds S16, FP_ZERO | |||
| vmov.f32 s17, s16 | |||
| vmov.f32 s20, s16 | |||
| vmov.f32 s21, s16 | |||
| @@ -593,7 +597,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| // INIT1x2: set s16 and s20 to +0.0. | |||
| // The zero is loaded from the FP_ZERO stack slot instead of being computed as | |||
| // s16 - s16: that subtraction yields NaN when s16 holds NaN or Inf, and its | |||
| // result was overwritten by the load below in any case. | |||
| .macro INIT1x2 | |||
| flds s16, FP_ZERO // word at FP_ZERO, written with 0 before use | |||
| vmov.f32 s20, s16 | |||
| .endm | |||
| @@ -636,7 +640,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT4x1 | |||
| vsub.f32 s16 , s16 , s16 | |||
| flds S16, FP_ZERO | |||
| vmov.f32 s17, s16 | |||
| vmov.f32 s18, s16 | |||
| vmov.f32 s19, s16 | |||
| @@ -690,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| // INIT2x1: set s16 and s17 to +0.0. | |||
| // The zero is loaded from the FP_ZERO stack slot instead of being computed as | |||
| // s16 - s16: that subtraction yields NaN when s16 holds NaN or Inf, and its | |||
| // result was overwritten by the load below in any case. | |||
| .macro INIT2x1 | |||
| flds s16, FP_ZERO // word at FP_ZERO, written with 0 before use | |||
| vmov.f32 s17, s16 | |||
| .endm | |||
| @@ -731,7 +735,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| // INIT1x1: set s16 to +0.0. | |||
| // The zero is loaded from the FP_ZERO stack slot instead of being computed as | |||
| // s16 - s16: that subtraction yields NaN when s16 holds NaN or Inf, and its | |||
| // result was overwritten by the load below in any case. | |||
| .macro INIT1x1 | |||
| flds s16, FP_ZERO // word at FP_ZERO, written with 0 before use | |||
| .endm | |||
| @@ -787,6 +791,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| sub r3, fp, #128 | |||
| vstm r3, { s8 - s31} // store floating point registers | |||
| movs r4, #0 | |||
| str r4, FP_ZERO | |||
| str r4, FP_ZERO_1 | |||
| ldr r3, OLD_LDC | |||
| lsl r3, r3, #2 // ldc = ldc * 4 | |||
| str r3, LDC | |||
| @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define N [fp, #-260 ] | |||
| #define K [fp, #-264 ] | |||
| // Stack-frame words used as an in-memory floating-point zero. | |||
| // FP_ZERO and FP_ZERO_0 name the same word; FP_ZERO_1 is the word 4 bytes above it. | |||
| // "fldd d16, FP_ZERO" reads 8 bytes starting at FP_ZERO, i.e. both words, | |||
| // so both must hold 0 for the load to produce +0.0. | |||
| #define FP_ZERO [fp, #-236] | |||
| #define FP_ZERO_0 [fp, #-236] | |||
| #define FP_ZERO_1 [fp, #-232] | |||
| #define ALPHA_I [fp, #-272] | |||
| #define ALPHA_R [fp, #-280] | |||
| @@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT2x2 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16 , FP_ZERO | |||
| vmov.f64 d17, d16 | |||
| vmov.f64 d18, d16 | |||
| vmov.f64 d19, d16 | |||
| @@ -388,7 +392,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT1x2 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16 , FP_ZERO | |||
| vmov.f64 d17, d16 | |||
| vmov.f64 d20, d16 | |||
| vmov.f64 d21, d16 | |||
| @@ -566,7 +570,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT2x1 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16 , FP_ZERO | |||
| vmov.f64 d17, d16 | |||
| vmov.f64 d18, d16 | |||
| vmov.f64 d19, d16 | |||
| @@ -743,7 +747,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| .macro INIT1x1 | |||
| vsub.f64 d16 , d16 , d16 | |||
| fldd d16 , FP_ZERO | |||
| vmov.f64 d17, d16 | |||
| vmov.f64 d24, d16 | |||
| vmov.f64 d25, d16 | |||
| @@ -889,6 +893,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| sub r3, fp, #128 | |||
| vstm r3, { d8 - d15} // store floating point registers | |||
| movs r4, #0 | |||
| str r4, FP_ZERO | |||
| str r4, FP_ZERO_1 | |||
| ldr r3, OLD_LDC | |||
| lsl r3, r3, #4 // ldc = ldc * 8 * 2 | |||
| str r3, LDC | |||