| @@ -144,14 +144,14 @@ | |||||
| "decq %4; jmp "#ndim""#sim"04042b;"\ | "decq %4; jmp "#ndim""#sim"04042b;"\ | ||||
| #ndim""#sim"04043:\n\t"\ | #ndim""#sim"04043:\n\t"\ | ||||
| SAVE_R_m4n##ndim | SAVE_R_m4n##ndim | ||||
| #define COMPUTE_m4_n1 COMPUTE_L_m4(1,33833) | |||||
| #define COMPUTE_m4_n2 COMPUTE_L_m4(2,33833) | |||||
| #define COMPUTE_m4_n4 COMPUTE_L_m4(4,33833) | |||||
| #define COMPUTE_m4_n8 COMPUTE_L_m4(8,33833) | |||||
| #define COMPUTE_m4_n12 COMPUTE_L_m4(12,33833) | |||||
| #define COMPUTE_m4_n16 COMPUTE_L_m4(12,33733) COMPUTE_R_m4(4,33933) | |||||
| #define COMPUTE_m4_n20 COMPUTE_L_m4(12,33633) COMPUTE_R_m4(8,33933) | |||||
| #define COMPUTE_m4_n24 COMPUTE_L_m4(12,33533) COMPUTE_R_m4(12,33933) | |||||
| #define COMPUTE_m4_n1 COMPUTE_L_m4(1,383) | |||||
| #define COMPUTE_m4_n2 COMPUTE_L_m4(2,383) | |||||
| #define COMPUTE_m4_n4 COMPUTE_L_m4(4,383) | |||||
| #define COMPUTE_m4_n8 COMPUTE_L_m4(8,383) | |||||
| #define COMPUTE_m4_n12 COMPUTE_L_m4(12,383) | |||||
| #define COMPUTE_m4_n16 COMPUTE_L_m4(12,373) COMPUTE_R_m4(4,393) | |||||
| #define COMPUTE_m4_n20 COMPUTE_L_m4(12,363) COMPUTE_R_m4(8,393) | |||||
| #define COMPUTE_m4_n24 COMPUTE_L_m4(12,353) COMPUTE_R_m4(12,393) | |||||
| #define COMPUTE_m4(ndim) COMPUTE_m4_n##ndim | #define COMPUTE_m4(ndim) COMPUTE_m4_n##ndim | ||||
| /* m = 2 *//* vmm0 for alpha, vmm1-vmm3 for temporary use, vmm4-vmm15 for accumulators */ | /* m = 2 *//* vmm0 for alpha, vmm1-vmm3 for temporary use, vmm4-vmm15 for accumulators */ | ||||