| @@ -24,12 +24,11 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | ||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include "common.h" | #include "common.h" | ||||
| #ifndef HAVE_ASM_KERNEL | #ifndef HAVE_ASM_KERNEL | ||||
| #include <altivec.h> | #include <altivec.h> | ||||
| static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; | |||||
| static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) | static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) | ||||
| { | { | ||||
| @@ -43,7 +42,7 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT | |||||
| register __vector float valpha_i = {alpha_i, alpha_i,alpha_i, alpha_i}; | register __vector float valpha_i = {alpha_i, alpha_i,alpha_i, alpha_i}; | ||||
| #endif | #endif | ||||
| __vector unsigned char swap_mask = { 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; | |||||
| __vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr); | |||||
| register __vector float *vy = (__vector float *) y; | register __vector float *vy = (__vector float *) y; | ||||
| register __vector float *vx = (__vector float *) x; | register __vector float *vx = (__vector float *) x; | ||||
| BLASLONG i=0; | BLASLONG i=0; | ||||
| @@ -25,12 +25,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include "common.h" | #include "common.h" | ||||
| #ifndef HAVE_KERNEL_8 | #ifndef HAVE_KERNEL_8 | ||||
| #include <altivec.h> | #include <altivec.h> | ||||
| static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; | |||||
| static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot) | static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot) | ||||
| { | { | ||||
| __vector unsigned char swap_mask = { 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; | |||||
| __vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr); | |||||
| register __vector float *vy = (__vector float *) y; | register __vector float *vy = (__vector float *) y; | ||||
| register __vector float *vx = (__vector float *) x; | register __vector float *vx = (__vector float *) x; | ||||
| BLASLONG i = 0; | BLASLONG i = 0; | ||||
| @@ -96,7 +96,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA | |||||
| BLASLONG i = 0; | BLASLONG i = 0; | ||||
| BLASLONG ix=0, iy=0; | BLASLONG ix=0, iy=0; | ||||
| OPENBLAS_COMPLEX_FLOAT result; | OPENBLAS_COMPLEX_FLOAT result; | ||||
| FLOAT dot[4] __attribute__ ((aligned(16))) = {0.0, 0.0, 0.0, 0.0}; | |||||
| FLOAT dot[4] __attribute__((aligned(16))) = {0.0, 0.0, 0.0, 0.0}; | |||||
| if (n <= 0) { | if (n <= 0) { | ||||
| CREAL(result) = 0.0; | CREAL(result) = 0.0; | ||||
| @@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define NBMAX 1024 | #define NBMAX 1024 | ||||
| static const unsigned char swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; | |||||
| static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; | |||||
| static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y) { | static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y) { | ||||
| @@ -247,8 +247,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, | |||||
| BLASLONG m2; | BLASLONG m2; | ||||
| BLASLONG m3; | BLASLONG m3; | ||||
| BLASLONG n2; | BLASLONG n2; | ||||
| FLOAT xbuffer[8], *ybuffer; | |||||
| FLOAT xbuffer[8] __attribute__((aligned(16))); | |||||
| FLOAT *ybuffer; | |||||
| if (m < 1) return (0); | if (m < 1) return (0); | ||||
| if (n < 1) return (0); | if (n < 1) return (0); | ||||
| @@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define NBMAX 1024 | #define NBMAX 1024 | ||||
| #include <altivec.h> | #include <altivec.h> | ||||
| static const unsigned char swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; | |||||
| static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; | |||||
| static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) { | static void cgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) { | ||||
| BLASLONG i; | BLASLONG i; | ||||
| @@ -260,8 +260,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, | |||||
| BLASLONG m2; | BLASLONG m2; | ||||
| BLASLONG m3; | BLASLONG m3; | ||||
| BLASLONG n2; | BLASLONG n2; | ||||
| FLOAT ybuffer[8], *xbuffer; | |||||
| FLOAT ybuffer[8] __attribute__((aligned(16))); | |||||
| FLOAT *xbuffer; | |||||
| if (m < 1) return (0); | if (m < 1) return (0); | ||||
| if (n < 1) return (0); | if (n < 1) return (0); | ||||
| @@ -145,7 +145,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| BLASLONG m3; | BLASLONG m3; | ||||
| BLASLONG n2; | BLASLONG n2; | ||||
| BLASLONG lda4 = lda << 2; | BLASLONG lda4 = lda << 2; | ||||
| FLOAT xbuffer[8] __attribute__ ((aligned (16)));; | |||||
| FLOAT xbuffer[8] __attribute__ ((aligned (16))); | |||||
| FLOAT *ybuffer; | FLOAT *ybuffer; | ||||
| if ( m < 1 ) return(0); | if ( m < 1 ) return(0); | ||||
| @@ -581,9 +581,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| BLASLONG m1; | BLASLONG m1; | ||||
| BLASLONG m2; | BLASLONG m2; | ||||
| BLASLONG m3; | BLASLONG m3; | ||||
| BLASLONG n2; | |||||
| FLOAT ybuffer[8], *xbuffer; | |||||
| BLASLONG n2; | |||||
| FLOAT ybuffer[8] __attribute__((aligned(16))); | |||||
| FLOAT *xbuffer; | |||||
| if (m < 1) return (0); | if (m < 1) return (0); | ||||
| if (n < 1) return (0); | if (n < 1) return (0); | ||||
| @@ -174,7 +174,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| BLASLONG n2; | BLASLONG n2; | ||||
| BLASLONG lda4 = lda << 2; | BLASLONG lda4 = lda << 2; | ||||
| BLASLONG lda8 = lda << 3; | BLASLONG lda8 = lda << 3; | ||||
| FLOAT xbuffer[8],*ybuffer; | |||||
| FLOAT xbuffer[8] __attribute__((aligned(16))); | |||||
| FLOAT *ybuffer; | |||||
| if ( m < 1 ) return(0); | if ( m < 1 ) return(0); | ||||
| if ( n < 1 ) return(0); | if ( n < 1 ) return(0); | ||||
| @@ -213,7 +213,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| BLASLONG n2; | BLASLONG n2; | ||||
| BLASLONG lda4 = lda << 2; | BLASLONG lda4 = lda << 2; | ||||
| BLASLONG lda8 = lda << 3; | BLASLONG lda8 = lda << 3; | ||||
| FLOAT xbuffer[8],*ybuffer; | |||||
| FLOAT xbuffer[8] __attribute__((aligned(16))); | |||||
| FLOAT *ybuffer; | |||||
| if ( m < 1 ) return(0); | if ( m < 1 ) return(0); | ||||
| if ( n < 1 ) return(0); | if ( n < 1 ) return(0); | ||||
| @@ -177,10 +177,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| BLASLONG m1; | BLASLONG m1; | ||||
| BLASLONG m2; | BLASLONG m2; | ||||
| BLASLONG m3; | BLASLONG m3; | ||||
| BLASLONG n2; | |||||
| FLOAT ybuffer[8], *xbuffer; | |||||
| BLASLONG n2; | |||||
| FLOAT ybuffer[8] __attribute__((aligned(16))); | |||||
| FLOAT *xbuffer; | |||||
| if (m < 1) return (0); | if (m < 1) return (0); | ||||
| if (n < 1) return (0); | if (n < 1) return (0); | ||||
| @@ -204,8 +204,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO | |||||
| BLASLONG m3; | BLASLONG m3; | ||||
| BLASLONG n2; | BLASLONG n2; | ||||
| FLOAT ybuffer[8], *xbuffer; | |||||
| FLOAT ybuffer[8] __attribute__((aligned(16))); | |||||
| FLOAT *xbuffer; | |||||
| if (m < 1) return (0); | if (m < 1) return (0); | ||||
| if (n < 1) return (0); | if (n < 1) return (0); | ||||
| @@ -614,8 +614,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, | |||||
| BLASLONG m2; | BLASLONG m2; | ||||
| BLASLONG m3; | BLASLONG m3; | ||||
| BLASLONG n2; | BLASLONG n2; | ||||
| FLOAT xbuffer[8], *ybuffer; | |||||
| FLOAT xbuffer[8] __attribute__((aligned(16))); | |||||
| FLOAT *ybuffer; | |||||
| if (m < 1) return (0); | if (m < 1) return (0); | ||||
| if (n < 1) return (0); | if (n < 1) return (0); | ||||
| @@ -532,8 +532,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, | |||||
| BLASLONG m2; | BLASLONG m2; | ||||
| BLASLONG m3; | BLASLONG m3; | ||||
| BLASLONG n2; | BLASLONG n2; | ||||
| FLOAT ybuffer[8], *xbuffer; | |||||
| FLOAT ybuffer[8] __attribute__((aligned(16))); | |||||
| FLOAT *xbuffer; | |||||
| if (m < 1) return (0); | if (m < 1) return (0); | ||||
| if (n < 1) return (0); | if (n < 1) return (0); | ||||