| @@ -81,6 +81,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||||
| #ifdef SMP | #ifdef SMP | ||||
| nthreads = num_cpu_avail(1); | nthreads = num_cpu_avail(1); | ||||
| // Disable multi-threading when incx == 0 or incy == 0: | |||||
| // in that case the threads' work would not be independent. | |||||
| if (incx == 0 || incy == 0) | |||||
| nthreads = 1; | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -83,6 +83,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in | |||||
| #ifdef SMP | #ifdef SMP | ||||
| nthreads = num_cpu_avail(1); | nthreads = num_cpu_avail(1); | ||||
| // Disable multi-threading when incx == 0 or incy == 0: | |||||
| // in that case the threads' work would not be independent. | |||||
| if (incx == 0 || incy == 0) | |||||
| nthreads = 1; | |||||
| if (nthreads == 1) { | if (nthreads == 1) { | ||||
| #endif | #endif | ||||
| @@ -1463,6 +1463,12 @@ | |||||
| .L50: | .L50: | ||||
| movq M, %rax | movq M, %rax | ||||
| movq Y, YY | movq Y, YY | ||||
| // If incx == 0 || incy == 0, skip the unrolled loop. | |||||
| cmpq $0, INCX | |||||
| je .L56 | |||||
| cmpq $0, INCY | |||||
| je .L56 | |||||
| sarq $3, %rax | sarq $3, %rax | ||||
| jle .L55 | jle .L55 | ||||
| ALIGN_3 | ALIGN_3 | ||||
| @@ -805,6 +805,12 @@ | |||||
| .L40: | .L40: | ||||
| movq Y, YY | movq Y, YY | ||||
| movq M, %rax | movq M, %rax | ||||
| // If incx == 0 || incy == 0, skip the unrolled loop. | |||||
| cmpq $0, INCX | |||||
| je .L46 | |||||
| cmpq $0, INCY | |||||
| je .L46 | |||||
| sarq $3, %rax | sarq $3, %rax | ||||
| jle .L45 | jle .L45 | ||||
| ALIGN_3 | ALIGN_3 | ||||
| @@ -2893,6 +2893,12 @@ | |||||
| unpcklps %xmm13, %xmm15 | unpcklps %xmm13, %xmm15 | ||||
| #endif | #endif | ||||
| // If incx == 0 || incy == 0, skip the unrolled loops and jump to the end (.L200). | |||||
| cmpq $0, INCX | |||||
| je .L200 | |||||
| cmpq $0, INCY | |||||
| je .L200 | |||||
| movq Y, YY | movq Y, YY | ||||
| movq M, %rax | movq M, %rax | ||||
| @@ -3105,8 +3111,42 @@ | |||||
| addps %xmm1, %xmm8 | addps %xmm1, %xmm8 | ||||
| movsd %xmm8, (Y) | movsd %xmm8, (Y) | ||||
| jmp .L999 | |||||
| ALIGN_3 | ALIGN_3 | ||||
| .L200: | |||||
| movq M, %rax | |||||
| cmpq $0, %rax | |||||
| jle .L999 | |||||
| ALIGN_3 | |||||
| .L201: | |||||
| movsd (X), %xmm0 | |||||
| addq INCX, X | |||||
| #ifdef HAVE_SSE3 | |||||
| movshdup %xmm0, %xmm1 | |||||
| movsldup %xmm0, %xmm0 | |||||
| #else | |||||
| pshufd $0xf5, %xmm0, %xmm1 | |||||
| shufps $0xa0, %xmm0, %xmm0 | |||||
| #endif | |||||
| mulps %xmm14, %xmm0 | |||||
| mulps %xmm15, %xmm1 | |||||
| movsd (Y), %xmm8 | |||||
| addps %xmm0, %xmm8 | |||||
| addps %xmm1, %xmm8 | |||||
| movsd %xmm8, (Y) | |||||
| addq INCY, Y | |||||
| decq %rax | |||||
| jg .L201 | |||||
| ALIGN_3 | |||||
| .L999: | .L999: | ||||
| xorq %rax, %rax | xorq %rax, %rax | ||||
| @@ -1416,6 +1416,12 @@ | |||||
| movq Y, YY | movq Y, YY | ||||
| movq M, %rax | movq M, %rax | ||||
| // If incx == 0 || incy == 0, skip the unrolled loop and jump to the end (.L58). | |||||
| cmpq $0, INCX | |||||
| je .L58 | |||||
| cmpq $0, INCY | |||||
| je .L58 | |||||
| sarq $3, %rax | sarq $3, %rax | ||||
| jle .L55 | jle .L55 | ||||
| @@ -1769,6 +1775,7 @@ | |||||
| andq $1, %rax | andq $1, %rax | ||||
| jle .L999 | jle .L999 | ||||
| .L58: | |||||
| MOVDDUP( 0 * SIZE, X, %xmm0) | MOVDDUP( 0 * SIZE, X, %xmm0) | ||||
| MOVDDUP( 1 * SIZE, X, %xmm1) | MOVDDUP( 1 * SIZE, X, %xmm1) | ||||
| @@ -1781,6 +1788,9 @@ | |||||
| movlpd %xmm8, 0 * SIZE(YY) | movlpd %xmm8, 0 * SIZE(YY) | ||||
| movhpd %xmm8, 1 * SIZE(YY) | movhpd %xmm8, 1 * SIZE(YY) | ||||
| decq %rax | |||||
| jg .L58 | |||||
| ALIGN_3 | ALIGN_3 | ||||
| .L999: | .L999: | ||||