| @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4) | |||
| project(OpenBLAS) | |||
| set(OpenBLAS_MAJOR_VERSION 0) | |||
| set(OpenBLAS_MINOR_VERSION 2) | |||
| set(OpenBLAS_PATCH_VERSION 17.dev) | |||
| set(OpenBLAS_PATCH_VERSION 18.dev) | |||
| set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | |||
| enable_language(ASM) | |||
| @@ -1,4 +1,10 @@ | |||
| OpenBLAS ChangeLog | |||
| ==================================================================== | |||
| Version 0.2.17 | |||
| 20-Mar-2016 | |||
| common: | |||
| * Enable BUILD_LAPACK_DEPRECATED=1 by default. | |||
| ==================================================================== | |||
| Version 0.2.16 | |||
| 15-Mar-2016 | |||
| @@ -3,7 +3,7 @@ | |||
| # | |||
| # This library's version | |||
| VERSION = 0.2.17.dev | |||
| VERSION = 0.2.18.dev | |||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | |||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | |||
| @@ -80,7 +80,7 @@ VERSION = 0.2.17.dev | |||
| # NO_LAPACKE = 1 | |||
| # Build LAPACK Deprecated functions since LAPACK 3.6.0 | |||
| # BUILD_LAPACK_DEPRECATED = 1 | |||
| BUILD_LAPACK_DEPRECATED = 1 | |||
| # If you want to use legacy threaded Level 3 implementation. | |||
| # USE_SIMPLE_THREADED_LEVEL3 = 1 | |||
| @@ -120,10 +120,10 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| #CCOPYKERNEL = ../arm/zcopy.c | |||
| #ZCOPYKERNEL = ../arm/zcopy.c | |||
| # | |||
| #SDOTKERNEL = ../arm/dot.c | |||
| #DDOTKERNEL = ../arm/dot.c | |||
| SDOTKERNEL = sdot.c | |||
| DDOTKERNEL = ddot.c | |||
| #CDOTKERNEL = ../arm/zdot.c | |||
| #ZDOTKERNEL = ../arm/zdot.c | |||
| ZDOTKERNEL = zdot.c | |||
| # | |||
| #SNRM2KERNEL = ../arm/nrm2.c | |||
| #DNRM2KERNEL = ../arm/nrm2.c | |||
| @@ -0,0 +1,139 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/20 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #if defined(POWER8) | |||
| #include "ddot_microk_power8.c" | |||
| #endif | |||
| #ifndef HAVE_KERNEL_8 | |||
| static void ddot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) | |||
| { | |||
| BLASLONG register i = 0; | |||
| FLOAT dot = 0.0; | |||
| while(i < n) | |||
| { | |||
| dot += y[i] * x[i] | |||
| + y[i+1] * x[i+1] | |||
| + y[i+2] * x[i+2] | |||
| + y[i+3] * x[i+3] | |||
| + y[i+4] * x[i+4] | |||
| + y[i+5] * x[i+5] | |||
| + y[i+6] * x[i+6] | |||
| + y[i+7] * x[i+7] ; | |||
| i+=8 ; | |||
| } | |||
| *d += dot; | |||
| } | |||
| #endif | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
| BLASLONG i=0; | |||
| BLASLONG ix=0,iy=0; | |||
| FLOAT dot = 0.0 ; | |||
| if ( n <= 0 ) return(dot); | |||
| if ( (inc_x == 1) && (inc_y == 1) ) | |||
| { | |||
| BLASLONG n1 = n & -16; | |||
| if ( n1 ) | |||
| ddot_kernel_8(n1, x, y , &dot ); | |||
| i = n1; | |||
| while(i < n) | |||
| { | |||
| dot += y[i] * x[i] ; | |||
| i++ ; | |||
| } | |||
| return(dot); | |||
| } | |||
| FLOAT temp1 = 0.0; | |||
| FLOAT temp2 = 0.0; | |||
| BLASLONG n1 = n & -4; | |||
| while(i < n1) | |||
| { | |||
| FLOAT m1 = y[iy] * x[ix] ; | |||
| FLOAT m2 = y[iy+inc_y] * x[ix+inc_x] ; | |||
| FLOAT m3 = y[iy+2*inc_y] * x[ix+2*inc_x] ; | |||
| FLOAT m4 = y[iy+3*inc_y] * x[ix+3*inc_x] ; | |||
| ix += inc_x*4 ; | |||
| iy += inc_y*4 ; | |||
| temp1 += m1+m3; | |||
| temp2 += m2+m4; | |||
| i+=4 ; | |||
| } | |||
| while(i < n) | |||
| { | |||
| temp1 += y[iy] * x[ix] ; | |||
| ix += inc_x ; | |||
| iy += inc_y ; | |||
| i++ ; | |||
| } | |||
| dot = temp1 + temp2; | |||
| return(dot); | |||
| } | |||
| @@ -0,0 +1,178 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/20 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| #define HAVE_KERNEL_8 1 | |||
| static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y , FLOAT *dot) __attribute__ ((noinline)); | |||
| static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) | |||
| { | |||
| BLASLONG i = n; | |||
| BLASLONG o16 = 16; | |||
| BLASLONG o32 = 32; | |||
| BLASLONG o48 = 48; | |||
| BLASLONG o64 = 64; | |||
| BLASLONG o80 = 80; | |||
| BLASLONG o96 = 96; | |||
| BLASLONG o112 = 112; | |||
| FLOAT *x1=x; | |||
| FLOAT *y1=y; | |||
| BLASLONG pre = 384; | |||
| __asm__ __volatile__ | |||
| ( | |||
| "xxlxor 32,32,32 \n\t" | |||
| "xxlxor 33,33,33 \n\t" | |||
| "xxlxor 34,34,34 \n\t" | |||
| "xxlxor 35,35,35 \n\t" | |||
| "xxlxor 36,36,36 \n\t" | |||
| "xxlxor 37,37,37 \n\t" | |||
| "xxlxor 38,38,38 \n\t" | |||
| "xxlxor 39,39,39 \n\t" | |||
| "dcbt %2, %12 \n\t" | |||
| "dcbt %3, %12 \n\t" | |||
| "lxvd2x 40, 0, %2 \n\t" | |||
| "lxvd2x 48, 0, %3 \n\t" | |||
| "lxvd2x 41, %5, %2 \n\t" | |||
| "lxvd2x 49, %5, %3 \n\t" | |||
| "lxvd2x 42, %6, %2 \n\t" | |||
| "lxvd2x 50, %6, %3 \n\t" | |||
| "lxvd2x 43, %7, %2 \n\t" | |||
| "lxvd2x 51, %7, %3 \n\t" | |||
| "lxvd2x 44, %8, %2 \n\t" | |||
| "lxvd2x 52, %8, %3 \n\t" | |||
| "lxvd2x 45, %9, %2 \n\t" | |||
| "lxvd2x 53, %9, %3 \n\t" | |||
| "lxvd2x 46, %10, %2 \n\t" | |||
| "lxvd2x 54, %10, %3 \n\t" | |||
| "lxvd2x 47, %11, %2 \n\t" | |||
| "lxvd2x 55, %11, %3 \n\t" | |||
| "addi %2, %2, 128 \n\t" | |||
| "addi %3, %3, 128 \n\t" | |||
| "addic. %0 , %0 , -16 \n\t" | |||
| "ble 2f \n\t" | |||
| ".align 5 \n\t" | |||
| "1: \n\t" | |||
| "dcbt %2, %12 \n\t" | |||
| "dcbt %3, %12 \n\t" | |||
| "xvmaddadp 32, 40, 48 \n\t" | |||
| "lxvd2x 40, 0, %2 \n\t" | |||
| "lxvd2x 48, 0, %3 \n\t" | |||
| "xvmaddadp 33, 41, 49 \n\t" | |||
| "lxvd2x 41, %5, %2 \n\t" | |||
| "lxvd2x 49, %5, %3 \n\t" | |||
| "xvmaddadp 34, 42, 50 \n\t" | |||
| "lxvd2x 42, %6, %2 \n\t" | |||
| "lxvd2x 50, %6, %3 \n\t" | |||
| "xvmaddadp 35, 43, 51 \n\t" | |||
| "lxvd2x 43, %7, %2 \n\t" | |||
| "lxvd2x 51, %7, %3 \n\t" | |||
| "xvmaddadp 36, 44, 52 \n\t" | |||
| "lxvd2x 44, %8, %2 \n\t" | |||
| "lxvd2x 52, %8, %3 \n\t" | |||
| "xvmaddadp 37, 45, 53 \n\t" | |||
| "lxvd2x 45, %9, %2 \n\t" | |||
| "lxvd2x 53, %9, %3 \n\t" | |||
| "xvmaddadp 38, 46, 54 \n\t" | |||
| "lxvd2x 46, %10, %2 \n\t" | |||
| "lxvd2x 54, %10, %3 \n\t" | |||
| "xvmaddadp 39, 47, 55 \n\t" | |||
| "lxvd2x 47, %11, %2 \n\t" | |||
| "lxvd2x 55, %11, %3 \n\t" | |||
| "addi %2, %2, 128 \n\t" | |||
| "addi %3, %3, 128 \n\t" | |||
| "addic. %0 , %0 , -16 \n\t" | |||
| "bgt 1b \n\t" | |||
| "2: \n\t" | |||
| "xvmaddadp 32, 40, 48 \n\t" | |||
| "xvmaddadp 33, 41, 49 \n\t" | |||
| "xvmaddadp 34, 42, 50 \n\t" | |||
| "xvmaddadp 35, 43, 51 \n\t" | |||
| "xvmaddadp 36, 44, 52 \n\t" | |||
| "xvmaddadp 37, 45, 53 \n\t" | |||
| "xvmaddadp 38, 46, 54 \n\t" | |||
| "xvmaddadp 39, 47, 55 \n\t" | |||
| "xvadddp 32, 32, 33 \n\t" | |||
| "xvadddp 34, 34, 35 \n\t" | |||
| "xvadddp 36, 36, 37 \n\t" | |||
| "xvadddp 38, 38, 39 \n\t" | |||
| "xvadddp 32, 32, 34 \n\t" | |||
| "xvadddp 36, 36, 38 \n\t" | |||
| "xvadddp 32, 32, 36 \n\t" | |||
| "xxswapd 33, 32 \n\t" | |||
| "xsadddp 32, 32, 33 \n\t" | |||
| "stxsdx 32, 0, %4 \n\t" | |||
| : | |||
| : | |||
| "r" (i), // 0 | |||
| "r" (n), // 1 | |||
| "r" (x1), // 2 | |||
| "r" (y1), // 3 | |||
| "r" (dot), // 4 | |||
| "r" (o16), // 5 | |||
| "r" (o32), // 6 | |||
| "r" (o48), // 7 | |||
| "r" (o64), // 8 | |||
| "r" (o80), // 9 | |||
| "r" (o96), // 10 | |||
| "r" (o112), // 11 | |||
| "r" (pre) // 12 | |||
| : "cr0", "%0", "%2" , "%3", "memory" | |||
| ); | |||
| } | |||
| @@ -0,0 +1,126 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/21 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #if defined(POWER8) | |||
| #include "sdot_microk_power8.c" | |||
| #endif | |||
| #ifndef HAVE_KERNEL_16 | |||
| static void sdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) | |||
| { | |||
| BLASLONG register i = 0; | |||
| FLOAT dot = 0.0; | |||
| while(i < n) | |||
| { | |||
| dot += y[i] * x[i] | |||
| + y[i+1] * x[i+1] | |||
| + y[i+2] * x[i+2] | |||
| + y[i+3] * x[i+3] | |||
| + y[i+4] * x[i+4] | |||
| + y[i+5] * x[i+5] | |||
| + y[i+6] * x[i+6] | |||
| + y[i+7] * x[i+7] ; | |||
| i+=8 ; | |||
| } | |||
| *d += dot; | |||
| } | |||
| #endif | |||
| FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
| BLASLONG i=0; | |||
| BLASLONG ix=0,iy=0; | |||
| FLOAT dot = 0.0 ; | |||
| if ( n <= 0 ) return(dot); | |||
| if ( (inc_x == 1) && (inc_y == 1) ) | |||
| { | |||
| BLASLONG n1 = n & -32; | |||
| if ( n1 ) | |||
| sdot_kernel_16(n1, x, y , &dot ); | |||
| i = n1; | |||
| while(i < n) | |||
| { | |||
| dot += y[i] * x[i] ; | |||
| i++ ; | |||
| } | |||
| return(dot); | |||
| } | |||
| BLASLONG n1 = n & -2; | |||
| while(i < n1) | |||
| { | |||
| dot += y[iy] * x[ix] + y[iy+inc_y] * x[ix+inc_x]; | |||
| ix += inc_x*2 ; | |||
| iy += inc_y*2 ; | |||
| i+=2 ; | |||
| } | |||
| while(i < n) | |||
| { | |||
| dot += y[iy] * x[ix] ; | |||
| ix += inc_x ; | |||
| iy += inc_y ; | |||
| i++ ; | |||
| } | |||
| return(dot); | |||
| } | |||
| @@ -0,0 +1,179 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/21 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| #define HAVE_KERNEL_16 1 | |||
| static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y , FLOAT *dot) __attribute__ ((noinline)); | |||
| static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) | |||
| { | |||
| BLASLONG i = n; | |||
| BLASLONG o16 = 16; | |||
| BLASLONG o32 = 32; | |||
| BLASLONG o48 = 48; | |||
| BLASLONG o64 = 64; | |||
| BLASLONG o80 = 80; | |||
| BLASLONG o96 = 96; | |||
| BLASLONG o112 = 112; | |||
| FLOAT *x1=x; | |||
| FLOAT *y1=y; | |||
| BLASLONG pre = 384; | |||
| FLOAT tempdot[4]; | |||
| __asm__ __volatile__ | |||
| ( | |||
| "xxlxor 32,32,32 \n\t" | |||
| "xxlxor 33,33,33 \n\t" | |||
| "xxlxor 34,34,34 \n\t" | |||
| "xxlxor 35,35,35 \n\t" | |||
| "xxlxor 36,36,36 \n\t" | |||
| "xxlxor 37,37,37 \n\t" | |||
| "xxlxor 38,38,38 \n\t" | |||
| "xxlxor 39,39,39 \n\t" | |||
| "dcbt %2, %12 \n\t" | |||
| "dcbt %3, %12 \n\t" | |||
| "lxvw4x 40, 0, %2 \n\t" | |||
| "lxvw4x 48, 0, %3 \n\t" | |||
| "lxvw4x 41, %5, %2 \n\t" | |||
| "lxvw4x 49, %5, %3 \n\t" | |||
| "lxvw4x 42, %6, %2 \n\t" | |||
| "lxvw4x 50, %6, %3 \n\t" | |||
| "lxvw4x 43, %7, %2 \n\t" | |||
| "lxvw4x 51, %7, %3 \n\t" | |||
| "lxvw4x 44, %8, %2 \n\t" | |||
| "lxvw4x 52, %8, %3 \n\t" | |||
| "lxvw4x 45, %9, %2 \n\t" | |||
| "lxvw4x 53, %9, %3 \n\t" | |||
| "lxvw4x 46, %10, %2 \n\t" | |||
| "lxvw4x 54, %10, %3 \n\t" | |||
| "lxvw4x 47, %11, %2 \n\t" | |||
| "lxvw4x 55, %11, %3 \n\t" | |||
| "addi %2, %2, 128 \n\t" | |||
| "addi %3, %3, 128 \n\t" | |||
| "addic. %0 , %0 , -32 \n\t" | |||
| "ble 2f \n\t" | |||
| ".align 5 \n\t" | |||
| "1: \n\t" | |||
| "dcbt %2, %12 \n\t" | |||
| "dcbt %3, %12 \n\t" | |||
| "xvmaddasp 32, 40, 48 \n\t" | |||
| "lxvw4x 40, 0, %2 \n\t" | |||
| "lxvw4x 48, 0, %3 \n\t" | |||
| "xvmaddasp 33, 41, 49 \n\t" | |||
| "lxvw4x 41, %5, %2 \n\t" | |||
| "lxvw4x 49, %5, %3 \n\t" | |||
| "xvmaddasp 34, 42, 50 \n\t" | |||
| "lxvw4x 42, %6, %2 \n\t" | |||
| "lxvw4x 50, %6, %3 \n\t" | |||
| "xvmaddasp 35, 43, 51 \n\t" | |||
| "lxvw4x 43, %7, %2 \n\t" | |||
| "lxvw4x 51, %7, %3 \n\t" | |||
| "xvmaddasp 36, 44, 52 \n\t" | |||
| "lxvw4x 44, %8, %2 \n\t" | |||
| "lxvw4x 52, %8, %3 \n\t" | |||
| "xvmaddasp 37, 45, 53 \n\t" | |||
| "lxvw4x 45, %9, %2 \n\t" | |||
| "lxvw4x 53, %9, %3 \n\t" | |||
| "xvmaddasp 38, 46, 54 \n\t" | |||
| "lxvw4x 46, %10, %2 \n\t" | |||
| "lxvw4x 54, %10, %3 \n\t" | |||
| "xvmaddasp 39, 47, 55 \n\t" | |||
| "lxvw4x 47, %11, %2 \n\t" | |||
| "lxvw4x 55, %11, %3 \n\t" | |||
| "addi %2, %2, 128 \n\t" | |||
| "addi %3, %3, 128 \n\t" | |||
| "addic. %0 , %0 , -32 \n\t" | |||
| "bgt 1b \n\t" | |||
| "2: \n\t" | |||
| "xvmaddasp 32, 40, 48 \n\t" | |||
| "xvmaddasp 33, 41, 49 \n\t" | |||
| "xvmaddasp 34, 42, 50 \n\t" | |||
| "xvmaddasp 35, 43, 51 \n\t" | |||
| "xvmaddasp 36, 44, 52 \n\t" | |||
| "xvmaddasp 37, 45, 53 \n\t" | |||
| "xvmaddasp 38, 46, 54 \n\t" | |||
| "xvmaddasp 39, 47, 55 \n\t" | |||
| "xvaddsp 32, 32 , 33 \n\t" | |||
| "xvaddsp 34, 34 , 35 \n\t" | |||
| "xvaddsp 36, 36 , 37 \n\t" | |||
| "xvaddsp 38, 38 , 39 \n\t" | |||
| "xvaddsp 32, 32 , 34 \n\t" | |||
| "xvaddsp 36, 36 , 38 \n\t" | |||
| "xvaddsp 32, 32 , 36 \n\t" | |||
| "stxvw4x 32, 0 , %4 \n\t" | |||
| : | |||
| : | |||
| "r" (i), // 0 | |||
| "r" (n), // 1 | |||
| "r" (x1), // 2 | |||
| "r" (y1), // 3 | |||
| "r" (tempdot), // 4 | |||
| "r" (o16), // 5 | |||
| "r" (o32), // 6 | |||
| "r" (o48), // 7 | |||
| "r" (o64), // 8 | |||
| "r" (o80), // 9 | |||
| "r" (o96), // 10 | |||
| "r" (o112), // 11 | |||
| "r" (pre) // 12 | |||
| : "cr0", "%0", "%2" , "%3", "memory" | |||
| ); | |||
| *dot = tempdot[0] + tempdot[1] + tempdot[2] + tempdot[3]; | |||
| } | |||
| @@ -0,0 +1,167 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/21 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| #include "common.h" | |||
| #include <complex.h> | |||
| #if defined(POWER8) | |||
| #include "zdot_microk_power8.c" | |||
| #endif | |||
| #ifndef HAVE_KERNEL_8 | |||
| static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) __attribute__ ((noinline)); | |||
| static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) | |||
| { | |||
| BLASLONG register i = 0; | |||
| FLOAT dot[4] = { 0.0, 0.0, 0.0, 0.0 }; | |||
| BLASLONG j=0; | |||
| while( i < n ) | |||
| { | |||
| dot[0] += x[j] * y[j] ; | |||
| dot[1] += x[j+1] * y[j+1] ; | |||
| dot[2] += x[j] * y[j+1] ; | |||
| dot[3] += x[j+1] * y[j] ; | |||
| dot[0] += x[j+2] * y[j+2] ; | |||
| dot[1] += x[j+3] * y[j+3] ; | |||
| dot[2] += x[j+2] * y[j+3] ; | |||
| dot[3] += x[j+3] * y[j+2] ; | |||
| dot[0] += x[j+4] * y[j+4] ; | |||
| dot[1] += x[j+5] * y[j+5] ; | |||
| dot[2] += x[j+4] * y[j+5] ; | |||
| dot[3] += x[j+5] * y[j+4] ; | |||
| dot[0] += x[j+6] * y[j+6] ; | |||
| dot[1] += x[j+7] * y[j+7] ; | |||
| dot[2] += x[j+6] * y[j+7] ; | |||
| dot[3] += x[j+7] * y[j+6] ; | |||
| j+=8; | |||
| i+=4; | |||
| } | |||
| d[0] = dot[0]; | |||
| d[1] = dot[1]; | |||
| d[2] = dot[2]; | |||
| d[3] = dot[3]; | |||
| } | |||
| #endif | |||
| FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
| { | |||
| BLASLONG i; | |||
| BLASLONG ix,iy; | |||
| FLOAT _Complex result; | |||
| FLOAT dot[4] = { 0.0, 0.0, 0.0 , 0.0 } ; | |||
| if ( n <= 0 ) | |||
| { | |||
| __real__ result = 0.0 ; | |||
| __imag__ result = 0.0 ; | |||
| return(result); | |||
| } | |||
| if ( (inc_x == 1) && (inc_y == 1) ) | |||
| { | |||
| BLASLONG n1 = n & -8; | |||
| if ( n1 ) | |||
| zdot_kernel_8(n1, x, y , dot ); | |||
| i = n1; | |||
| BLASLONG j = i * 2; | |||
| while( i < n ) | |||
| { | |||
| dot[0] += x[j] * y[j] ; | |||
| dot[1] += x[j+1] * y[j+1] ; | |||
| dot[2] += x[j] * y[j+1] ; | |||
| dot[3] += x[j+1] * y[j] ; | |||
| j+=2; | |||
| i++ ; | |||
| } | |||
| } | |||
| else | |||
| { | |||
| i=0; | |||
| ix=0; | |||
| iy=0; | |||
| inc_x <<= 1; | |||
| inc_y <<= 1; | |||
| while(i < n) | |||
| { | |||
| dot[0] += x[ix] * y[iy] ; | |||
| dot[1] += x[ix+1] * y[iy+1] ; | |||
| dot[2] += x[ix] * y[iy+1] ; | |||
| dot[3] += x[ix+1] * y[iy] ; | |||
| ix += inc_x ; | |||
| iy += inc_y ; | |||
| i++ ; | |||
| } | |||
| } | |||
| #if !defined(CONJ) | |||
| __real__ result = dot[0] - dot[1]; | |||
| __imag__ result = dot[2] + dot[3]; | |||
| #else | |||
| __real__ result = dot[0] + dot[1]; | |||
| __imag__ result = dot[2] - dot[3]; | |||
| #endif | |||
| return(result); | |||
| } | |||
| @@ -0,0 +1,219 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/21 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| #define HAVE_KERNEL_8 1 | |||
| static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y , FLOAT *dot) __attribute__ ((noinline)); | |||
| static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) | |||
| { | |||
| BLASLONG i = n; | |||
| BLASLONG o16 = 16; | |||
| BLASLONG o32 = 32; | |||
| BLASLONG o48 = 48; | |||
| FLOAT *x1=x; | |||
| FLOAT *y1=y; | |||
| BLASLONG pre = 384; | |||
| __asm__ __volatile__ | |||
| ( | |||
| "xxlxor 32,32,32 \n\t" | |||
| "xxlxor 33,33,33 \n\t" | |||
| "xxlxor 34,34,34 \n\t" | |||
| "xxlxor 35,35,35 \n\t" | |||
| "xxlxor 36,36,36 \n\t" | |||
| "xxlxor 37,37,37 \n\t" | |||
| "xxlxor 38,38,38 \n\t" | |||
| "xxlxor 39,39,39 \n\t" | |||
| "dcbt %2, %8 \n\t" | |||
| "dcbt %3, %8 \n\t" | |||
| "lxvd2x 40, 0, %2 \n\t" // x0_r, x0_i | |||
| "lxvd2x 48, 0, %3 \n\t" // y0_r, y0_i | |||
| "lxvd2x 41, %5, %2 \n\t" // x1_r, x1_i | |||
| "lxvd2x 49, %5, %3 \n\t" // y1_r, y1_i | |||
| "lxvd2x 42, %6, %2 \n\t" // x2_r, x2_i | |||
| "lxvd2x 50, %6, %3 \n\t" // y2_r, y2_i | |||
| "lxvd2x 43, %7, %2 \n\t" // x3_r, x3_i | |||
| "lxvd2x 51, %7, %3 \n\t" // y3_r, y3_i | |||
| "xxswapd 52,48 \n\t" // y0_i, y0_r | |||
| "xxswapd 53,49 \n\t" // y1_i, y1_r | |||
| "xxswapd 54,50 \n\t" // y2_i, y2_r | |||
| "xxswapd 55,51 \n\t" // y3_i, y3_r | |||
| "addi %2, %2, 64 \n\t" | |||
| "addi %3, %3, 64 \n\t" | |||
| "lxvd2x 44, 0, %2 \n\t" // x0_r, x0_i | |||
| "lxvd2x 56, 0, %3 \n\t" // y0_r, y0_i | |||
| "lxvd2x 45, %5, %2 \n\t" // x1_r, x1_i | |||
| "lxvd2x 57, %5, %3 \n\t" // y1_r, y1_i | |||
| "lxvd2x 46, %6, %2 \n\t" // x2_r, x2_i | |||
| "lxvd2x 58, %6, %3 \n\t" // y2_r, y2_i | |||
| "lxvd2x 47, %7, %2 \n\t" // x3_r, x3_i | |||
| "lxvd2x 59, %7, %3 \n\t" // y3_r, y3_i | |||
| "xxswapd 60,56 \n\t" // y0_i, y0_r | |||
| "xxswapd 61,57 \n\t" // y1_i, y1_r | |||
| "xxswapd 62,58 \n\t" // y2_i, y2_r | |||
| "xxswapd 63,59 \n\t" // y3_i, y3_r | |||
| "addi %2, %2, 64 \n\t" | |||
| "addi %3, %3, 64 \n\t" | |||
| "addic. %0 , %0 , -8 \n\t" | |||
| "ble 2f \n\t" | |||
| ".align 5 \n\t" | |||
| "1: \n\t" | |||
| "dcbt %2, %8 \n\t" | |||
| "dcbt %3, %8 \n\t" | |||
| "xvmaddadp 32, 40, 48 \n\t" // x0_r * y0_r , x0_i * y0_i | |||
| "lxvd2x 48, 0, %3 \n\t" // y0_r, y0_i | |||
| "xvmaddadp 34, 41, 49 \n\t" // x1_r * y1_r , x1_i * y1_i | |||
| "lxvd2x 49, %5, %3 \n\t" // y1_r, y1_i | |||
| "xvmaddadp 36, 42, 50 \n\t" // x2_r * y2_r , x2_i * y2_i | |||
| "lxvd2x 50, %6, %3 \n\t" // y2_r, y2_i | |||
| "xvmaddadp 38, 43, 51 \n\t" // x3_r * y3_r , x3_i * y3_i | |||
| "lxvd2x 51, %7, %3 \n\t" // y3_r, y3_i | |||
| "xvmaddadp 33, 40, 52 \n\t" // x0_r * y0_i , x0_i * y0_r | |||
| "lxvd2x 40, 0, %2 \n\t" // x0_r, x0_i | |||
| "xvmaddadp 35, 41, 53 \n\t" // x1_r * y1_i , x1_i * y1_r | |||
| "lxvd2x 41, %5, %2 \n\t" // x1_r, x1_i | |||
| "xvmaddadp 37, 42, 54 \n\t" // x2_r * y2_i , x2_i * y2_r | |||
| "lxvd2x 42, %6, %2 \n\t" // x2_r, x2_i | |||
| "xvmaddadp 39, 43, 55 \n\t" // x3_r * y3_i , x3_i * y3_r | |||
| "lxvd2x 43, %7, %2 \n\t" // x3_r, x3_i | |||
| "xxswapd 52,48 \n\t" // y0_i, y0_r | |||
| "xxswapd 53,49 \n\t" // y1_i, y1_r | |||
| "addi %2, %2, 64 \n\t" | |||
| "addi %3, %3, 64 \n\t" | |||
| "xxswapd 54,50 \n\t" // y2_i, y2_r | |||
| "xxswapd 55,51 \n\t" // y3_i, y3_r | |||
| "xvmaddadp 32, 44, 56 \n\t" // x0_r * y0_r , x0_i * y0_i | |||
| "lxvd2x 56, 0, %3 \n\t" // y0_r, y0_i | |||
| "xvmaddadp 34, 45, 57 \n\t" // x1_r * y1_r , x1_i * y1_i | |||
| "lxvd2x 57, %5, %3 \n\t" // y1_r, y1_i | |||
| "xvmaddadp 36, 46, 58 \n\t" // x2_r * y2_r , x2_i * y2_i | |||
| "lxvd2x 58, %6, %3 \n\t" // y2_r, y2_i | |||
| "xvmaddadp 38, 47, 59 \n\t" // x3_r * y3_r , x3_i * y3_i | |||
| "lxvd2x 59, %7, %3 \n\t" // y3_r, y3_i | |||
| "xvmaddadp 33, 44, 60 \n\t" // x0_r * y0_i , x0_i * y0_r | |||
| "lxvd2x 44, 0, %2 \n\t" // x0_r, x0_i | |||
| "xvmaddadp 35, 45, 61 \n\t" // x1_r * y1_i , x1_i * y1_r | |||
| "lxvd2x 45, %5, %2 \n\t" // x1_r, x1_i | |||
| "xvmaddadp 37, 46, 62 \n\t" // x2_r * y2_i , x2_i * y2_r | |||
| "lxvd2x 46, %6, %2 \n\t" // x2_r, x2_i | |||
| "xvmaddadp 39, 47, 63 \n\t" // x3_r * y3_i , x3_i * y3_r | |||
| "lxvd2x 47, %7, %2 \n\t" // x3_r, x3_i | |||
| "xxswapd 60,56 \n\t" // y0_i, y0_r | |||
| "xxswapd 61,57 \n\t" // y1_i, y1_r | |||
| "addi %2, %2, 64 \n\t" | |||
| "addi %3, %3, 64 \n\t" | |||
| "xxswapd 62,58 \n\t" // y2_i, y2_r | |||
| "xxswapd 63,59 \n\t" // y3_i, y3_r | |||
| "addic. %0 , %0 , -8 \n\t" | |||
| "bgt 1b \n\t" | |||
| "2: \n\t" | |||
| "xvmaddadp 32, 40, 48 \n\t" // x0_r * y0_r , x0_i * y0_i | |||
| "xvmaddadp 34, 41, 49 \n\t" // x1_r * y1_r , x1_i * y1_i | |||
| "xvmaddadp 36, 42, 50 \n\t" // x2_r * y2_r , x2_i * y2_i | |||
| "xvmaddadp 38, 43, 51 \n\t" // x3_r * y3_r , x3_i * y3_i | |||
| "xvmaddadp 33, 40, 52 \n\t" // x0_r * y0_i , x0_i * y0_r | |||
| "xvmaddadp 35, 41, 53 \n\t" // x1_r * y1_i , x1_i * y1_r | |||
| "xvmaddadp 37, 42, 54 \n\t" // x2_r * y2_i , x2_i * y2_r | |||
| "xvmaddadp 39, 43, 55 \n\t" // x3_r * y3_i , x3_i * y3_r | |||
| "xvmaddadp 32, 44, 56 \n\t" // x0_r * y0_r , x0_i * y0_i | |||
| "xvmaddadp 34, 45, 57 \n\t" // x1_r * y1_r , x1_i * y1_i | |||
| "xvmaddadp 36, 46, 58 \n\t" // x2_r * y2_r , x2_i * y2_i | |||
| "xvmaddadp 38, 47, 59 \n\t" // x3_r * y3_r , x3_i * y3_i | |||
| "xvmaddadp 33, 44, 60 \n\t" // x0_r * y0_i , x0_i * y0_r | |||
| "xvmaddadp 35, 45, 61 \n\t" // x1_r * y1_i , x1_i * y1_r | |||
| "xvmaddadp 37, 46, 62 \n\t" // x2_r * y2_i , x2_i * y2_r | |||
| "xvmaddadp 39, 47, 63 \n\t" // x3_r * y3_i , x3_i * y3_r | |||
| "xvadddp 32, 32, 34 \n\t" | |||
| "xvadddp 36, 36, 38 \n\t" | |||
| "xvadddp 33, 33, 35 \n\t" | |||
| "xvadddp 37, 37, 39 \n\t" | |||
| "xvadddp 32, 32, 36 \n\t" | |||
| "xvadddp 33, 33, 37 \n\t" | |||
| "stxvd2x 32, 0, %4 \n\t" | |||
| "stxvd2x 33, %5, %4 \n\t" | |||
| : | |||
| : | |||
| "r" (i), // 0 | |||
| "r" (n), // 1 | |||
| "r" (x1), // 2 | |||
| "r" (y1), // 3 | |||
| "r" (dot), // 4 | |||
| "r" (o16), // 5 | |||
| "r" (o32), // 6 | |||
| "r" (o48), // 7 | |||
| "r" (pre) // 8 | |||
| : "cr0", "%0", "%2" , "%3", "memory" | |||
| ); | |||
| } | |||