| @@ -1,7 +1,7 @@ | |||||
| ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15)) | ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15)) | ||||
| ifeq ($(OSNAME), Android) | ifeq ($(OSNAME), Android) | ||||
| CCOMMON_OPT += -mfpu=neon | |||||
| FCOMMON_OPT += -mfpu=neon | |||||
| CCOMMON_OPT += -mfpu=neon -march=armv7-a | |||||
| FCOMMON_OPT += -mfpu=neon -march=armv7-a | |||||
| else | else | ||||
| CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | ||||
| FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | ||||
| @@ -47,7 +47,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) | ||||
| endif () | endif () | ||||
| if (DEFINED ${float_char}MINKERNEL) | if (DEFINED ${float_char}MINKERNEL) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "USE_MIN" "min_k" false "" "" false ${float_type}) | |||||
| endif () | endif () | ||||
| GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) | ||||
| @@ -55,7 +55,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
| GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) | ||||
| endif () | endif () | ||||
| if (DEFINED I${float_char}MINKERNEL) | if (DEFINED I${float_char}MINKERNEL) | ||||
| GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "USE_MIN" "i*min_k" false "" "" false ${float_type}) | |||||
| endif () | endif () | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) | ||||
| @@ -171,7 +171,7 @@ IXAMAXKERNEL = izamax.S | |||||
| endif | endif | ||||
| ifndef ISAMINKERNEL | ifndef ISAMINKERNEL | ||||
| ISAMINKERNEL = iamax.S | |||||
| ISAMINKERNEL = iamax_sse.S | |||||
| endif | endif | ||||
| ifndef IDAMINKERNEL | ifndef IDAMINKERNEL | ||||
| @@ -207,7 +207,7 @@ IQMAXKERNEL = iamax.S | |||||
| endif | endif | ||||
| ifndef ISMINKERNEL | ifndef ISMINKERNEL | ||||
| ISMINKERNEL = iamax.S | |||||
| ISMINKERNEL = iamax_sse.S | |||||
| endif | endif | ||||
| ifndef IDMINKERNEL | ifndef IDMINKERNEL | ||||
| @@ -36,10 +36,6 @@ | |||||
| /* or implied, of The University of Texas at Austin. */ | /* or implied, of The University of Texas at Austin. */ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| /* This kernel was found to give wrong results when used for ISMIN/ISAMIN | |||||
| with increment != 1, although it appears to be correct for corresponding | |||||
| MAX operations. See issue 2116 */ | |||||
| #define ASSEMBLER | #define ASSEMBLER | ||||
| #include "common.h" | #include "common.h" | ||||
| @@ -59,6 +55,15 @@ | |||||
| #define MAXSS minss | #define MAXSS minss | ||||
| #endif | #endif | ||||
| .macro LOAD_AND_COMPARE_TO_MXX REG /* Load one single-precision element from X into REG, advance X by INCX, and replace REG with a scalar equality mask against %xmm0 (presumably the extreme value found earlier - confirm against the caller) */ | |||||
| movss 0 * SIZE(X), \REG /* REG low lane = the float currently addressed by X */ | |||||
| addq INCX, X /* step X forward by INCX so the next expansion reads the next element */ | |||||
| #ifdef USE_ABS | |||||
| andps %xmm15, \REG /* USE_ABS builds only: AND with %xmm15 (presumably the sign-clearing mask - confirm where it is initialised) */ | |||||
| #endif | |||||
| cmpeqss %xmm0, \REG /* scalar compare on the low lane only: all-ones when equal to the low lane of %xmm0, zero otherwise */ | |||||
| .endm | |||||
| #include "l1param.h" | #include "l1param.h" | ||||
| PROLOGUE | PROLOGUE | ||||
| @@ -830,61 +835,14 @@ | |||||
| ALIGN_4 | ALIGN_4 | ||||
| .L93: | .L93: | ||||
| movss 0 * SIZE(X), %xmm1 | |||||
| addq INCX, X | |||||
| #ifdef USE_ABS | |||||
| andps %xmm15, %xmm1 | |||||
| #endif | |||||
| cmpeqss %xmm0, %xmm1 | |||||
| movss 0 * SIZE(X), %xmm2 | |||||
| addq INCX, X | |||||
| #ifdef USE_ABS | |||||
| andps %xmm15, %xmm2 | |||||
| #endif | |||||
| cmpeqss %xmm0, %xmm2 | |||||
| movss 0 * SIZE(X), %xmm3 | |||||
| addq INCX, X | |||||
| #ifdef USE_ABS | |||||
| andps %xmm15, %xmm3 | |||||
| #endif | |||||
| cmpeqss %xmm0, %xmm3 | |||||
| movss 0 * SIZE(X), %xmm4 | |||||
| addq INCX, X | |||||
| #ifdef USE_ABS | |||||
| andps %xmm15, %xmm4 | |||||
| #endif | |||||
| cmpeqss %xmm0, %xmm4 | |||||
| movss 0 * SIZE(X), %xmm5 | |||||
| addq INCX, X | |||||
| #ifdef USE_ABS | |||||
| andps %xmm15, %xmm5 | |||||
| #endif | |||||
| cmpeqps %xmm0, %xmm5 | |||||
| movss 0 * SIZE(X), %xmm6 | |||||
| addq INCX, X | |||||
| #ifdef USE_ABS | |||||
| andps %xmm15, %xmm6 | |||||
| #endif | |||||
| cmpeqss %xmm0, %xmm6 | |||||
| movss 0 * SIZE(X), %xmm7 | |||||
| addq INCX, X | |||||
| #ifdef USE_ABS | |||||
| andps %xmm15, %xmm7 | |||||
| #endif | |||||
| cmpeqss %xmm0, %xmm7 | |||||
| movss 0 * SIZE(X), %xmm8 | |||||
| addq INCX, X | |||||
| #ifdef USE_ABS | |||||
| andps %xmm15, %xmm8 | |||||
| #endif | |||||
| cmpeqss %xmm0, %xmm8 | |||||
| LOAD_AND_COMPARE_TO_MXX %xmm1 | |||||
| LOAD_AND_COMPARE_TO_MXX %xmm2 | |||||
| LOAD_AND_COMPARE_TO_MXX %xmm3 | |||||
| LOAD_AND_COMPARE_TO_MXX %xmm4 | |||||
| LOAD_AND_COMPARE_TO_MXX %xmm5 | |||||
| LOAD_AND_COMPARE_TO_MXX %xmm6 | |||||
| LOAD_AND_COMPARE_TO_MXX %xmm7 | |||||
| LOAD_AND_COMPARE_TO_MXX %xmm8 | |||||
| orps %xmm2, %xmm1 | orps %xmm2, %xmm1 | ||||
| orps %xmm4, %xmm3 | orps %xmm4, %xmm3 | ||||
| @@ -7,6 +7,7 @@ else () | |||||
| set(OpenBLAS_utest_src | set(OpenBLAS_utest_src | ||||
| utest_main.c | utest_main.c | ||||
| test_amax.c | test_amax.c | ||||
| test_ismin.c | |||||
| test_rotmg.c | test_rotmg.c | ||||
| test_rot.c | test_rot.c | ||||
| test_axpy.c | test_axpy.c | ||||
| @@ -11,7 +11,7 @@ UTESTBIN=openblas_utest | |||||
| include $(TOPDIR)/Makefile.system | include $(TOPDIR)/Makefile.system | ||||
| OBJS=utest_main.o test_amax.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o | |||||
| OBJS=utest_main.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o | |||||
| #test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o | #test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o | ||||
| ifneq ($(NO_LAPACK), 1) | ifneq ($(NO_LAPACK), 1) | ||||
| @@ -0,0 +1,89 @@ | |||||
| /***************************************************************************** | |||||
| Copyright (c) 2020, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written | |||||
| permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| **********************************************************************************/ | |||||
| #include "openblas_utest.h" | |||||
| #define ELEMENTS 50 | |||||
| #define INCREMENT 2 | |||||
| CTEST(ismin, positive_step_2){ | |||||
|   /* ISMIN over ascending positive data read with stride 2: a 0 planted at strided slot 8 is expected back as index 9. */ | |||||
|   float x[ELEMENTS * INCREMENT]; | |||||
|   blasint count = ELEMENTS; | |||||
|   blasint stride = INCREMENT; | |||||
|   blasint total = count * stride; | |||||
|   blasint k = 0; | |||||
|   /* Fill every slot, including the ones the stride skips, with 1000, 1001, ... */ | |||||
|   while (k < total) { | |||||
|     x[k] = k + 1000; | |||||
|     k++; | |||||
|   } | |||||
|   /* Plant the unique minimum at the ninth strided element. */ | |||||
|   x[8 * stride] = 0; | |||||
|   blasint found = BLASFUNC(ismin)(&count, x, &stride); | |||||
|   ASSERT_EQUAL(9, found); | |||||
| } | |||||
| CTEST(ismin, negative_step_2){ | |||||
|   /* ISMIN over negative data read with stride 2 ("negative" describes the values; the stride itself is +2). */ | |||||
|   float x[ELEMENTS * INCREMENT]; | |||||
|   blasint count = ELEMENTS; | |||||
|   blasint stride = INCREMENT; | |||||
|   blasint total = count * stride; | |||||
|   blasint k = 0; | |||||
|   /* Fill every slot with -1000, -1001, ... */ | |||||
|   while (k < total) { | |||||
|     x[k] = - k - 1000; | |||||
|     k++; | |||||
|   } | |||||
|   /* Plant a value far below the rest at the ninth strided element. */ | |||||
|   x[8 * stride] = -123456.0f; | |||||
|   blasint found = BLASFUNC(ismin)(&count, x, &stride); | |||||
|   ASSERT_EQUAL(9, found); | |||||
| } | |||||
| CTEST(ismax, positive_step_2){ | |||||
|   /* ISMAX over ascending positive data read with stride 2: a large value planted at strided slot 8 is expected back as index 9. */ | |||||
|   float x[ELEMENTS * INCREMENT]; | |||||
|   blasint count = ELEMENTS; | |||||
|   blasint stride = INCREMENT; | |||||
|   blasint total = count * stride; | |||||
|   blasint k = 0; | |||||
|   /* Fill every slot with 1000, 1001, ... */ | |||||
|   while (k < total) { | |||||
|     x[k] = k + 1000; | |||||
|     k++; | |||||
|   } | |||||
|   /* Plant the unique maximum at the ninth strided element. */ | |||||
|   x[8 * stride] = 123456.0f; | |||||
|   blasint found = BLASFUNC(ismax)(&count, x, &stride); | |||||
|   ASSERT_EQUAL(9, found); | |||||
| } | |||||
| CTEST(ismax, negative_step_2){ | |||||
|   /* ISMAX over negative data read with stride 2 ("negative" describes the values; the stride itself is +2). */ | |||||
|   float x[ELEMENTS * INCREMENT]; | |||||
|   blasint count = ELEMENTS; | |||||
|   blasint stride = INCREMENT; | |||||
|   blasint total = count * stride; | |||||
|   blasint k = 0; | |||||
|   /* Fill every slot with -1000, -1001, ... */ | |||||
|   while (k < total) { | |||||
|     x[k] = - k - 1000; | |||||
|     k++; | |||||
|   } | |||||
|   /* A 0 among strictly negative values is the unique maximum, placed at the ninth strided element. */ | |||||
|   x[8 * stride] = 0; | |||||
|   blasint found = BLASFUNC(ismax)(&count, x, &stride); | |||||
|   ASSERT_EQUAL(9, found); | |||||
| } | |||||