From bfaf5b9ea442633ca5e3c6968c375b933b1794ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Wed, 20 Nov 2024 11:41:52 +0100 Subject: [PATCH 1/5] Restore libsuffix support in pkg-config file It had been mistakenly removed in 9ef10ffa496b919c25aedbb4aa2fdb930901475a. --- openblas.pc.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openblas.pc.in b/openblas.pc.in index d9bb84549..7632645ac 100644 --- a/openblas.pc.in +++ b/openblas.pc.in @@ -2,6 +2,6 @@ Name: openblas Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version Version: ${version} URL: https://github.com/xianyi/OpenBLAS -Libs: -L${libdir} -l${libprefix}openblas${libnamesuffix} +Libs: -L${libdir} -l${libprefix}openblas${libnamesuffix}${libsuffix} Libs.private: ${extralib} Cflags: -I${includedir} ${omp_opt} From fff2e214caee6e516ba1e49de81e9044d46b5a2e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 30 Dec 2024 23:05:17 +0100 Subject: [PATCH 2/5] Add LAPACK-TEST errors topic --- docs/faq.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/faq.md b/docs/faq.md index 699042d51..1a3505ca9 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -51,9 +51,9 @@ In practice, the values are derived by experimentation to yield the block sizes ### How can I report a bug? -Please file an issue at this [issue page](https://github.com/xianyi/OpenBLAS/issues) or send mail to the [OpenBLAS mailing list](https://groups.google.com/forum/#!forum/openblas-users). +Please file an issue at this [issue page](https://github.com/OpenMathLib/OpenBLAS/issues) or send mail to the [OpenBLAS mailing list](https://groups.google.com/forum/#!forum/openblas-users). -Please provide the following information: CPU, OS, compiler, and OpenBLAS compiling flags (Makefile.rule). In addition, please describe how to reproduce this bug. 
+Please provide the following information: CPU, OS, compiler, OpenBLAS version and any compiling flags you used (Makefile.rule). In addition, please describe how to reproduce this bug. ### How to reference OpenBLAS. @@ -105,7 +105,7 @@ Please read [this page](install.md#visual-studio). Zaheer has fixed this bug. You can now use the structure instead of C99 complex numbers. Please read [this issue page](http://github.com/xianyi/OpenBLAS/issues/95) for details. -[This issue](https://github.com/xianyi/OpenBLAS/issues/305) is for using LAPACKE in Visual Studio. +[This issue](https://github.com/OpenMathLib/OpenBLAS/issues/305) is for using LAPACKE in Visual Studio. ### I get a SEGFAULT with multi-threading on Linux. What's wrong? @@ -134,6 +134,13 @@ Background: OpenBLAS implements optimized versions of some LAPACK functions, so Some of the LAPACK tests, notably in xeigtstz, try to allocate around 10MB on the stack. You may need to use `ulimit -s` to change the default limits on your system to allow this. +### My build worked fine and passed the BLAS tests, but running `make lapack-test` ends with a number of errors in the summary report + +The LAPACK tests were primarily created to test the validity of the Reference-LAPACK implementation, which is implemented in unoptimized, single-threaded Fortran code. This makes it very sensitive to small numerical deviations that can result from the use of specialized cpu instructions that combine multiplications and additions without intermediate rounding and storing to memory (FMA), or from changing the order of mathematical operations by splitting an original problem workload into smaller tasks that are solved in parallel. 
As a result, you may encounter a small number of errors in the "numerical" column of +the summary table at the end of the `make lapack-test` run - this is usually nothing to worry about, and the exact number and distribution of errors among the +four data types will often vary with the optimization flags you supplied to the compiler, or the cpu model for which you built OpenBLAS. Sporadic errors in the column labeled `other` are normally the sign of failed convergence of iterative diagonalizations for the same reasons just mentioned. A more detailed error report is stored in the file testing_results.txt - this should be consulted in case of doubt. Care should be taken if you encounter numerical errors in the hundreds, or `other` errors accompanied by the LAPACK error message "on entry to function_name parameter X had an illegal value" that signals a problem with argument passing between individual functions. +(See also [this issue](https://github.com/OpenMathLib/OpenBLAS/issues/4032) in the issue tracker on github for additional discussion, examples and links) + ### How could I disable OpenBLAS threading affinity on runtime? You can define the OPENBLAS_MAIN_FREE or GOTOBLAS_MAIN_FREE environment variable to disable threading affinity on runtime. For example, before the running, From c37509c213a34a8cae449ededd7bc7064675ecc4 Mon Sep 17 00:00:00 2001 From: "tingbo.liao" Date: Tue, 31 Dec 2024 08:46:55 +0800 Subject: [PATCH 3/5] Optimize the nrm2_rvv function to further improve performance. Signed-off-by: tingbo.liao --- kernel/riscv64/nrm2_rvv.c | 370 +++++++++++++++++++++----------------- 1 file changed, 204 insertions(+), 166 deletions(-) diff --git a/kernel/riscv64/nrm2_rvv.c b/kernel/riscv64/nrm2_rvv.c index 14ed68b0a..472b1148e 100644 --- a/kernel/riscv64/nrm2_rvv.c +++ b/kernel/riscv64/nrm2_rvv.c @@ -27,185 +27,223 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" -#if defined(DOUBLE) -#define VSETVL __riscv_vsetvl_e64m4 -#define FLOAT_V_T vfloat64m4_t -#define FLOAT_V_T_M1 vfloat64m1_t -#define VLEV_FLOAT __riscv_vle64_v_f64m4 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 -#define VFMVSF_FLOAT __riscv_vfmv_s_f_f64m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define MASK_T vbool16_t -#define VFABS __riscv_vfabs_v_f64m4 -#define VMFNE __riscv_vmfne_vf_f64m4_b16 -#define VMFGT __riscv_vmfgt_vv_f64m4_b16 -#define VMFEQ __riscv_vmfeq_vf_f64m4_b16 -#define VCPOP __riscv_vcpop_m_b16 -#define VFREDMAX __riscv_vfredmax_vs_f64m4_f64m1 -#define VFREDMIN __riscv_vfredmin_vs_f64m4_f64m1 -#define VFIRST __riscv_vfirst_m_b16 -#define VRGATHER __riscv_vrgather_vx_f64m4 -#define VFDIV __riscv_vfdiv_vv_f64m4 -#define VFDIV_M __riscv_vfdiv_vv_f64m4_mu -#define VFMUL __riscv_vfmul_vv_f64m4 -#define VFMUL_M __riscv_vfmul_vv_f64m4_mu -#define VFMACC __riscv_vfmacc_vv_f64m4 -#define VFMACC_M __riscv_vfmacc_vv_f64m4_mu -#define VMSBF __riscv_vmsbf_m_b16 -#define VMSOF __riscv_vmsof_m_b16 -#define VMAND __riscv_vmand_mm_b16 -#define VMANDN __riscv_vmand_mm_b16 -#define VFREDSUM __riscv_vfredusum_vs_f64m4_f64m1 -#define VMERGE __riscv_vmerge_vvm_f64m4 -#define VSEV_FLOAT __riscv_vse64_v_f64m4 -#define EXTRACT_FLOAT0_V(v) __riscv_vfmv_f_s_f64m4_f64(v) -#define ABS fabs -#else -#define VSETVL __riscv_vsetvl_e32m4 +#if !defined(DOUBLE) +#define VSETVL(n) __riscv_vsetvl_e32m4(n) +#define VSETVL_MAX __riscv_vsetvlmax_e32m4() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t +#define MASK_T vbool8_t #define VLEV_FLOAT __riscv_vle32_v_f32m4 #define VLSEV_FLOAT __riscv_vlse32_v_f32m4 +#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1_tu +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 -#define VFMVSF_FLOAT __riscv_vfmv_s_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define MASK_T vbool8_t -#define VFABS 
__riscv_vfabs_v_f32m4 -#define VMFNE __riscv_vmfne_vf_f32m4_b8 -#define VMFGT __riscv_vmfgt_vv_f32m4_b8 -#define VMFEQ __riscv_vmfeq_vf_f32m4_b8 -#define VCPOP __riscv_vcpop_m_b8 -#define VFREDMAX __riscv_vfredmax_vs_f32m4_f32m1 -#define VFREDMIN __riscv_vfredmin_vs_f32m4_f32m1 -#define VFIRST __riscv_vfirst_m_b8 -#define VRGATHER __riscv_vrgather_vx_f32m4 -#define VFDIV __riscv_vfdiv_vv_f32m4 -#define VFDIV_M __riscv_vfdiv_vv_f32m4_mu -#define VFMUL __riscv_vfmul_vv_f32m4 -#define VFMUL_M __riscv_vfmul_vv_f32m4_mu -#define VFMACC __riscv_vfmacc_vv_f32m4 -#define VFMACC_M __riscv_vfmacc_vv_f32m4_mu -#define VMSBF __riscv_vmsbf_m_b8 -#define VMSOF __riscv_vmsof_m_b8 -#define VMAND __riscv_vmand_mm_b8 -#define VMANDN __riscv_vmand_mm_b8 -#define VFREDSUM __riscv_vfredusum_vs_f32m4_f32m1 -#define VMERGE __riscv_vmerge_vvm_f32m4 -#define VSEV_FLOAT __riscv_vse32_v_f32m4 -#define EXTRACT_FLOAT0_V(v) __riscv_vfmv_f_s_f32m4_f32(v) +#define VMFIRSTM __riscv_vfirst_m_b8 +#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f32m4_f32m1_tu +#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32 +#define VMFGTVF_FLOAT __riscv_vmfgt_vf_f32m4_b8 +#define VFDIVVF_FLOAT __riscv_vfdiv_vf_f32m4 +#define VFABSV_FLOAT __riscv_vfabs_v_f32m4 #define ABS fabsf +#else +#define VSETVL(n) __riscv_vsetvl_e64m4(n) +#define VSETVL_MAX __riscv_vsetvlmax_e64m4() +#define FLOAT_V_T vfloat64m4_t +#define FLOAT_V_T_M1 vfloat64m1_t +#define MASK_T vbool16_t +#define VLEV_FLOAT __riscv_vle64_v_f64m4 +#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 +#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1_tu +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu +#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 +#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 +#define VMFIRSTM __riscv_vfirst_m_b16 +#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f64m4_f64m1_tu +#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64 +#define VMFGTVF_FLOAT __riscv_vmfgt_vf_f64m4_b16 +#define VFDIVVF_FLOAT __riscv_vfdiv_vf_f64m4 +#define VFABSV_FLOAT 
__riscv_vfabs_v_f64m4 +#define ABS fabs #endif FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { - BLASLONG i=0; - - if (n <= 0 || inc_x == 0) return(0.0); - if(n == 1) return (ABS(x[0])); - - unsigned int gvl = 0; - - MASK_T nonzero_mask; - MASK_T scale_mask; - - gvl = VSETVL(n); - FLOAT_V_T v0; - FLOAT_V_T v_ssq = VFMVVF_FLOAT(0, gvl); - FLOAT_V_T v_scale = VFMVVF_FLOAT(0, gvl); - - FLOAT scale = 0; - FLOAT ssq = 0; - unsigned int stride_x = inc_x * sizeof(FLOAT); - int idx = 0; - - if( n >= gvl && inc_x > 0 ) // don't pay overheads if we're not doing useful work - { - for(i=0; i 0 ){ + FLOAT_V_T vr, v0, v_zero; + unsigned int gvl = 0; + FLOAT_V_T_M1 v_res, v_z0; + gvl = VSETVL_MAX; + v_res = VFMVVF_FLOAT_M1(0, gvl); + v_z0 = VFMVVF_FLOAT_M1(0, gvl); + MASK_T mask; + BLASLONG index = 0; + + if (inc_x == 1) { + gvl = VSETVL(n); + vr = VFMVVF_FLOAT(0, gvl); + v_zero = VFMVVF_FLOAT(0, gvl); + for (i = 0, j = 0; i < n / gvl; i++) { + v0 = VLEV_FLOAT(&x[j], gvl); + // fabs(vector) + v0 = VFABSV_FLOAT(v0, gvl); + // if scale change + mask = VMFGTVF_FLOAT(v0, scale, gvl); + index = VMFIRSTM(mask, gvl); + if (index == -1) { // no elements greater than scale + if (scale != 0.0) { + v0 = VFDIVVF_FLOAT(v0, scale, gvl); + vr = VFMACCVV_FLOAT_TU(vr, v0, v0, gvl); + } + } + else { // found greater element + // ssq in vector vr: vr[0] + v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); + // total ssq before current vector + ssq += VFMVFS_FLOAT(v_res); + // find max + v_res = VFREDMAXVS_FLOAT_TU(v_res, v0, v_z0, gvl); + // update ssq before max_index + ssq = ssq * (scale / VFMVFS_FLOAT(v_res)) * (scale / VFMVFS_FLOAT(v_res)); + // update scale + scale = VFMVFS_FLOAT(v_res); + // ssq in vector vr + v0 = VFDIVVF_FLOAT(v0, scale, gvl); + vr = VFMACCVV_FLOAT_TU(v_zero, v0, v0, gvl); + } + j += gvl; + } + // ssq in vector vr: vr[0] + v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); + // total ssq now + ssq += VFMVFS_FLOAT(v_res); + + // tail processing + if(j < n){ + gvl = VSETVL(n-j); + 
v0 = VLEV_FLOAT(&x[j], gvl); + // fabs(vector) + v0 = VFABSV_FLOAT(v0, gvl); + // if scale change + mask = VMFGTVF_FLOAT(v0, scale, gvl); + index = VMFIRSTM(mask, gvl); + if (index == -1) { // no elements greater than scale + if(scale != 0.0) + v0 = VFDIVVF_FLOAT(v0, scale, gvl); + } else { // found greater element + // find max + v_res = VFREDMAXVS_FLOAT_TU(v_res, v0, v_z0, gvl); + // update ssq before max_index + ssq = ssq * (scale / VFMVFS_FLOAT(v_res))*(scale / VFMVFS_FLOAT(v_res)); + // update scale + scale = VFMVFS_FLOAT(v_res); + v0 = VFDIVVF_FLOAT(v0, scale, gvl); + } + vr = VFMACCVV_FLOAT_TU(v_zero, v0, v0, gvl); + // ssq in vector vr: vr[0] + v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); + // total ssq now + ssq += VFMVFS_FLOAT(v_res); + } + } + else { + gvl = VSETVL(n); + vr = VFMVVF_FLOAT(0, gvl); + v_zero = VFMVVF_FLOAT(0, gvl); + unsigned int stride_x = inc_x * sizeof(FLOAT); + int idx = 0, inc_v = inc_x * gvl; + for (i = 0, j = 0; i < n / gvl; i++) { + v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl); + // fabs(vector) + v0 = VFABSV_FLOAT(v0, gvl); + // if scale change + mask = VMFGTVF_FLOAT(v0, scale, gvl); + index = VMFIRSTM(mask, gvl); + if (index == -1) {// no elements greater than scale + if(scale != 0.0){ + v0 = VFDIVVF_FLOAT(v0, scale, gvl); + vr = VFMACCVV_FLOAT_TU(vr, v0, v0, gvl); + } + } + else { // found greater element + // ssq in vector vr: vr[0] + v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); + // total ssq before current vector + ssq += VFMVFS_FLOAT(v_res); + // find max + v_res = VFREDMAXVS_FLOAT_TU(v_res, v0, v_z0, gvl); + // update ssq before max_index + ssq = ssq * (scale / VFMVFS_FLOAT(v_res))*(scale / VFMVFS_FLOAT(v_res)); + // update scale + scale = VFMVFS_FLOAT(v_res); + // ssq in vector vr + v0 = VFDIVVF_FLOAT(v0, scale, gvl); + vr = VFMACCVV_FLOAT_TU(v_zero, v0, v0, gvl); + } + j += gvl; + idx += inc_v; + } + // ssq in vector vr: vr[0] + v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); + // total ssq now + ssq += VFMVFS_FLOAT(v_res); + 
+ // tail processing + if (j < n) { + gvl = VSETVL(n-j); + v0 = VLSEV_FLOAT(&x[idx], stride_x, gvl); + // fabs(vector) + v0 = VFABSV_FLOAT(v0, gvl); + // if scale change + mask = VMFGTVF_FLOAT(v0, scale, gvl); + index = VMFIRSTM(mask, gvl); + if(index == -1) { // no elements greater than scale + if(scale != 0.0) { + v0 = VFDIVVF_FLOAT(v0, scale, gvl); + vr = VFMACCVV_FLOAT_TU(v_zero, v0, v0, gvl); + } + } + else { // found greater element + // find max + v_res = VFREDMAXVS_FLOAT_TU(v_res, v0, v_z0, gvl); + // update ssq before max_index + ssq = ssq * (scale / VFMVFS_FLOAT(v_res))*(scale / VFMVFS_FLOAT(v_res)); + // update scale + scale = VFMVFS_FLOAT(v_res); + v0 = VFDIVVF_FLOAT(v0, scale, gvl); + vr = VFMACCVV_FLOAT_TU(v_zero, v0, v0, gvl); + } + // ssq in vector vr: vr[0] + v_res = VFREDSUM_FLOAT(v_res, vr, v_z0, gvl); + // total ssq now + ssq += VFMVFS_FLOAT(v_res); + } + } + } + else{ + // using scalar ops when inc_x < 0 + n *= inc_x; while(abs(i) < abs(n)){ - if ( x[i] != 0.0 ){ - FLOAT absxi = ABS( x[i] ); - if ( scale < absxi ){ - ssq = 1 + ssq * ( scale / absxi ) * ( scale / absxi ); - scale = absxi ; - } - else{ - ssq += ( absxi/scale ) * ( absxi/scale ); - } - - } - - i += inc_x; + if ( x[i] != 0.0 ){ + FLOAT absxi = ABS( x[i] ); + if ( scale < absxi ){ + ssq = 1 + ssq * ( scale / absxi ) * ( scale / absxi ); + scale = absxi ; + } + else{ + ssq += ( absxi/scale ) * ( absxi/scale ); + } + + } + i += inc_x; } - + } return(scale * sqrt(ssq)); } From 6ad793d65ec1e5e733e3c2e2327793cc1d3b8360 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 31 Dec 2024 14:34:55 +0100 Subject: [PATCH 4/5] Fix naming of suffixed libraries in the cmake and pkgconfig files --- Makefile.install | 4 ++-- openblas.pc.in | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.install b/Makefile.install index bfed157a4..486e9233e 100644 --- a/Makefile.install +++ b/Makefile.install @@ -191,13 +191,13 @@ endif #Generating OpenBLASConfig.cmake @echo 
Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) @echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" - @echo "file(REAL_PATH \"../../..\" _OpenBLAS_ROOT_DIR BASE_DIRECTORY \$${CMAKE_CURRENT_LIST_DIR} )" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" + @echo "file(REAL_PATH \"../../..\" _OpenBLAS_ROOT_DIR BASE_DIRECTORY \$${CMAKE_CURRENT_LIST_DIR} )" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" @echo "SET(OpenBLAS_INCLUDE_DIRS \$${_OpenBLAS_ROOT_DIR}/include)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" ifneq ($(NO_SHARED),1) #ifeq logical or ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly)) - @echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" + @echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" endif ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT)) @echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/bin/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" diff --git a/openblas.pc.in b/openblas.pc.in index 7632645ac..fe2f08720 100644 --- a/openblas.pc.in +++ b/openblas.pc.in @@ -2,6 +2,6 @@ Name: openblas Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version Version: ${version} URL: https://github.com/xianyi/OpenBLAS -Libs: -L${libdir} -l${libprefix}openblas${libnamesuffix}${libsuffix} +Libs: -L${libdir} -l${libprefix}openblas${libsuffix}${libnamesuffix} Libs.private: ${extralib} Cflags: -I${includedir} ${omp_opt} From e9ff70b3941d99ad101286629e0044f6de83daa5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 31 Dec 2024 15:55:13 +0100 Subject: [PATCH 5/5] Add an install_tests target to facilitate testing on cross-compiled targets --- Makefile | 3 ++ 
Makefile.install | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/Makefile b/Makefile
index 78f82dea5..4c7217734 100644
--- a/Makefile
+++ b/Makefile
@@ -426,6 +426,9 @@ dummy :
 install :
 	$(MAKE) -f Makefile.install install
 
+install_tests :
+	$(MAKE) -f Makefile.install install_tests
+
 clean ::
 	@for d in $(SUBDIRS_ALL) ; \
 	do if test -d $$d; then \
diff --git a/Makefile.install b/Makefile.install
index 486e9233e..cd1dcdabc 100644
--- a/Makefile.install
+++ b/Makefile.install
@@ -227,3 +227,107 @@ endif
 	@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
 	@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
 	@echo Install OK!
+
+# install_tests: copy the test programs produced by the regular build, and the
+# input files they read, into $(DESTDIR)$(OPENBLAS_BINARY_DIR) so that they can
+# be run on the target system after cross-compiling.
+# Programs get mode 755 (they must remain executable), data files mode 644.
+# The *_3m testers are only installed for the architectures named in the
+# ARCH checks below.
+.PHONY : install_tests
+install_tests : lib.grd
+	@mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+ifneq ($(ONLY_CBLAS), 1)
+	@install -m 755 utest/openblas_utest "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 utest/openblas_utest_ext "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+# BLAS testers from test/: only when NOFORTRAN is 0 (or unset) and NO_FBLAS is not defined
+ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
+ifndef NO_FBLAS
+ifeq ($(BUILD_BFLOAT16),1)
+	@install -m 755 test/test_sbgemm "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+ifeq ($(BUILD_SINGLE),1)
+	@install -m 755 test/sblat1 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 test/sblat2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 test/sblat3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/sblat2.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/sblat3.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+ifeq ($(BUILD_DOUBLE),1)
+	@install -m 755 test/dblat1 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 test/dblat2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 test/dblat3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/dblat2.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/dblat3.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+ifeq ($(BUILD_COMPLEX),1)
+	@install -m 755 test/cblat1 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 test/cblat2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 test/cblat3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/cblat2.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/cblat3.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+ifeq ($(ARCH), $(filter $(ARCH), x86 x86_64 ia64 MIPS))
+	@install -m 755 test/cblat3_3m "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/cblat3_3m.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+endif
+ifeq ($(BUILD_COMPLEX16),1)
+	@install -m 755 test/zblat1 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 test/zblat2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 test/zblat3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/zblat2.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/zblat3.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+ifeq ($(ARCH), $(filter $(ARCH), x86 x86_64 ia64 MIPS))
+	@install -m 755 test/zblat3_3m "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 test/zblat3_3m.dat "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+endif
+endif
+endif
+# CBLAS testers from ctest/ and their input files; skipped when NO_CBLAS=1.
+# (ONLY_CBLAS is already excluded by the enclosing conditional.)
+ifneq ($(NO_CBLAS), 1)
+ifeq ($(BUILD_SINGLE),1)
+	@install -m 755 ctest/xscblat1 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 ctest/xscblat2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 ctest/xscblat3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/sin2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/sin3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+ifeq ($(BUILD_DOUBLE),1)
+	@install -m 755 ctest/xdcblat1 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 ctest/xdcblat2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 ctest/xdcblat3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/din2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/din3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+ifeq ($(BUILD_COMPLEX),1)
+	@install -m 755 ctest/xccblat1 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 ctest/xccblat2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 ctest/xccblat3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/cin2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/cin3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+ifeq ($(ARCH), $(filter $(ARCH), x86 x86_64 ia64 MIPS))
+	@install -m 755 ctest/xccblat3_3m "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/cin3_3m "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+endif
+ifeq ($(BUILD_COMPLEX16),1)
+	@install -m 755 ctest/xzcblat1 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 ctest/xzcblat2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 ctest/xzcblat3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/zin2 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/zin3 "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+ifeq ($(ARCH), $(filter $(ARCH), x86 x86_64 ia64 MIPS))
+	@install -m 755 ctest/xzcblat3_3m "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 644 ctest/zin3_3m "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+endif
+
+endif
+ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
+	@install -m 755 cpp_thread_test/dgemm_tester "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+	@install -m 755 cpp_thread_test/dgemv_tester "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
+endif
+endif
+