| @@ -219,10 +219,10 @@ prof_lapack : lapack_prebuild | |||||
| lapack_prebuild : | lapack_prebuild : | ||||
| ifndef NOFORTRAN | ifndef NOFORTRAN | ||||
| -@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | -@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "OPTS = $(FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "POPTS = $(FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "NOOPT = $(FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "PNOOPT = $(FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "NOOPT = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| @@ -23,8 +23,8 @@ install : lib.grd | |||||
| #for inc | #for inc | ||||
| @echo \#ifndef OPENBLAS_CONFIG_H > $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | @echo \#ifndef OPENBLAS_CONFIG_H > $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | ||||
| @echo \#define OPENBLAS_CONFIG_H >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | @echo \#define OPENBLAS_CONFIG_H >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | ||||
| @cat config_last.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
| @echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
| @awk '{print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
| @echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
| @cat openblas_config_template.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | @cat openblas_config_template.h >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | ||||
| @echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | @echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(OPENBLAS_INCLUDE_DIR)/openblas_config.h | ||||
| @@ -229,6 +229,11 @@ endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| # ifeq logical or | |||||
| ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix)) | |||||
| OS_WINDOWS=1 | |||||
| endif | |||||
| ifdef QUAD_PRECISION | ifdef QUAD_PRECISION | ||||
| CCOMMON_OPT += -DQUAD_PRECISION | CCOMMON_OPT += -DQUAD_PRECISION | ||||
| NO_EXPRECISION = 1 | NO_EXPRECISION = 1 | ||||
| @@ -470,10 +475,8 @@ CCOMMON_OPT += -DF_INTERFACE_GFORT | |||||
| FCOMMON_OPT += -Wall | FCOMMON_OPT += -Wall | ||||
| #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc | #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc | ||||
| ifneq ($(NO_LAPACK), 1) | ifneq ($(NO_LAPACK), 1) | ||||
| ifneq ($(C_COMPILER), LSB) | |||||
| EXTRALIB += -lgfortran | EXTRALIB += -lgfortran | ||||
| endif | endif | ||||
| endif | |||||
| ifdef NO_BINARY_MODE | ifdef NO_BINARY_MODE | ||||
| ifeq ($(ARCH), mips64) | ifeq ($(ARCH), mips64) | ||||
| ifdef BINARY64 | ifdef BINARY64 | ||||
| @@ -842,11 +845,18 @@ override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) | |||||
| override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF) | override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF) | ||||
| #MAKEOVERRIDES = | #MAKEOVERRIDES = | ||||
| #For LAPACK Fortran codes. | |||||
| LAPACK_FFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FFLAGS)) | |||||
| LAPACK_FPFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FPFLAGS)) | |||||
| LAPACK_CFLAGS = $(CFLAGS) | LAPACK_CFLAGS = $(CFLAGS) | ||||
| LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H | LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H | ||||
| ifdef INTERFACE64 | ifdef INTERFACE64 | ||||
| LAPACK_CFLAGS += -DLAPACK_ILP64 | LAPACK_CFLAGS += -DLAPACK_ILP64 | ||||
| endif | endif | ||||
| ifdef OS_WINDOWS | |||||
| LAPACK_CFLAGS += -DOPENBLAS_OS_WINDOWS | |||||
| endif | |||||
| ifeq ($(C_COMPILER), LSB) | ifeq ($(C_COMPILER), LSB) | ||||
| LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE | LAPACK_CFLAGS += -DLAPACK_COMPLEX_STRUCTURE | ||||
| endif | endif | ||||
| @@ -606,7 +606,8 @@ clean :: | |||||
| @if test -d $(ARCH); then \ | @if test -d $(ARCH); then \ | ||||
| (cd $(ARCH) && $(MAKE) clean) \ | (cd $(ARCH) && $(MAKE) clean) \ | ||||
| fi | fi | ||||
| @rm -rf *.a *.s *.o *.po *.obj *.i *.so core core.* gmon.out *.cso \ | |||||
| @find . -name '*.o' | xargs rm -rf | |||||
| @rm -rf *.a *.s *.po *.obj *.i *.so core core.* gmon.out *.cso \ | |||||
| *.csx *.is *~ *.exe *.flame *.pdb *.dwf \ | *.csx *.is *~ *.exe *.flame *.pdb *.dwf \ | ||||
| gen_insn_flash.c gen_insn_flash *.stackdump *.dll *.exp *.lib \ | gen_insn_flash.c gen_insn_flash *.stackdump *.dll *.exp *.lib \ | ||||
| *.pc *.pcl *.def *.i *.prof linktest.c \ | *.pc *.pcl *.def *.i *.prof linktest.c \ | ||||
| @@ -441,9 +441,10 @@ int BLASFUNC(blas_thread_shutdown)(void){ | |||||
| if (blas_server_avail){ | if (blas_server_avail){ | ||||
| SetEvent(pool.killed); | SetEvent(pool.killed); | ||||
| printf("blas_num_threads=%d\n", blas_num_threads); | |||||
| for(i = 0; i < blas_num_threads - 1; i++){ | for(i = 0; i < blas_num_threads - 1; i++){ | ||||
| WaitForSingleObject(blas_threads[i], INFINITE); | |||||
| WaitForSingleObject(blas_threads[i], 5); //INFINITE); | |||||
| TerminateThread(blas_threads[i],0); | |||||
| } | } | ||||
| blas_server_avail = 0; | blas_server_avail = 0; | ||||
| @@ -363,7 +363,7 @@ static void *alloc_mmap(void *address){ | |||||
| #define BENCH_ITERATION 4 | #define BENCH_ITERATION 4 | ||||
| #define SCALING 2 | #define SCALING 2 | ||||
| static inline BLASULONG run_bench(BLASULONG address, long size) { | |||||
| static inline BLASULONG run_bench(BLASULONG address, BLASULONG size) { | |||||
| BLASULONG original, *p; | BLASULONG original, *p; | ||||
| BLASULONG start, stop, min; | BLASULONG start, stop, min; | ||||
| @@ -450,12 +450,12 @@ static void *alloc_mmap(void *address){ | |||||
| current = (SCALING - 1) * BUFFER_SIZE; | current = (SCALING - 1) * BUFFER_SIZE; | ||||
| while(current > 0) { | while(current > 0) { | ||||
| *(long *)start = (long)start + PAGESIZE; | |||||
| *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; | |||||
| start += PAGESIZE; | start += PAGESIZE; | ||||
| current -= PAGESIZE; | current -= PAGESIZE; | ||||
| } | } | ||||
| *(long *)(start - PAGESIZE) = (BLASULONG)map_address; | |||||
| *(BLASLONG *)(start - PAGESIZE) = (BLASULONG)map_address; | |||||
| start = (BLASULONG)map_address; | start = (BLASULONG)map_address; | ||||
| @@ -1170,7 +1170,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, | |||||
| #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) | #if !defined(ARCH_POWER) && !defined(ARCH_SPARC) | ||||
| long size; | |||||
| size_t size; | |||||
| BLASULONG buffer; | BLASULONG buffer; | ||||
| size = BUFFER_SIZE - PAGESIZE; | size = BUFFER_SIZE - PAGESIZE; | ||||
| @@ -111,7 +111,7 @@ libgoto_hpl.def : gensymbol | |||||
| perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) | perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) > $(@F) | ||||
| $(LIBDYNNAME) : ../$(LIBNAME) osx.def | $(LIBDYNNAME) : ../$(LIBNAME) osx.def | ||||
| $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||||
| $(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||||
| symbol.$(SUFFIX) : symbol.S | symbol.$(SUFFIX) : symbol.S | ||||
| $(CC) $(CFLAGS) -c -o $(@F) $^ | $(CC) $(CFLAGS) -c -o $(@F) $^ | ||||
| @@ -124,14 +124,17 @@ ifeq ($(OSNAME), Linux) | |||||
| so : ../$(LIBSONAME) | so : ../$(LIBSONAME) | ||||
| ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c | ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c | ||||
| ifneq ($(C_COMPILER), LSB) | |||||
| $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ | $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ | ||||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ | -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ | ||||
| -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) | -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) | ||||
| ifneq ($(C_COMPILER), LSB) | |||||
| $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | ||||
| else | else | ||||
| #Use FC on LSB | |||||
| $(FC) $(FFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||||
| #for LSB | |||||
| env LSBCC_SHAREDLIBS=gfortran $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ | |||||
| -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ | |||||
| -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) | |||||
| $(FC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. | |||||
| endif | endif | ||||
| rm -f linktest | rm -f linktest | ||||
| @@ -60,7 +60,6 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT * | |||||
| }; | }; | ||||
| #endif | #endif | ||||
| extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); | |||||
| int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ | int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ | ||||
| @@ -133,18 +132,6 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In | |||||
| if (args.nthreads == 1) { | if (args.nthreads == 1) { | ||||
| #endif | #endif | ||||
| #if DOUBLE | |||||
| // double trtri_U single thread error | |||||
| // call dtrtri from lapack for a walk around. | |||||
| if(uplo==0){ | |||||
| BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info); | |||||
| #ifndef PPC440 | |||||
| blas_memory_free(buffer); | |||||
| #endif | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| *Info = (trtri_single[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0); | *Info = (trtri_single[(uplo << 1) | diag])(&args, NULL, NULL, sa, sb, 0); | ||||
| #ifdef SMP | #ifdef SMP | ||||
| @@ -103,7 +103,7 @@ | |||||
| vmovups -10*SIZE(AO,%rax,8), %xmm6 | vmovups -10*SIZE(AO,%rax,8), %xmm6 | ||||
| vfmaddpd %xmm14, %xmm6 , %xmm1 , %xmm14 | vfmaddpd %xmm14, %xmm6 , %xmm1 , %xmm14 | ||||
| vfmaddpd %xmm15, %xmm6 , %xmm2 , %xmm15 | vfmaddpd %xmm15, %xmm6 , %xmm2 , %xmm15 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| .macro SOLVE_8x2 | .macro SOLVE_8x2 | ||||
| @@ -265,7 +265,7 @@ | |||||
| vmovups -14*SIZE(AO,%rax,4), %xmm0 | vmovups -14*SIZE(AO,%rax,4), %xmm0 | ||||
| vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10 | vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10 | ||||
| vfmaddpd %xmm11, %xmm0 , %xmm2 , %xmm11 | vfmaddpd %xmm11, %xmm0 , %xmm2 , %xmm11 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| @@ -338,7 +338,7 @@ | |||||
| vmovups -16*SIZE(AO,%rax,2), %xmm0 | vmovups -16*SIZE(AO,%rax,2), %xmm0 | ||||
| vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| vfmaddpd %xmm9 , %xmm0 , %xmm2 , %xmm9 | vfmaddpd %xmm9 , %xmm0 , %xmm2 , %xmm9 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| @@ -378,7 +378,7 @@ | |||||
| vmovups -16*SIZE(BO,%rax,2), %xmm1 | vmovups -16*SIZE(BO,%rax,2), %xmm1 | ||||
| vmovddup -16*SIZE(AO,%rax,1), %xmm0 | vmovddup -16*SIZE(AO,%rax,1), %xmm0 | ||||
| vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| .macro SOLVE_1x2 | .macro SOLVE_1x2 | ||||
| @@ -411,7 +411,7 @@ | |||||
| vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10 | vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10 | ||||
| vmovups -10*SIZE(AO,%rax,8), %xmm0 | vmovups -10*SIZE(AO,%rax,8), %xmm0 | ||||
| vfmaddpd %xmm11, %xmm0 , %xmm1 , %xmm11 | vfmaddpd %xmm11, %xmm0 , %xmm1 , %xmm11 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| .macro SOLVE_8x1 | .macro SOLVE_8x1 | ||||
| @@ -510,7 +510,7 @@ | |||||
| vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| vmovups -14*SIZE(AO,%rax,4), %xmm0 | vmovups -14*SIZE(AO,%rax,4), %xmm0 | ||||
| vfmaddpd %xmm9 , %xmm0 , %xmm1 , %xmm9 | vfmaddpd %xmm9 , %xmm0 , %xmm1 , %xmm9 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| @@ -560,7 +560,7 @@ | |||||
| vmovddup -16*SIZE(BO,%rax,1), %xmm1 | vmovddup -16*SIZE(BO,%rax,1), %xmm1 | ||||
| vmovups -16*SIZE(AO,%rax,2), %xmm0 | vmovups -16*SIZE(AO,%rax,2), %xmm0 | ||||
| vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| @@ -592,7 +592,7 @@ | |||||
| vmovsd -16*SIZE(BO,%rax,1), %xmm1 | vmovsd -16*SIZE(BO,%rax,1), %xmm1 | ||||
| vmovsd -16*SIZE(AO,%rax,1), %xmm0 | vmovsd -16*SIZE(AO,%rax,1), %xmm0 | ||||
| vfmaddsd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddsd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| .macro SOLVE_1x1 | .macro SOLVE_1x1 | ||||
| @@ -103,7 +103,7 @@ | |||||
| vmovups -10*SIZE(AO,%rax,8), %xmm6 | vmovups -10*SIZE(AO,%rax,8), %xmm6 | ||||
| vfmaddpd %xmm14, %xmm6 , %xmm1 , %xmm14 | vfmaddpd %xmm14, %xmm6 , %xmm1 , %xmm14 | ||||
| vfmaddpd %xmm15, %xmm6 , %xmm2 , %xmm15 | vfmaddpd %xmm15, %xmm6 , %xmm2 , %xmm15 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| .macro SOLVE_8x2 | .macro SOLVE_8x2 | ||||
| @@ -177,7 +177,7 @@ | |||||
| vmovups -14*SIZE(AO,%rax,4), %xmm0 | vmovups -14*SIZE(AO,%rax,4), %xmm0 | ||||
| vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10 | vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10 | ||||
| vfmaddpd %xmm11, %xmm0 , %xmm2 , %xmm11 | vfmaddpd %xmm11, %xmm0 , %xmm2 , %xmm11 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| @@ -226,7 +226,7 @@ | |||||
| vmovups -16*SIZE(AO,%rax,2), %xmm0 | vmovups -16*SIZE(AO,%rax,2), %xmm0 | ||||
| vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| vfmaddpd %xmm9 , %xmm0 , %xmm2 , %xmm9 | vfmaddpd %xmm9 , %xmm0 , %xmm2 , %xmm9 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| @@ -262,7 +262,7 @@ | |||||
| vmovups -16*SIZE(BO,%rax,2), %xmm1 | vmovups -16*SIZE(BO,%rax,2), %xmm1 | ||||
| vmovddup -16*SIZE(AO,%rax,1), %xmm0 | vmovddup -16*SIZE(AO,%rax,1), %xmm0 | ||||
| vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| .macro SOLVE_1x2 | .macro SOLVE_1x2 | ||||
| @@ -306,7 +306,7 @@ | |||||
| vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10 | vfmaddpd %xmm10, %xmm0 , %xmm1 , %xmm10 | ||||
| vmovups -10*SIZE(AO,%rax,8), %xmm0 | vmovups -10*SIZE(AO,%rax,8), %xmm0 | ||||
| vfmaddpd %xmm11, %xmm0 , %xmm1 , %xmm11 | vfmaddpd %xmm11, %xmm0 , %xmm1 , %xmm11 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| .macro SOLVE_8x1 | .macro SOLVE_8x1 | ||||
| @@ -347,7 +347,7 @@ | |||||
| vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| vmovups -14*SIZE(AO,%rax,4), %xmm0 | vmovups -14*SIZE(AO,%rax,4), %xmm0 | ||||
| vfmaddpd %xmm9 , %xmm0 , %xmm1 , %xmm9 | vfmaddpd %xmm9 , %xmm0 , %xmm1 , %xmm9 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| @@ -377,7 +377,7 @@ | |||||
| vmovddup -16*SIZE(BO,%rax,1), %xmm1 | vmovddup -16*SIZE(BO,%rax,1), %xmm1 | ||||
| vmovups -16*SIZE(AO,%rax,2), %xmm0 | vmovups -16*SIZE(AO,%rax,2), %xmm0 | ||||
| vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddpd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| @@ -402,7 +402,7 @@ | |||||
| vmovsd -16*SIZE(BO,%rax,1), %xmm1 | vmovsd -16*SIZE(BO,%rax,1), %xmm1 | ||||
| vmovsd -16*SIZE(AO,%rax,1), %xmm0 | vmovsd -16*SIZE(AO,%rax,1), %xmm0 | ||||
| vfmaddsd %xmm8 , %xmm0 , %xmm1 , %xmm8 | vfmaddsd %xmm8 , %xmm0 , %xmm1 , %xmm8 | ||||
| addq $SIZE, %rax | |||||
| addq $ SIZE, %rax | |||||
| .endm | .endm | ||||
| .macro SOLVE_1x1 | .macro SOLVE_1x1 | ||||
| @@ -45,7 +45,11 @@ extern "C" { | |||||
| #ifndef lapack_int | #ifndef lapack_int | ||||
| #if defined(LAPACK_ILP64) | #if defined(LAPACK_ILP64) | ||||
| #if defined(OPENBLAS_OS_WINDOWS) | |||||
| #define lapack_int long long | |||||
| #else | |||||
| #define lapack_int long | #define lapack_int long | ||||
| #endif | |||||
| #else | #else | ||||
| #define lapack_int int | #define lapack_int int | ||||
| #endif | #endif | ||||
| @@ -67,14 +67,14 @@ double sqrt(double); | |||||
| #undef GETRF_FACTOR | #undef GETRF_FACTOR | ||||
| #define GETRF_FACTOR 1.00 | #define GETRF_FACTOR 1.00 | ||||
| static inline long FORMULA1(long M, long N, long IS, long BK, long T) { | |||||
| static inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) { | |||||
| double m = (double)(M - IS - BK); | double m = (double)(M - IS - BK); | ||||
| double n = (double)(N - IS - BK); | double n = (double)(N - IS - BK); | ||||
| double b = (double)BK; | double b = (double)BK; | ||||
| double a = (double)T; | double a = (double)T; | ||||
| return (long)((n + GETRF_FACTOR * m * b * (1. - a) / (b + m)) / a); | |||||
| return (BLASLONG)((n + GETRF_FACTOR * m * b * (1. - a) / (b + m)) / a); | |||||
| } | } | ||||
| @@ -111,7 +111,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra | |||||
| if (args -> a == NULL) { | if (args -> a == NULL) { | ||||
| TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb); | TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb); | ||||
| sbb = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| } else { | } else { | ||||
| sb = (FLOAT *)args -> a; | sb = (FLOAT *)args -> a; | ||||
| } | } | ||||
| @@ -221,7 +221,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG * | |||||
| if (args -> a == NULL) { | if (args -> a == NULL) { | ||||
| TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb); | TRSM_ILTCOPY(k, k, (FLOAT *)args -> b, lda, 0, sb); | ||||
| sbb = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| sbb = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| } else { | } else { | ||||
| sb = (FLOAT *)args -> a; | sb = (FLOAT *)args -> a; | ||||
| } | } | ||||
| @@ -448,7 +448,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
| TRSM_ILTCOPY(bk, bk, a, lda, 0, sb); | TRSM_ILTCOPY(bk, bk, a, lda, 0, sb); | ||||
| sbb = (FLOAT *)((((long)(sb + bk * bk * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| sbb = (FLOAT *)((((BLASULONG)(sb + bk * bk * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| is = 0; | is = 0; | ||||
| num_cpu = 0; | num_cpu = 0; | ||||
| @@ -685,7 +685,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
| if (width > n - init_bk) width = n - init_bk; | if (width > n - init_bk) width = n - init_bk; | ||||
| if (width < init_bk) { | if (width < init_bk) { | ||||
| long temp; | |||||
| BLASLONG temp; | |||||
| temp = FORMULA2(m, n, 0, init_bk, args -> nthreads); | temp = FORMULA2(m, n, 0, init_bk, args -> nthreads); | ||||
| temp = (temp + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); | temp = (temp + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1); | ||||
| @@ -708,7 +708,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
| is = 0; | is = 0; | ||||
| num_cpu = 0; | num_cpu = 0; | ||||
| sbb = (FLOAT *)((((long)(sb + GEMM_PQ * GEMM_PQ * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| sbb = (FLOAT *)((((BLASULONG)(sb + GEMM_PQ * GEMM_PQ * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| while (is < mn) { | while (is < mn) { | ||||
| @@ -178,7 +178,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
| return info; | return info; | ||||
| } | } | ||||
| sbb = (FLOAT *)((((long)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| info = 0; | info = 0; | ||||
| @@ -82,7 +82,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
| return info; | return info; | ||||
| } | } | ||||
| sbb = (FLOAT *)((((long)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| sbb = (FLOAT *)((((BLASULONG)(sb + blocking * blocking * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| info = 0; | info = 0; | ||||
| @@ -185,7 +185,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | ||||
| buffer[0] = (FLOAT *)((((long)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| buffer[0] = (FLOAT *)((((BLASULONG)(sb + k * k * COMPSIZE) + GEMM_ALIGN) & ~GEMM_ALIGN) + GEMM_OFFSET_B); | |||||
| for (i = 1; i < DIVIDE_RATE; i++) { | for (i = 1; i < DIVIDE_RATE; i++) { | ||||
| buffer[i] = buffer[i - 1] + GEMM_Q * div_n * COMPSIZE; | buffer[i] = buffer[i - 1] + GEMM_Q * div_n * COMPSIZE; | ||||
| } | } | ||||
| @@ -13,7 +13,6 @@ ZBLASOBJS = ztrtri_UU_single.$(SUFFIX) ztrtri_UN_single.$(SUFFIX) ztrtri_LU_sing | |||||
| XBLASOBJS = xtrtri_UU_single.$(SUFFIX) xtrtri_UN_single.$(SUFFIX) xtrtri_LU_single.$(SUFFIX) xtrtri_LN_single.$(SUFFIX) | XBLASOBJS = xtrtri_UU_single.$(SUFFIX) xtrtri_UN_single.$(SUFFIX) xtrtri_LU_single.$(SUFFIX) xtrtri_LN_single.$(SUFFIX) | ||||
| DBLASOBJS += dtrtri_lapack.$(SUFFIX) | |||||
| ifdef SMP | ifdef SMP | ||||
| SBLASOBJS += strtri_UU_parallel.$(SUFFIX) strtri_UN_parallel.$(SUFFIX) strtri_LU_parallel.$(SUFFIX) strtri_LN_parallel.$(SUFFIX) | SBLASOBJS += strtri_UU_parallel.$(SUFFIX) strtri_UN_parallel.$(SUFFIX) strtri_LU_parallel.$(SUFFIX) strtri_LN_parallel.$(SUFFIX) | ||||
| @@ -54,9 +53,6 @@ dtrtri_UU_single.$(SUFFIX) : trtri_U_single.c | |||||
| dtrtri_UN_single.$(SUFFIX) : trtri_U_single.c | dtrtri_UN_single.$(SUFFIX) : trtri_U_single.c | ||||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUNIT $< -o $(@F) | $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UUNIT $< -o $(@F) | ||||
| dtrtri_lapack.$(SUFFIX) : dtrtri_lapack.f | |||||
| $(FC) -c $(FFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F) | |||||
| dtrtri_LU_single.$(SUFFIX) : trtri_L_single.c | dtrtri_LU_single.$(SUFFIX) : trtri_L_single.c | ||||
| $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F) | $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DUNIT $< -o $(@F) | ||||
| @@ -1,242 +0,0 @@ | |||||
| *> \brief \b DTRTRI | |||||
| * | |||||
| * =========== DOCUMENTATION =========== | |||||
| * | |||||
| * Online html documentation available at | |||||
| * http://www.netlib.org/lapack/explore-html/ | |||||
| * | |||||
| *> \htmlonly | |||||
| *> Download DTRTRI + dependencies | |||||
| *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dtrtri.f"> | |||||
| *> [TGZ]</a> | |||||
| *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dtrtri.f"> | |||||
| *> [ZIP]</a> | |||||
| *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dtrtri.f"> | |||||
| *> [TXT]</a> | |||||
| *> \endhtmlonly | |||||
| * | |||||
| * Definition: | |||||
| * =========== | |||||
| * | |||||
| * SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) | |||||
| * | |||||
| * .. Scalar Arguments .. | |||||
| * CHARACTER DIAG, UPLO | |||||
| * INTEGER INFO, LDA, N | |||||
| * .. | |||||
| * .. Array Arguments .. | |||||
| * DOUBLE PRECISION A( LDA, * ) | |||||
| * .. | |||||
| * | |||||
| * | |||||
| *> \par Purpose: | |||||
| * ============= | |||||
| *> | |||||
| *> \verbatim | |||||
| *> | |||||
| *> DTRTRI computes the inverse of a real upper or lower triangular | |||||
| *> matrix A. | |||||
| *> | |||||
| *> This is the Level 3 BLAS version of the algorithm. | |||||
| *> \endverbatim | |||||
| * | |||||
| * Arguments: | |||||
| * ========== | |||||
| * | |||||
| *> \param[in] UPLO | |||||
| *> \verbatim | |||||
| *> UPLO is CHARACTER*1 | |||||
| *> = 'U': A is upper triangular; | |||||
| *> = 'L': A is lower triangular. | |||||
| *> \endverbatim | |||||
| *> | |||||
| *> \param[in] DIAG | |||||
| *> \verbatim | |||||
| *> DIAG is CHARACTER*1 | |||||
| *> = 'N': A is non-unit triangular; | |||||
| *> = 'U': A is unit triangular. | |||||
| *> \endverbatim | |||||
| *> | |||||
| *> \param[in] N | |||||
| *> \verbatim | |||||
| *> N is INTEGER | |||||
| *> The order of the matrix A. N >= 0. | |||||
| *> \endverbatim | |||||
| *> | |||||
| *> \param[in,out] A | |||||
| *> \verbatim | |||||
| *> A is DOUBLE PRECISION array, dimension (LDA,N) | |||||
| *> On entry, the triangular matrix A. If UPLO = 'U', the | |||||
| *> leading N-by-N upper triangular part of the array A contains | |||||
| *> the upper triangular matrix, and the strictly lower | |||||
| *> triangular part of A is not referenced. If UPLO = 'L', the | |||||
| *> leading N-by-N lower triangular part of the array A contains | |||||
| *> the lower triangular matrix, and the strictly upper | |||||
| *> triangular part of A is not referenced. If DIAG = 'U', the | |||||
| *> diagonal elements of A are also not referenced and are | |||||
| *> assumed to be 1. | |||||
| *> On exit, the (triangular) inverse of the original matrix, in | |||||
| *> the same storage format. | |||||
| *> \endverbatim | |||||
| *> | |||||
| *> \param[in] LDA | |||||
| *> \verbatim | |||||
| *> LDA is INTEGER | |||||
| *> The leading dimension of the array A. LDA >= max(1,N). | |||||
| *> \endverbatim | |||||
| *> | |||||
| *> \param[out] INFO | |||||
| *> \verbatim | |||||
| *> INFO is INTEGER | |||||
| *> = 0: successful exit | |||||
| *> < 0: if INFO = -i, the i-th argument had an illegal value | |||||
| *> > 0: if INFO = i, A(i,i) is exactly zero. The triangular | |||||
| *> matrix is singular and its inverse can not be computed. | |||||
| *> \endverbatim | |||||
| * | |||||
| * Authors: | |||||
| * ======== | |||||
| * | |||||
| *> \author Univ. of Tennessee | |||||
| *> \author Univ. of California Berkeley | |||||
| *> \author Univ. of Colorado Denver | |||||
| *> \author NAG Ltd. | |||||
| * | |||||
| *> \date November 2011 | |||||
| * | |||||
| *> \ingroup doubleOTHERcomputational | |||||
| * | |||||
| * ===================================================================== | |||||
| SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO ) | |||||
| * | |||||
| * -- LAPACK computational routine (version 3.4.0) -- | |||||
| * -- LAPACK is a software package provided by Univ. of Tennessee, -- | |||||
| * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | |||||
| * November 2011 | |||||
| * | |||||
| * .. Scalar Arguments .. | |||||
| CHARACTER DIAG, UPLO | |||||
| INTEGER INFO, LDA, N | |||||
| * .. | |||||
| * .. Array Arguments .. | |||||
| DOUBLE PRECISION A( LDA, * ) | |||||
| * .. | |||||
| * | |||||
| * ===================================================================== | |||||
| * | |||||
| * .. Parameters .. | |||||
| DOUBLE PRECISION ONE, ZERO | |||||
| PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) | |||||
| * .. | |||||
| * .. Local Scalars .. | |||||
| LOGICAL NOUNIT, UPPER | |||||
| INTEGER J, JB, NB, NN | |||||
| * .. | |||||
| * .. External Functions .. | |||||
| LOGICAL LSAME | |||||
| INTEGER ILAENV | |||||
| EXTERNAL LSAME, ILAENV | |||||
| * .. | |||||
| * .. External Subroutines .. | |||||
| EXTERNAL DTRMM, DTRSM, DTRTI2, XERBLA | |||||
| * .. | |||||
| * .. Intrinsic Functions .. | |||||
| INTRINSIC MAX, MIN | |||||
| * .. | |||||
| * .. Executable Statements .. | |||||
| * | |||||
| * Test the input parameters. | |||||
| * | |||||
| INFO = 0 | |||||
| UPPER = LSAME( UPLO, 'U' ) | |||||
| NOUNIT = LSAME( DIAG, 'N' ) | |||||
| IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN | |||||
| INFO = -1 | |||||
| ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN | |||||
| INFO = -2 | |||||
| ELSE IF( N.LT.0 ) THEN | |||||
| INFO = -3 | |||||
| ELSE IF( LDA.LT.MAX( 1, N ) ) THEN | |||||
| INFO = -5 | |||||
| END IF | |||||
| IF( INFO.NE.0 ) THEN | |||||
| CALL XERBLA( 'DTRTRI', -INFO ) | |||||
| RETURN | |||||
| END IF | |||||
| * | |||||
| * Quick return if possible | |||||
| * | |||||
| IF( N.EQ.0 ) | |||||
| $ RETURN | |||||
| * | |||||
| * Check for singularity if non-unit. | |||||
| * | |||||
| IF( NOUNIT ) THEN | |||||
| DO 10 INFO = 1, N | |||||
| IF( A( INFO, INFO ).EQ.ZERO ) | |||||
| $ RETURN | |||||
| 10 CONTINUE | |||||
| INFO = 0 | |||||
| END IF | |||||
| * | |||||
| * Determine the block size for this environment. | |||||
| * | |||||
| NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 ) | |||||
| IF( NB.LE.1 .OR. NB.GE.N ) THEN | |||||
| * | |||||
| * Use unblocked code | |||||
| * | |||||
| CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) | |||||
| ELSE | |||||
| * | |||||
| * Use blocked code | |||||
| * | |||||
| IF( UPPER ) THEN | |||||
| * | |||||
| * Compute inverse of upper triangular matrix | |||||
| * | |||||
| DO 20 J = 1, N, NB | |||||
| JB = MIN( NB, N-J+1 ) | |||||
| * | |||||
| * Compute rows 1:j-1 of current block column | |||||
| * | |||||
| CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, | |||||
| $ JB, ONE, A, LDA, A( 1, J ), LDA ) | |||||
| CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, | |||||
| $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) | |||||
| * | |||||
| * Compute inverse of current diagonal block | |||||
| * | |||||
| CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) | |||||
| 20 CONTINUE | |||||
| ELSE | |||||
| * | |||||
| * Compute inverse of lower triangular matrix | |||||
| * | |||||
| NN = ( ( N-1 ) / NB )*NB + 1 | |||||
| DO 30 J = NN, 1, -NB | |||||
| JB = MIN( NB, N-J+1 ) | |||||
| IF( J+JB.LE.N ) THEN | |||||
| * | |||||
| * Compute rows j+jb:n of current block column | |||||
| * | |||||
| CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG, | |||||
| $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, | |||||
| $ A( J+JB, J ), LDA ) | |||||
| CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG, | |||||
| $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, | |||||
| $ A( J+JB, J ), LDA ) | |||||
| END IF | |||||
| * | |||||
| * Compute inverse of current diagonal block | |||||
| * | |||||
| CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) | |||||
| 30 CONTINUE | |||||
| END IF | |||||
| END IF | |||||
| * | |||||
| RETURN | |||||
| * | |||||
| * End of DTRTRI | |||||
| * | |||||
| END | |||||
| @@ -127,8 +127,14 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
| if (min_i > GEMM_P) min_i = GEMM_P; | if (min_i > GEMM_P) min_i = GEMM_P; | ||||
| if (ls == i + bk) { | if (ls == i + bk) { | ||||
| NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); | |||||
| //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); | |||||
| GEMM_BETA(min_i, bk, 0, dm1, | |||||
| #ifdef COMPLEX | |||||
| ZERO, | |||||
| #endif | |||||
| NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda); | |||||
| TRSM_KERNEL_RN(min_i, bk, bk, dm1, | TRSM_KERNEL_RN(min_i, bk, bk, dm1, | ||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| ZERO, | ZERO, | ||||
| @@ -171,8 +177,13 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
| min_i = i - is; | min_i = i - is; | ||||
| if (min_i > GEMM_P) min_i = GEMM_P; | if (min_i > GEMM_P) min_i = GEMM_P; | ||||
| NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); | |||||
| //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); | |||||
| GEMM_BETA(min_i, bk, 0, dm1, | |||||
| #ifdef COMPLEX | |||||
| ZERO, | |||||
| #endif | |||||
| NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda); | |||||
| TRSM_KERNEL_RN(min_i, bk, bk, dm1, | TRSM_KERNEL_RN(min_i, bk, bk, dm1, | ||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| ZERO, | ZERO, | ||||
| @@ -1,8 +1,8 @@ | |||||
| /*This is only for "make install" target.*/ | /*This is only for "make install" target.*/ | ||||
| #if defined(OS_WINNT) || defined(OS_CYGWIN_NT) || defined(OS_INTERIX) | |||||
| #define WINDOWS_ABI | |||||
| #define OS_WINDOWS | |||||
| #if defined(OPENBLAS_OS_WINNT) || defined(OPENBLAS_OS_CYGWIN_NT) || defined(OPENBLAS_OS_INTERIX) | |||||
| #define OPENBLAS_WINDOWS_ABI | |||||
| #define OPENBLAS_OS_WINDOWS | |||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define DOUBLE_DEFINED DOUBLE | #define DOUBLE_DEFINED DOUBLE | ||||
| @@ -10,23 +10,23 @@ | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #ifdef NEEDBUNDERSCORE | |||||
| #ifdef OPENBLAS_NEEDBUNDERSCORE | |||||
| #define BLASFUNC(FUNC) FUNC##_ | #define BLASFUNC(FUNC) FUNC##_ | ||||
| #else | #else | ||||
| #define BLASFUNC(FUNC) FUNC | #define BLASFUNC(FUNC) FUNC | ||||
| #endif | #endif | ||||
| #ifdef QUAD_PRECISION | |||||
| #ifdef OPENBLAS_QUAD_PRECISION | |||||
| typedef struct { | typedef struct { | ||||
| unsigned long x[2]; | unsigned long x[2]; | ||||
| } xdouble; | } xdouble; | ||||
| #elif defined EXPRECISION | |||||
| #elif defined OPENBLAS_EXPRECISION | |||||
| #define xdouble long double | #define xdouble long double | ||||
| #else | #else | ||||
| #define xdouble double | #define xdouble double | ||||
| #endif | #endif | ||||
| #if defined(OS_WINDOWS) && defined(__64BIT__) | |||||
| #if defined(OPENBLAS_OS_WINDOWS) && defined(OPENBLAS___64BIT__) | |||||
| typedef long long BLASLONG; | typedef long long BLASLONG; | ||||
| typedef unsigned long long BLASULONG; | typedef unsigned long long BLASULONG; | ||||
| #else | #else | ||||
| @@ -34,7 +34,7 @@ typedef long BLASLONG; | |||||
| typedef unsigned long BLASULONG; | typedef unsigned long BLASULONG; | ||||
| #endif | #endif | ||||
| #ifdef USE64BITINT | |||||
| #ifdef OPENBLAS_USE64BITINT | |||||
| typedef BLASLONG blasint; | typedef BLASLONG blasint; | ||||
| #else | #else | ||||
| typedef int blasint; | typedef int blasint; | ||||