Browse Source

Merge pull request #64 from xianyi/develop

rebase
tags/v0.3.10^2
Martin Kroeker GitHub 5 years ago
parent
commit
63d26090f5
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 26 additions and 19 deletions
  1. +7
    -6
      Makefile.system
  2. +1
    -0
      cmake/fc.cmake
  3. +9
    -0
      cmake/system.cmake
  4. +5
    -1
      exports/Makefile
  5. +4
    -12
      kernel/arm64/sgemm_kernel_8x8_cortexa53.S

+ 7
- 6
Makefile.system View File

@@ -797,11 +797,11 @@ endif
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -fopenmp FCOMMON_OPT += -fopenmp
endif endif
ifeq ($(OSNAME), Linux)
ifeq ($(ARCH), x86_64)
FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
endif
endif
#ifeq ($(OSNAME), Linux)
#ifeq ($(ARCH), x86_64)
#FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
#endif
#endif
endif endif


ifeq ($(F_COMPILER), G77) ifeq ($(F_COMPILER), G77)
@@ -1276,7 +1276,8 @@ endif


override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF) override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
ifeq ($(FLANG_VENDOR),AOCC)
#ifeq ($(FLANG_VENDOR),AOCC)
ifeq ($(F_COMPILER),FLANG)
override FFLAGS += $(filter-out -O2 -O3,$(COMMON_OPT)) -O1 $(FCOMMON_OPT) override FFLAGS += $(filter-out -O2 -O3,$(COMMON_OPT)) -O1 $(FCOMMON_OPT)
else else
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)


+ 1
- 0
cmake/fc.cmake View File

@@ -16,6 +16,7 @@ if (${F_COMPILER} STREQUAL "FLANG")
if (USE_OPENMP) if (USE_OPENMP)
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp") set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
endif () endif ()
set(FCOMMON_OPT "${FCOMMON_OPT} -frecursive")
endif () endif ()


if (${F_COMPILER} STREQUAL "G77") if (${F_COMPILER} STREQUAL "G77")


+ 9
- 0
cmake/system.cmake View File

@@ -417,6 +417,15 @@ if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE") set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
endif () endif ()


# Release builds with the FLANG Fortran compiler: rewrite every -O2 / -O3 in
# CMAKE_Fortran_FLAGS_RELEASE to -O1. The Makefile build caps FLANG at -O1 in
# the same way (it filters -O2/-O3 out of COMMON_OPT and appends -O1).
# NOTE(review): CMAKE_BUILD_TYPE is only meaningful for single-config
# generators; with multi-config generators this block is skipped - confirm
# whether that is acceptable.
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  if ("${F_COMPILER}" STREQUAL "FLANG")
    # Optimization levels that are replaced by -O1.
    set(FILTER_FLAGS "-O2;-O3")
    foreach (FILTER_FLAG ${FILTER_FLAGS})
      # Both expansions are quoted on purpose: an empty
      # CMAKE_Fortran_FLAGS_RELEASE would otherwise leave string(REPLACE)
      # without its required <input> argument (a configure-time error), and a
      # value containing ';' would be split into several inputs and
      # concatenated back without the separators.
      string(REPLACE "${FILTER_FLAG}" "-O1" CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE}")
    endforeach ()
  endif ()
endif ()

if (NOT DEFINED SUFFIX) if (NOT DEFINED SUFFIX)
set(SUFFIX o) set(SUFFIX o)
endif () endif ()


+ 5
- 1
exports/Makefile View File

@@ -155,8 +155,12 @@ ifeq ($(F_COMPILER), INTEL)
-Wl,--whole-archive $< -Wl,--no-whole-archive \ -Wl,--whole-archive $< -Wl,--no-whole-archive \
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB) -Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
else ifeq ($(F_COMPILER), FLANG)
$(FC) $(FFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive $< -Wl,--no-whole-archive \
-Wl,-soname,$(INTERNALNAME) $(EXTRALIB)
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK.
else else

ifneq ($(C_COMPILER), LSB) ifneq ($(C_COMPILER), LSB)
$(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \
-Wl,--whole-archive $< -Wl,--no-whole-archive \ -Wl,--whole-archive $< -Wl,--no-whole-archive \


+ 4
- 12
kernel/arm64/sgemm_kernel_8x8_cortexa53.S View File

@@ -681,12 +681,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT8x4 .macro INIT8x4
fmov s16, wzr fmov s16, wzr
fmov s17, wzr fmov s17, wzr
fmov s18, wzr
fmov s19, s16
fmov s20, wzr fmov s20, wzr
fmov s21, s16 fmov s21, s16
fmov s24, wzr
fmov s25, s16
fmov s28, wzr
fmov s29, s16
fmov s22, wzr
fmov s23, s16
.endm .endm


.macro KERNEL8x4_I .macro KERNEL8x4_I
@@ -765,14 +765,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmla v21.4s, v3.4s, v6.s[2] fmla v21.4s, v3.4s, v6.s[2]
fmla v22.4s, v2.4s, v6.s[3] fmla v22.4s, v2.4s, v6.s[3]
fmla v23.4s, v3.4s, v6.s[3] fmla v23.4s, v3.4s, v6.s[3]
fmla v24.4s, v2.4s, v7.s[0]
fmla v25.4s, v3.4s, v7.s[0]
fmla v26.4s, v2.4s, v7.s[1]
fmla v27.4s, v3.4s, v7.s[1]
fmla v28.4s, v2.4s, v7.s[2]
fmla v29.4s, v3.4s, v7.s[2]
fmla v30.4s, v2.4s, v7.s[3]
fmla v31.4s, v3.4s, v7.s[3]
.endm .endm


.macro KERNEL8x4_SUB .macro KERNEL8x4_SUB


Loading…
Cancel
Save