|
|
|
@@ -0,0 +1,269 @@ |
|
|
|
/***************************************************************************** |
|
|
|
Copyright (c) 2024, The OpenBLAS Project |
|
|
|
All rights reserved. |
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without |
|
|
|
modification, are permitted provided that the following conditions are |
|
|
|
met: |
|
|
|
|
|
|
|
1. Redistributions of source code must retain the above copyright |
|
|
|
notice, this list of conditions and the following disclaimer. |
|
|
|
|
|
|
|
2. Redistributions in binary form must reproduce the above copyright |
|
|
|
notice, this list of conditions and the following disclaimer in |
|
|
|
the documentation and/or other materials provided with the |
|
|
|
distribution. |
|
|
|
3. Neither the name of the OpenBLAS project nor the names of |
|
|
|
its contributors may be used to endorse or promote products |
|
|
|
derived from this software without specific prior written |
|
|
|
permission. |
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
|
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
|
|
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
|
|
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
|
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE |
|
|
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
|
|
**********************************************************************************/ |
|
|
|
|
|
|
|
#include <stdbool.h> |
|
|
|
|
|
|
|
#include "common.h" |
|
|
|
|
|
|
|
/* |
|
|
|
* OpenBLAS contains some kernels that are optimised for RVV 1.0. Before we |
|
|
|
* can use these kernels we need to determine whether the device supports |
|
|
|
* RVV 1.0 and what the device's VLEN is. Our strategy will be as follows. |
|
|
|
* |
|
|
|
* First we'll invoke the hwprobe syscall to detect RVV 1.0. In an ideal world, |
|
|
|
* this is all we should need to do. If the syscall is not implemented we |
|
|
|
* should be able to deduce that RVV 1.0 is not supported (as it was added to |
|
|
|
* Linux after hwprobe) and if the syscall is implemented we can use it to |
|
|
|
* determine whether RVV 1.0 is supported. However, there are some riscv64 |
|
|
|
* boards out there that implement RVV 1.0 but ship with a Linux kernel that |
|
|
|
* predates RVV vector support and hwprobe support. These kernels contain |
|
|
|
* the backported RVV patches but not the hwprobe patches and so they |
|
|
|
* advertise support for RVV via hwcap. To cater for these boards we need |
|
|
|
* to fall back to hwcap if hwprobe is not supported. Unfortunately, some |
|
|
|
* boards indicate support for RVV via hwcap even though they only support |
|
|
|
* RVV 0.7.1, which is incompatible with RVV 1.0. So an additional check is |
|
|
|
* required to test if the devices advertising support for RVV via hwcap really |
|
|
|
* support RVV 1.0. This test works by executing a vsetvli instruction that |
|
|
|
* sets the tail agnostic and mask agnostic bits in the vtype register. |
|
|
|
 * These bits are not supported prior to RVV 0.9 so will cause the vill bit to
 * be set in the vtype register on CPUs supporting 0.7.1. If this bit is set
 * we can determine that RVV 1.0 is not supported.
|
|
|
* |
|
|
|
* This approach is borrowed from |
|
|
|
* VideoLan dav1d: |
|
|
|
* (https://code.videolan.org/videolan/dav1d/-/merge_requests/1629). |
|
|
|
* |
|
|
|
* We assume that if a kernel reports the presence of RVV via hwcap that |
|
|
|
* the device supports the vsetvli instruction. |
|
|
|
* |
|
|
|
* For now we're just going to invoke the hwprobe syscall directly, rather than |
|
|
|
* invoking it through glibc. Support for hwprobe has been added to glibc but |
|
|
|
* at the time of writing this support has not yet been included in a glibc |
|
|
|
* release. Once it has, it will be better to invoke hwprobe via glibc as doing |
|
|
|
* so should take advantage of the vdso entry and be more efficient. |
|
|
|
*/ |
|
|
|
|
|
|
|
/* |
|
|
|
* This should work on Android as well but I have no way of testing. |
|
|
|
*/ |
|
|
|
|
|
|
|
#if defined(OS_LINUX) |
|
|
|
#include <unistd.h> |
|
|
|
#include <sys/syscall.h> |
|
|
|
#include <stdint.h> |
|
|
|
#include <sys/auxv.h> |
|
|
|
|
|
|
|
/*
 * Mask for the 'V' letter in the AT_HWCAP word: the bit position is the
 * letter's offset from 'A'.
 */
#define DETECT_RISCV64_HWCAP_ISA_V (1 << ('V' - 'A'))

/*
 * One key/value pair handed to the riscv_hwprobe syscall. get_coretype()
 * fills in the key before the call and reads the value afterwards.
 */
struct riscv_hwprobe {
	int64_t key;    /* property being queried */
	uint64_t value; /* result reported for that key */
};
|
|
|
|
|
|
|
/* The constants below are copied from |
|
|
|
* /usr/include/riscv64-linux-gnu/asm/hwprobe.h. We duplicate the |
|
|
|
* constants as the header file from which they are copied will only |
|
|
|
* be present if we're building on a device with Linux 6.5 or greater. |
|
|
|
*/ |
|
|
|
|
|
|
|
/* hwprobe key whose value is a bitmask of supported base extensions. */
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
/* Bit within that mask that get_coretype() tests to detect RVV 1.0. */
#define RISCV_HWPROBE_IMA_V (1 << 2)

/*
 * Syscall number for riscv_hwprobe.
 *
 * System headers publish this number as __NR_riscv_hwprobe (note the
 * leading underscores), so use that value whenever the toolchain provides
 * it. Only when the headers predate hwprobe do we fall back to the
 * hard-coded value: arch-specific syscall base (244) plus 14.
 */
#ifndef NR_riscv_hwprobe
#ifndef NR_arch_specific_syscall
#define NR_arch_specific_syscall 244
#endif
#ifdef __NR_riscv_hwprobe
#define NR_riscv_hwprobe __NR_riscv_hwprobe
#else
#define NR_riscv_hwprobe (NR_arch_specific_syscall + 14)
#endif
#endif
|
|
|
#endif // defined(OS_LINUX) |
|
|
|
|
|
|
|
unsigned detect_riscv64_get_vlenb(void); |
|
|
|
uint64_t detect_riscv64_rvv100(void); |
|
|
|
|
|
|
|
extern gotoblas_t gotoblas_RISCV64_GENERIC; |
|
|
|
#if !defined(DYNAMIC_LIST) || defined(DYN_RISCV64_ZVL256B) |
|
|
|
extern gotoblas_t gotoblas_RISCV64_ZVL256B; |
|
|
|
#endif |
|
|
|
#if !defined(DYNAMIC_LIST) || defined(DYN_RISCV64_ZVL128B) |
|
|
|
extern gotoblas_t gotoblas_RISCV64_ZVL128B; |
|
|
|
#endif |
|
|
|
|
|
|
|
/* Core-type identifiers; each one doubles as an index into cpuname[]. */
#define CPU_GENERIC          0
#define CPU_RISCV64_ZVL256B  1
#define CPU_RISCV64_ZVL128B  2

/* Names reported for each core type and accepted via OPENBLAS_CORETYPE. */
static char *cpuname[] = {
	[CPU_GENERIC]         = "riscv64_generic",
	[CPU_RISCV64_ZVL256B] = "riscv64_zvl256b",
	[CPU_RISCV64_ZVL128B] = "riscv64_zvl128b",
};

/* Number of entries in cpuname[]. */
#define NUM_CORETYPES (sizeof(cpuname) / sizeof(cpuname[0]))
|
|
|
|
|
|
|
extern int openblas_verbose(void); |
|
|
|
extern void openblas_warning(int verbose, const char* msg); |
|
|
|
|
|
|
|
char* gotoblas_corename(void) { |
|
|
|
#if !defined(DYNAMIC_LIST) || defined(DYN_RISCV64_ZVL256B) |
|
|
|
if (gotoblas == &gotoblas_RISCV64_ZVL256B) |
|
|
|
return cpuname[CPU_RISCV64_ZVL256B]; |
|
|
|
#endif |
|
|
|
#if !defined(DYNAMIC_LIST) || defined(DYN_RISCV64_ZVL128B) |
|
|
|
if (gotoblas == &gotoblas_RISCV64_ZVL128B) |
|
|
|
return cpuname[CPU_RISCV64_ZVL128B]; |
|
|
|
#endif |
|
|
|
if (gotoblas == &gotoblas_RISCV64_GENERIC) |
|
|
|
return cpuname[CPU_GENERIC]; |
|
|
|
|
|
|
|
return "unknown"; |
|
|
|
} |
|
|
|
|
|
|
|
/*
 * Auto-detect the best kernel table for the running CPU.
 *
 * Returns a pointer to the ZVL256B or ZVL128B table when RVV 1.0 is detected
 * and the reported vlenb is large enough, or NULL when no vector table
 * applies (the caller then falls back to the generic table). Detection is
 * only attempted on Linux; every other OS yields NULL.
 */
static gotoblas_t* get_coretype(void) {
#if defined(OS_LINUX)
	/*
	 * See the hwprobe documentation
	 *
	 * ( https://docs.kernel.org/arch/riscv/hwprobe.html )
	 * for more details.
	 */
	struct riscv_hwprobe probe = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 };
	int status = syscall(NR_riscv_hwprobe, &probe, 1, 0, NULL, 0);
	bool have_rvv100;

	if (status == 0) {
		/* hwprobe answered: its V bit is authoritative. */
		have_rvv100 = (probe.value & RISCV_HWPROBE_IMA_V) != 0;
	} else {
		/*
		 * hwprobe unavailable: consult hwcap, and only if it advertises V
		 * run the vsetvli-based check. The && must short-circuit so the
		 * vector instruction is never executed when hwcap reports no V.
		 */
		have_rvv100 = (getauxval(AT_HWCAP) & DETECT_RISCV64_HWCAP_ISA_V) != 0
			&& detect_riscv64_rvv100() != 0;
	}

	if (!have_rvv100)
		return NULL;

	/*
	 * RVV 1.0 is available; pick the table from the value returned by
	 * detect_riscv64_get_vlenb(). Anything below 16 is too small for
	 * either vector table.
	 */
	const unsigned vlenb = detect_riscv64_get_vlenb();

	if (vlenb < 16)
		return NULL;

#if !defined(DYNAMIC_LIST) || defined(DYN_RISCV64_ZVL256B)
	if (vlenb >= 32)
		return &gotoblas_RISCV64_ZVL256B;
#endif

	/* Either vlenb is in [16, 32) or the ZVL256B table was not built. */
#if !defined(DYNAMIC_LIST) || defined(DYN_RISCV64_ZVL128B)
	return &gotoblas_RISCV64_ZVL128B;
#else
	return NULL;
#endif

#else  /* !defined(OS_LINUX) */
	return NULL;
#endif /* defined(OS_LINUX) */
}
|
|
|
|
|
|
|
static gotoblas_t* force_coretype(char* coretype) { |
|
|
|
size_t i; |
|
|
|
char message[128]; |
|
|
|
|
|
|
|
for (i = 0; i < NUM_CORETYPES && strcasecmp(coretype, cpuname[i]); i++); |
|
|
|
|
|
|
|
if (i == CPU_GENERIC) |
|
|
|
return &gotoblas_RISCV64_GENERIC; |
|
|
|
|
|
|
|
if (i == CPU_RISCV64_ZVL256B) { |
|
|
|
#if !defined(DYNAMIC_LIST) || defined(DYN_RISCV64_ZVL256B) |
|
|
|
return &gotoblas_RISCV64_ZVL256B; |
|
|
|
#else |
|
|
|
openblas_warning(1, |
|
|
|
"riscv64_zvl256b support not compiled in\n"); |
|
|
|
return NULL; |
|
|
|
#endif |
|
|
|
} |
|
|
|
|
|
|
|
if (i == CPU_RISCV64_ZVL128B) { |
|
|
|
#if !defined(DYNAMIC_LIST) || defined(DYN_RISCV64_ZVL128B) |
|
|
|
return &gotoblas_RISCV64_ZVL128B; |
|
|
|
#else |
|
|
|
openblas_warning(1, |
|
|
|
"riscv64_zvl128b support not compiled in\n"); |
|
|
|
return NULL; |
|
|
|
#endif |
|
|
|
} |
|
|
|
|
|
|
|
snprintf(message, sizeof(message), "Core not found: %s\n", coretype); |
|
|
|
openblas_warning(1, message); |
|
|
|
|
|
|
|
return NULL; |
|
|
|
} |
|
|
|
|
|
|
|
void gotoblas_dynamic_init(void) { |
|
|
|
|
|
|
|
char coremsg[128]; |
|
|
|
char* p; |
|
|
|
|
|
|
|
if (gotoblas) return; |
|
|
|
|
|
|
|
p = getenv("OPENBLAS_CORETYPE"); |
|
|
|
if (p) |
|
|
|
gotoblas = force_coretype(p); |
|
|
|
else |
|
|
|
gotoblas = get_coretype(); |
|
|
|
|
|
|
|
if (!gotoblas) { |
|
|
|
snprintf(coremsg, sizeof(coremsg), "Falling back to generic riscv64 core\n"); |
|
|
|
openblas_warning(1, coremsg); |
|
|
|
gotoblas = &gotoblas_RISCV64_GENERIC; |
|
|
|
} |
|
|
|
|
|
|
|
if (gotoblas->init) { |
|
|
|
snprintf(coremsg, sizeof(coremsg), "Core: %s\n", |
|
|
|
gotoblas_corename()); |
|
|
|
openblas_warning(2, coremsg); |
|
|
|
gotoblas->init(); |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); |
|
|
|
exit(1); |
|
|
|
} |
|
|
|
|
|
|
|
void gotoblas_dynamic_quit(void) { |
|
|
|
gotoblas = NULL; |
|
|
|
} |