Fix instabilities in CGEMM/CTRMM/DNRM2 on Apple M1/M2 under OSX (tags/v0.3.24)
| @@ -267,9 +267,9 @@ int detect(void) | |||||
| } | } | ||||
| #else | #else | ||||
| #ifdef __APPLE__ | #ifdef __APPLE__ | ||||
| sysctlbyname("hw.cpufamily",&value,&length,NULL,0); | |||||
| if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1 | |||||
| if (value == 3660830781) return CPU_VORTEX; //A15/M2 | |||||
| sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0); | |||||
| if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1 | |||||
| if (value64 == 3660830781) return CPU_VORTEX; //A15/M2 | |||||
| #endif | #endif | ||||
| return CPU_ARMV8; | return CPU_ARMV8; | ||||
| #endif | #endif | ||||
| @@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define pCRow3 x15 | #define pCRow3 x15 | ||||
| #define pA x16 | #define pA x16 | ||||
| #define alphaR w17 | #define alphaR w17 | ||||
| #define alphaI w18 | |||||
| #define alphaI w19 | |||||
| #define alpha0_R s10 | #define alpha0_R s10 | ||||
| #define alphaV0_R v10.s[0] | #define alphaV0_R v10.s[0] | ||||
| @@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define pCRow3 x15 | #define pCRow3 x15 | ||||
| #define pA x16 | #define pA x16 | ||||
| #define alphaR w17 | #define alphaR w17 | ||||
| #define alphaI w18 | |||||
| #define alphaI w19 | |||||
| #define alpha0_R s10 | #define alpha0_R s10 | ||||
| #define alphaV0_R v10.s[0] | #define alphaV0_R v10.s[0] | ||||
| @@ -49,10 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define pCRow3 x15 | #define pCRow3 x15 | ||||
| #define pA x16 | #define pA x16 | ||||
| #define alphaR w17 | #define alphaR w17 | ||||
| #define alphaI w18 | |||||
| #define temp x19 | |||||
| #define tempOffset x20 | |||||
| #define tempK x21 | |||||
| #define alphaI w19 | |||||
| #define temp x20 | |||||
| #define tempOffset x21 | |||||
| #define tempK x22 | |||||
| #define alpha0_R s10 | #define alpha0_R s10 | ||||
| #define alphaV0_R v10.s[0] | #define alphaV0_R v10.s[0] | ||||
| @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "common.h" | #include "common.h" | ||||
| #include <float.h> | |||||
| #include <arm_neon.h> | #include <arm_neon.h> | ||||
| #if defined(SMP) | #if defined(SMP) | ||||
| @@ -404,7 +404,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) | |||||
| #else | #else | ||||
| nrm2_compute(n, x, inc_x, &ssq, &scale); | nrm2_compute(n, x, inc_x, &ssq, &scale); | ||||
| #endif | #endif | ||||
| if (fabs(scale) <1.e-300) return 0.; | |||||
| volatile FLOAT sca = fabs(scale); | |||||
| if (sca < DBL_MIN) return 0.; | |||||
| ssq = sqrt(ssq) * scale; | ssq = sqrt(ssq) * scale; | ||||
| return ssq; | return ssq; | ||||