diff --git a/lib/libm/arch/aarch64/fenv.c b/lib/libm/arch/aarch64/fenv.c
index 6921d103cd3a..b6da2f9df381 100644
--- a/lib/libm/arch/aarch64/fenv.c
+++ b/lib/libm/arch/aarch64/fenv.c
@@ -63,7 +63,7 @@ __weak_alias(feupdateenv,_feupdateenv)
 
 const fenv_t __fe_dfl_env = {
 	.__fpsr = 0,
-	.__fpcr = FPCR_FZ|FPCR_DN|FPCR_RN,
+	.__fpcr = __SHIFTIN(FPCR_RN, FPCR_RMODE),
 };
 
 /*
diff --git a/sys/arch/aarch64/aarch64/fpu.c b/sys/arch/aarch64/aarch64/fpu.c
index 53cfc13ba047..85c728e55bbc 100644
--- a/sys/arch/aarch64/aarch64/fpu.c
+++ b/sys/arch/aarch64/aarch64/fpu.c
@@ -75,10 +75,62 @@ fpu_state_load(lwp_t *l, unsigned int flags)
 	KASSERT(l == curlwp);
 
 	if (__predict_false((flags & PCU_VALID) == 0)) {
+		uint64_t mvfr1 = reg_mvfr1_el1_read();
+		bool fp16 = false;
+		uint32_t fpcr = 0;
+
+		/*
+		 * Determine whether ARMv8.2-FP16 binary16
+		 * floating-point arithmetic is supported.
+		 */
+		switch (__SHIFTOUT(mvfr1, MVFR1_FPHP)) {
+		case MVFR1_FPHP_HALF_ARITH:
+			fp16 = true;
+			break;
+		}
+
+		/* Rounding mode: round to nearest, ties to even. */
+		fpcr |= __SHIFTIN(FPCR_RN, FPCR_RMODE);
+
+		/* NaN propagation or default NaN. */
+		switch (__SHIFTOUT(mvfr1, MVFR1_FPDNAN)) {
+		case MVFR1_FPDNAN_NAN:
+			/*
+			 * IEEE 754 NaN propagation supported. Don't
+			 * enable default NaN mode.
+			 */
+			break;
+		default:
+			/*
+			 * IEEE 754 NaN propagation not supported, so
+			 * enable default NaN mode.
+			 */
+			fpcr |= FPCR_DN;
+		}
+
+		/* Subnormal arithmetic or flush-to-zero. */
+		switch (__SHIFTOUT(mvfr1, MVFR1_FPFTZ)) {
+		case MVFR1_FPFTZ_DENORMAL:
+			/*
+			 * IEEE 754 subnormal arithmetic supported.
+			 * Don't enable flush-to-zero mode.
+			 */
+			break;
+		default:
+			/*
+			 * IEEE 754 subnormal arithmetic not supported,
+			 * so enable flush-to-zero mode. If FP16 is
+			 * supported, also enable flush-to-zero for
+			 * binary16 arithmetic.
+			 */
+			fpcr |= FPCR_FZ;
+			if (fp16)
+				fpcr |= FPCR_FZ16;
+		}
+
 		/* initialize fpregs */
 		memset(&pcb->pcb_fpregs, 0, sizeof(pcb->pcb_fpregs));
-		pcb->pcb_fpregs.fpcr =
-		    FPCR_DN | FPCR_FZ | __SHIFTIN(FPCR_RN, FPCR_RMODE);
+		pcb->pcb_fpregs.fpcr = fpcr;
 
 		curcpu()->ci_vfp_use.ev_count++;
 	} else {
diff --git a/sys/arch/aarch64/include/armreg.h b/sys/arch/aarch64/include/armreg.h
index 2b5370bf0d03..eaecfa884e3b 100644
--- a/sys/arch/aarch64/include/armreg.h
+++ b/sys/arch/aarch64/include/armreg.h
@@ -102,6 +102,7 @@ AARCH64REG_WRITE_INLINE(fpcr)
 #define	 FPCR_RM		 2		// Round towards Minus infinity
 #define	 FPCR_RZ		 3		// Round towards Zero
 #define	FPCR_STRIDE		__BITS(21,20)
+#define	FPCR_FZ16		__BIT(19)	// Flush-To-Zero for FP16
 #define	FPCR_LEN		__BITS(18,16)
 #define	FPCR_IDE		__BIT(15)	// Input Denormal Exception enable
 #define	FPCR_IXE		__BIT(12)	// IneXact Exception enable
@@ -303,9 +304,11 @@ AARCH64REG_READ_INLINE(mvfr1_el1)
 #define	 MVFR1_FPHP_NONE	 0
 #define	 MVFR1_FPHP_HALF_SINGLE	 1
 #define	 MVFR1_FPHP_HALF_DOUBLE	 2
+#define	 MVFR1_FPHP_HALF_ARITH	 3	// conversions + FP16 arithmetic
 #define	MVFR1_SIMDHP		__BITS(23,20)
 #define	 MVFR1_SIMDHP_NONE	 0
 #define	 MVFR1_SIMDHP_HALF	 1
+#define	 MVFR1_SIMDHP_HALF_ARITH 2	// conversions + FP16 arithmetic (2 here; FPHP uses 3)
 #define	MVFR1_SIMDSP		__BITS(19,16)
 #define	 MVFR1_SIMDSP_NONE	 0
 #define	 MVFR1_SIMDSP_SINGLE	 1