diff -r 14228ccdcddb -r 2d0f42cbc645 share/man/man9/workqueue.9
--- a/share/man/man9/workqueue.9	Fri Jul 31 02:02:15 2020 +0000
+++ b/share/man/man9/workqueue.9	Fri Jul 31 18:29:28 2020 +0000
@@ -83,6 +83,11 @@ The highest IPL at which this workqueue
 The value of 0 indicates a standard create operation, however the
 following flags may be bitwise ORed together:
 .Bl -tag -width WQ_MPSAFE
+.It Dv WQ_FPU
+Specifies that the kthread must be allowed to use any machine-dependent
+per-CPU floating-point units or SIMD vector units, as in
+.Xr kthread_fpu_enter 9 Ns / Ns Xr kthread_fpu_exit 9 ,
+when it executes the worker function.
 .It Dv WQ_MPSAFE
 Specifies that the workqueue is multiprocessor safe and does its own
 locking; otherwise the kernel lock will be held while processing work.
diff -r 14228ccdcddb -r 2d0f42cbc645 sys/arch/aarch64/aarch64/fpu.c
--- a/sys/arch/aarch64/aarch64/fpu.c	Fri Jul 31 02:02:15 2020 +0000
+++ b/sys/arch/aarch64/aarch64/fpu.c	Fri Jul 31 18:29:28 2020 +0000
@@ -35,6 +35,8 @@
 #include
 #include
+#include
+#include
 #include
 #include
@@ -176,12 +178,30 @@ fpu_state_release(lwp_t *l)
 	__asm __volatile ("isb");
 }
 
+static const struct fpreg zero_fpreg;
+
+/*
+ * True if this is a system thread with its own private FPU state.
+ */
+static inline bool
+lwp_system_fpu_p(struct lwp *l)
+{
+
+	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
+	    (LW_SYSTEM|LW_SYSTEM_FPU);
+}
+
 void
 fpu_kern_enter(void)
 {
 	struct cpu_info *ci;
 	int s;
 
+	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
+		KASSERT(!cpu_softintr_p());
+		return;
+	}
+
 	/*
 	 * Block interrupts up to IPL_VM.  We must block preemption
 	 * since -- if this is a user thread -- there is nowhere to
@@ -209,10 +229,16 @@ fpu_kern_enter(void)
 void
 fpu_kern_leave(void)
 {
-	static const struct fpreg zero_fpreg;
-	struct cpu_info *ci = curcpu();
+	struct cpu_info *ci;
 	int s;
 
+	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
+		KASSERT(!cpu_softintr_p());
+		return;
+	}
+
+	ci = curcpu();
+
 	KASSERT(ci->ci_cpl == IPL_VM);
 	KASSERT(ci->ci_kfpu_spl != -1);
 
@@ -234,3 +260,19 @@ fpu_kern_leave(void)
 	ci->ci_kfpu_spl = -1;
 	splx(s);
 }
+
+void
+kthread_fpu_enter_md(void)
+{
+
+	fpu_load(curlwp);
+}
+
+void
+kthread_fpu_exit_md(void)
+{
+
+	/* XXX Should fpu_state_release zero the registers itself? */
+	load_fpregs(&zero_fpreg);
+	fpu_discard(curlwp, 0);
+}
diff -r 14228ccdcddb -r 2d0f42cbc645 sys/arch/aarch64/aarch64/trap.c
--- a/sys/arch/aarch64/aarch64/trap.c	Fri Jul 31 02:02:15 2020 +0000
+++ b/sys/arch/aarch64/aarch64/trap.c	Fri Jul 31 18:29:28 2020 +0000
@@ -242,6 +242,12 @@ trap_el1h_sync(struct trapframe *tf)
 		break;
 
 	case ESR_EC_FP_ACCESS:
+		if ((curlwp->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
+		    (LW_SYSTEM|LW_SYSTEM_FPU)) {
+			fpu_load(curlwp);
+			break;
+		}
+		/*FALLTHROUGH*/
 	case ESR_EC_FP_TRAP_A64:
 	case ESR_EC_PC_ALIGNMENT:
 	case ESR_EC_SP_ALIGNMENT:
diff -r 14228ccdcddb -r 2d0f42cbc645 sys/arch/arm/vfp/vfp_init.c
--- a/sys/arch/arm/vfp/vfp_init.c	Fri Jul 31 02:02:15 2020 +0000
+++ b/sys/arch/arm/vfp/vfp_init.c	Fri Jul 31 18:29:28 2020 +0000
@@ -38,6 +38,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -504,7 +505,8 @@ neon_handler(u_int address, u_int insn, 
 		return 1;
 
 	/* This shouldn't ever happen.  */
-	if (fault_code != FAULT_USER)
+	if (fault_code != FAULT_USER &&
+	    (curlwp->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) == LW_SYSTEM)
 		panic("NEON fault in non-user mode");
 
 	/* if we already own the FPU and it's enabled, raise SIGILL */
@@ -668,6 +670,19 @@ vfp_setcontext(struct lwp *l, const mcon
 	    sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
 }
 
+/*
+ * True if this is a system thread with its own private FPU state.
+ */
+static inline bool
+lwp_system_fpu_p(struct lwp *l)
+{
+
+	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
+	    (LW_SYSTEM|LW_SYSTEM_FPU);
+}
+
+static const struct vfpreg zero_vfpreg;
+
 void
 fpu_kern_enter(void)
 {
@@ -675,6 +690,11 @@ fpu_kern_enter(void)
 	uint32_t fpexc;
 	int s;
 
+	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
+		KASSERT(!cpu_softintr_p());
+		return;
+	}
+
 	/*
 	 * Block interrupts up to IPL_VM.  We must block preemption
 	 * since -- if this is a user thread -- there is nowhere to
@@ -701,11 +721,15 @@ fpu_kern_enter(void)
 void
 fpu_kern_leave(void)
 {
-	static const struct vfpreg zero_vfpreg;
 	struct cpu_info *ci = curcpu();
 	int s;
 	uint32_t fpexc;
 
+	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
+		KASSERT(!cpu_softintr_p());
+		return;
+	}
+
 	KASSERT(ci->ci_cpl == IPL_VM);
 	KASSERT(ci->ci_kfpu_spl != -1);
 
@@ -730,4 +754,20 @@ fpu_kern_leave(void)
 	splx(s);
 }
 
+void
+kthread_fpu_enter_md(void)
+{
+
+	pcu_load(&arm_vfp_ops);
+}
+
+void
+kthread_fpu_exit_md(void)
+{
+
+	/* XXX Should vfp_state_release zero the registers itself? */
+	load_vfpregs(&zero_vfpreg);
+	vfp_discardcontext(curlwp, 0);
+}
+
 #endif /* FPU_VFP */
diff -r 14228ccdcddb -r 2d0f42cbc645 sys/arch/x86/x86/fpu.c
--- a/sys/arch/x86/x86/fpu.c	Fri Jul 31 02:02:15 2020 +0000
+++ b/sys/arch/x86/x86/fpu.c	Fri Jul 31 18:29:28 2020 +0000
@@ -107,6 +107,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -131,13 +132,35 @@ void fpu_switch(struct lwp *, struct lwp
 
 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
 
+/*
+ * True if this is a thread that is allowed to use the FPU -- either a
+ * user thread, or a system thread with LW_SYSTEM_FPU enabled.
+ */
+static inline bool
+lwp_can_haz_fpu(struct lwp *l)
+{
+
+	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) != LW_SYSTEM;
+}
+
+/*
+ * True if this is a system thread with its own private FPU state.
+ */
+static inline bool
+lwp_system_fpu_p(struct lwp *l)
+{
+
+	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
+	    (LW_SYSTEM|LW_SYSTEM_FPU);
+}
+
 static inline union savefpu *
 fpu_lwp_area(struct lwp *l)
 {
 	struct pcb *pcb = lwp_getpcb(l);
 	union savefpu *area = &pcb->pcb_savefpu;
 
-	KASSERT((l->l_flag & LW_SYSTEM) == 0);
+	KASSERT(lwp_can_haz_fpu(l));
 	if (l == curlwp) {
 		fpu_save();
 	}
@@ -155,7 +178,7 @@ fpu_save_lwp(struct lwp *l)
 	s = splvm();
 	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
-		KASSERT((l->l_flag & LW_SYSTEM) == 0);
+		KASSERT(lwp_can_haz_fpu(l));
 		fpu_area_save(area, x86_xsave_features);
 		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
 	}
@@ -307,7 +330,7 @@ fpu_switch(struct lwp *oldlwp, struct lw
 	    cpu_index(ci), ci->ci_ilevel);
 
 	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
-		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
+		KASSERT(lwp_can_haz_fpu(oldlwp));
 		pcb = lwp_getpcb(oldlwp);
 		fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features);
 		oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
@@ -322,11 +345,11 @@ fpu_lwp_fork(struct lwp *l1, struct lwp 
 	union savefpu *fpu_save;
 
 	/* Kernel threads have no FPU. */
-	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
+	if (__predict_false(!lwp_can_haz_fpu(l2))) {
 		return;
 	}
 	/* For init(8).  */
-	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
+	if (__predict_false(!lwp_can_haz_fpu(l1))) {
 		memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
 		return;
 	}
@@ -350,6 +373,8 @@ fpu_lwp_abandon(struct lwp *l)
 
 /* -------------------------------------------------------------------------- */
 
+static const union savefpu zero_fpu __aligned(64);
+
 /*
  * fpu_kern_enter()
  *
@@ -369,6 +394,11 @@ fpu_kern_enter(void)
 	struct cpu_info *ci;
 	int s;
 
+	if (lwp_system_fpu_p(l) && !cpu_intr_p()) {
+		KASSERT(!cpu_softintr_p());
+		return;
+	}
+
 	s = splvm();
 	ci = curcpu();
 
@@ -401,10 +431,16 @@ fpu_kern_enter(void)
 void
 fpu_kern_leave(void)
 {
-	static const union savefpu zero_fpu __aligned(64);
-	struct cpu_info *ci = curcpu();
+	struct cpu_info *ci;
 	int s;
 
+	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
+		KASSERT(!cpu_softintr_p());
+		return;
+	}
+
+	ci = curcpu();
+
 	KASSERT(ci->ci_ilevel == IPL_VM);
 	KASSERT(ci->ci_kfpu_spl != -1);
 
@@ -426,6 +462,23 @@ fpu_kern_leave(void)
 	splx(s);
 }
 
+void
+kthread_fpu_enter_md(void)
+{
+
+	/* Enable the FPU by clearing CR0_TS. */
+	clts();
+}
+
+void
+kthread_fpu_exit_md(void)
+{
+
+	/* Zero the FPU state and disable the FPU by setting CR0_TS. */
+	fpu_area_restore(&zero_fpu, x86_xsave_features);
+	stts();
+}
+
 /* -------------------------------------------------------------------------- */
 
 /*
diff -r 14228ccdcddb -r 2d0f42cbc645 sys/dev/cgd.c
--- a/sys/dev/cgd.c	Fri Jul 31 02:02:15 2020 +0000
+++ b/sys/dev/cgd.c	Fri Jul 31 18:29:28 2020 +0000
@@ -673,7 +673,7 @@ cgd_create_worker(void)
 	cp = kmem_alloc(sizeof(struct pool), KM_SLEEP);
 
 	error = workqueue_create(&wq, "cgd", cgd_process, NULL,
-	    PRI_BIO, IPL_BIO, WQ_MPSAFE | WQ_PERCPU);
+	    PRI_BIO, IPL_BIO, WQ_FPU|WQ_MPSAFE|WQ_PERCPU);
 	if (error) {
 		kmem_free(cp, sizeof(struct pool));
 		kmem_free(cw, sizeof(struct cgd_worker));
@@ -684,9 +684,8 @@ cgd_create_worker(void)
 	cw->cw_wq = wq;
 	pool_init(cw->cw_cpool, sizeof(struct cgd_xfer), 0, 0, 0,
 	    "cgdcpl", NULL, IPL_BIO);
+	mutex_init(&cw->cw_lock, MUTEX_DEFAULT, IPL_BIO);
 
-	mutex_init(&cw->cw_lock, MUTEX_DEFAULT, IPL_BIO);
-
 	return cw;
 }
diff -r 14228ccdcddb -r 2d0f42cbc645 sys/kern/subr_pcu.c
--- a/sys/kern/subr_pcu.c	Fri Jul 31 02:02:15 2020 +0000
+++ b/sys/kern/subr_pcu.c	Fri Jul 31 18:29:28 2020 +0000
@@ -89,6 +89,17 @@ typedef struct {
 extern const pcu_ops_t * const pcu_ops_md_defs[];
 
 /*
+ * pcu_available_p: true if lwp is allowed to use PCU state.
+ */
+static inline bool
+pcu_available_p(struct lwp *l)
+{
+
+	/* XXX Not sure this is safe unless l is locked! */
+	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) != LW_SYSTEM;
+}
+
+/*
  * pcu_switchpoint: release PCU state if the LWP is being run on another CPU.
  * This routine is called on each context switch by by mi_switch().
 */
@@ -135,7 +146,7 @@ pcu_discard_all(lwp_t *l)
 	 * due to an error in the LWP creation path before it ever runs.
 	 */
 	KASSERT(l == curlwp || l->l_stat == LSIDL ||
-	    ((l->l_flag & LW_SYSTEM) && pcu_valid == 0));
+	    (!pcu_available_p(l) && pcu_valid == 0));
 
 	if (__predict_true(pcu_valid == 0)) {
 		/* PCUs are not in use. */
@@ -174,7 +185,7 @@ pcu_save_all(lwp_t *l)
 	 * with a different LWP (forking a system LWP or doing a coredump of
 	 * a process with multiple threads) and we need to deal with that.
 	 */
-	KASSERT(l == curlwp || (((l->l_flag & LW_SYSTEM) ||
+	KASSERT(l == curlwp || ((!pcu_available_p(l) ||
 	    (curlwp->l_proc == l->l_proc && l->l_stat == LSSUSPENDED)) &&
 	    pcu_valid == 0));
diff -r 14228ccdcddb -r 2d0f42cbc645 sys/kern/subr_workqueue.c
--- a/sys/kern/subr_workqueue.c	Fri Jul 31 02:02:15 2020 +0000
+++ b/sys/kern/subr_workqueue.c	Fri Jul 31 18:29:28 2020 +0000
@@ -112,10 +112,13 @@ workqueue_worker(void *cookie)
 {
 	struct workqueue *wq = cookie;
 	struct workqueue_queue *q;
+	int s;
 
 	/* find the workqueue of this kthread */
 	q = workqueue_queue_lookup(wq, curlwp->l_cpu);
 
+	if (wq->wq_flags & WQ_FPU)
+		s = kthread_fpu_enter();
 	for (;;) {
 		/*
 		 * we violate abstraction of SIMPLEQ.
@@ -141,6 +144,8 @@ workqueue_worker(void *cookie)
 		}
 		mutex_exit(&q->q_mutex);
 	}
+	if (wq->wq_flags & WQ_FPU)
+		kthread_fpu_exit(s);
 }
 
 static void
diff -r 14228ccdcddb -r 2d0f42cbc645 sys/sys/workqueue.h
--- a/sys/sys/workqueue.h	Fri Jul 31 02:02:15 2020 +0000
+++ b/sys/sys/workqueue.h	Fri Jul 31 18:29:28 2020 +0000
@@ -47,6 +47,7 @@ struct workqueue;
 
 #define	WQ_MPSAFE	0x01
 #define	WQ_PERCPU	0x02
+#define	WQ_FPU		0x04
 
 int workqueue_create(struct workqueue **, const char *,
     void (*)(struct work *, void *), void *, pri_t, int, int);
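
For review context, a minimal usage sketch of the new flag (illustrative only, not part of the patch; the example_* names, "examplewq", and the PRI_NONE/IPL_NONE choices are invented for the sketch).  A driver whose worker function needs FPU/SIMD state passes WQ_FPU to workqueue_create(9), as cgd(4) now does above; the worker kthread then runs bracketed by kthread_fpu_enter()/kthread_fpu_exit(), so its worker function may use the FPU directly, and fpu_kern_enter()/fpu_kern_leave() become no-ops for it outside interrupt context.

/* Illustrative sketch only -- example_* names are hypothetical. */
#include <sys/workqueue.h>

static struct workqueue *example_wq;

static void
example_process(struct work *wk, void *arg)
{

	/*
	 * Because the queue was created with WQ_FPU, this runs in a
	 * worker kthread that holds private FPU state, so FPU/SIMD
	 * instructions may be used here without an explicit
	 * fpu_kern_enter()/fpu_kern_leave() bracket.
	 */
}

static int
example_init(void)
{

	return workqueue_create(&example_wq, "examplewq", example_process,
	    NULL, PRI_NONE, IPL_NONE, WQ_FPU | WQ_MPSAFE);
}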