# HG changeset patch
# User Taylor R Campbell
# Date 1590115443 0
#      Fri May 22 02:44:03 2020 +0000
# Branch trunk
# Node ID f7490c54d671cb1d3e3bbe8b67cc178ee79a2e2e
# Parent  21bda4d546c91ad2467383e1a56ed41ec9c16e7c
# EXP-Topic riastradh-sdmmc-task-fixes
Fix races in sdmmc tasks and teach ld@sdmmc to abort xfers on detach.

- Teach sdmmc_add_task to queue the task only if it is not already
  queued.
- Remove the now-redundant logic elsewhere that avoided repeated
  queueing.
- Teach sdmmc_del_task to wait until the task has completed.
- Call sdmmc_del_task in various needful places.
- Replace the abuse of pcq by a lock and a tailq.  (pcq is
  multi-producer, _single_-consumer, but here there are potentially
  multiple consumers and really only one producer.)
- Teach ld_sdmmc to abort xfers on detach.

XXX Currently ld_sdmmc_detach aborts xfers _before_ ldbegindetach has
committed to detaching or not.  This is currently necessary to avoid a
deadlock, because ldbegindetach waits for xfers to drain -- which
strikes me as wrong: ldbegindetach shouldn't wait for anything; it
should only make the decision whether to commit to detaching, so the
caller can decide whether to abort xfers before we actually wait for
them in ldenddetach.
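To make the new API's semantics concrete, here is a minimal sketch of
the sdmmc_del_task contract as a caller sees it.  (Illustrative only,
not part of the patch; the softc and task here are hypothetical,
modelled on the ld_sdmmc_detach loop below.)

	/*
	 * sdmmc_del_task(sc, task, interlock) returns true if it
	 * dequeued the task before the task thread could run it, and
	 * false otherwise -- in the latter case only after any
	 * in-flight run of the task has completed.  It may drop and
	 * retake *interlock while it waits, so the caller must
	 * revalidate any state the interlock guards afterward.
	 */
	mutex_enter(&sc->sc_lock);
	if (sdmmc_del_task(sc->sc_sf->sc, &task->task, &sc->sc_lock)) {
		/* Cancelled while still pending -- we do the cleanup. */
		ld_sdmmc_task_put(sc, task);
	} else {
		/* It already ran; its completion path does the cleanup. */
	}
	mutex_exit(&sc->sc_lock);

The interlock protocol is the same as callout_halt(9)'s.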
diff -r 21bda4d546c9 -r f7490c54d671 sys/dev/sdmmc/if_bwfm_sdio.c
--- a/sys/dev/sdmmc/if_bwfm_sdio.c	Thu May 21 16:50:25 2020 +0000
+++ b/sys/dev/sdmmc/if_bwfm_sdio.c	Fri May 22 02:44:03 2020 +0000
@@ -69,7 +69,6 @@ enum bwfm_sdio_clkstate {
 struct bwfm_sdio_softc {
 	struct bwfm_softc sc_sc;
 	kmutex_t sc_lock;
-	kmutex_t sc_intr_lock;
 
 	bool sc_bwfm_attached;
 
@@ -361,10 +360,8 @@ bwfm_sdio_attach(device_t parent, device
 	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
 	cv_init(&sc->sc_rxctl_cv, "bwfmctl");
-	mutex_init(&sc->sc_intr_lock, MUTEX_DEFAULT, IPL_NONE);
 
 	sdmmc_init_task(&sc->sc_task, bwfm_sdio_task, sc);
-	sc->sc_task_queued = false;
 
 	sc->sc_bounce_size = 64 * 1024;
 	sc->sc_bounce_buf = kmem_alloc(sc->sc_bounce_size, KM_SLEEP);
 
@@ -620,10 +617,11 @@ bwfm_sdio_detach(device_t self, int flag
 	if (sc->sc_bwfm_attached)
 		bwfm_detach(&sc->sc_sc, flags);
 
+	sdmmc_del_task(sc->sc_sf[1]->sc, &sc->sc_task, NULL);
+
 	kmem_free(sc->sc_sf, sc->sc_sf_size);
 	kmem_free(sc->sc_bounce_buf, sc->sc_bounce_size);
 
-	mutex_destroy(&sc->sc_intr_lock);
 	cv_destroy(&sc->sc_rxctl_cv);
 	mutex_destroy(&sc->sc_lock);
 
@@ -1426,11 +1424,7 @@ bwfm_sdio_intr1(void *v, const char *nam
 
 	DPRINTF(("%s: %s\n", DEVNAME(sc), name));
 
-	mutex_enter(&sc->sc_intr_lock);
-	if (!sdmmc_task_pending(&sc->sc_task))
-		sdmmc_add_task(sc->sc_sf[1]->sc, &sc->sc_task);
-	sc->sc_task_queued = true;
-	mutex_exit(&sc->sc_intr_lock);
+	sdmmc_add_task(sc->sc_sf[1]->sc, &sc->sc_task);
 
 	return 1;
 }
@@ -1444,33 +1438,13 @@ static void
 bwfm_sdio_task(void *v)
 {
 	struct bwfm_sdio_softc *sc = (void *)v;
+
+	mutex_enter(&sc->sc_lock);
+	bwfm_sdio_task1(sc);
 #ifdef BWFM_DEBUG
-	unsigned count = 0;
-#endif
-
-	mutex_enter(&sc->sc_intr_lock);
-	while (sc->sc_task_queued) {
-#ifdef BWFM_DEBUG
-		++count;
+	bwfm_sdio_debug_console(sc);
 #endif
-		sc->sc_task_queued = false;
-		mutex_exit(&sc->sc_intr_lock);
-
-		mutex_enter(&sc->sc_lock);
-		bwfm_sdio_task1(sc);
-#ifdef BWFM_DEBUG
-		bwfm_sdio_debug_console(sc);
-#endif
-		mutex_exit(&sc->sc_lock);
-
-		mutex_enter(&sc->sc_intr_lock);
-	}
-	mutex_exit(&sc->sc_intr_lock);
-
-#ifdef BWFM_DEBUG
-	if (count > 1)
-		DPRINTF(("%s: finished %u tasks\n", DEVNAME(sc), count));
-#endif
+	mutex_exit(&sc->sc_lock);
 }
 
 static void
diff -r 21bda4d546c9 -r f7490c54d671 sys/dev/sdmmc/ld_sdmmc.c
--- a/sys/dev/sdmmc/ld_sdmmc.c	Thu May 21 16:50:25 2020 +0000
+++ b/sys/dev/sdmmc/ld_sdmmc.c	Fri May 22 02:44:03 2020 +0000
@@ -59,7 +59,7 @@
 #ifdef LD_SDMMC_DEBUG
 #define DPRINTF(s)	printf s
 #else
-#define DPRINTF(s)	/**/
+#define DPRINTF(s)	__nothing
 #endif
 
 #define	LD_SDMMC_IORETRIES	5	/* number of retries before giving up */
@@ -72,32 +72,35 @@
 struct ld_sdmmc_softc;
 
 struct ld_sdmmc_task {
 	struct sdmmc_task task;
-
 	struct ld_sdmmc_softc *task_sc;
 	struct buf *task_bp;
 	int task_retries; /* number of xfer retry */
 	struct callout task_restart_ch;
 
-	kmutex_t task_lock;
-	kcondvar_t task_cv;
+	void *task_cookie;
 
-	uintptr_t task_data;
+	TAILQ_ENTRY(ld_sdmmc_task) task_entry;
 };
 
 struct ld_sdmmc_softc {
 	struct ld_softc sc_ld;
 	int sc_hwunit;
-
+	char *sc_typename;
 	struct sdmmc_function *sc_sf;
-	struct ld_sdmmc_task sc_task[LD_SDMMC_MAXTASKCNT];
-	pcq_t *sc_freeq;
-	char *sc_typename;
+
+	kmutex_t sc_lock;
+	kcondvar_t sc_cv;
+	TAILQ_HEAD(, ld_sdmmc_task) sc_freeq;
+	TAILQ_HEAD(, ld_sdmmc_task) sc_xferq;
+	bool sc_dying;
 
 	struct evcnt sc_ev_discard;	/* discard counter */
 	struct evcnt sc_ev_discarderr;	/* discard error counter */
 	struct evcnt sc_ev_discardbusy;	/* discard busy counter */
 	struct evcnt sc_ev_cachesyncbusy; /* cache sync busy counter */
+
+	struct ld_sdmmc_task sc_task[LD_SDMMC_MAXTASKCNT];
 };
 
 static int ld_sdmmc_match(device_t, cfdata_t, void *);
@@ -117,6 +120,32 @@ static void ld_sdmmc_dodiscard(void *);
 
 CFATTACH_DECL_NEW(ld_sdmmc, sizeof(struct ld_sdmmc_softc),
     ld_sdmmc_match, ld_sdmmc_attach, ld_sdmmc_detach, NULL);
 
+static struct ld_sdmmc_task *
+ld_sdmmc_task_get(struct ld_sdmmc_softc *sc)
+{
+	struct ld_sdmmc_task *task;
+
+	KASSERT(mutex_owned(&sc->sc_lock));
+
+	if (sc->sc_dying || (task = TAILQ_FIRST(&sc->sc_freeq)) == NULL)
+		return NULL;
+	TAILQ_REMOVE(&sc->sc_freeq, task, task_entry);
+	TAILQ_INSERT_TAIL(&sc->sc_xferq, task, task_entry);
+	KASSERT(task->task_bp == NULL);
+
+	return task;
+}
+
+static void
+ld_sdmmc_task_put(struct ld_sdmmc_softc *sc, struct ld_sdmmc_task *task)
+{
+
+	KASSERT(mutex_owned(&sc->sc_lock));
+
+	TAILQ_REMOVE(&sc->sc_xferq, task, task_entry);
+	TAILQ_INSERT_TAIL(&sc->sc_freeq, task, task_entry);
+	task->task_bp = NULL;
+}
+
 /* ARGSUSED */
 static int
@@ -157,15 +186,18 @@ ld_sdmmc_attach(device_t parent, device_
 	evcnt_attach_dynamic(&sc->sc_ev_discardbusy, EVCNT_TYPE_MISC,
 	    NULL, device_xname(self), "sdmmc discard busy");
 
+	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_SDMMC);
+	cv_init(&sc->sc_cv, "ldsdmmc");
+	TAILQ_INIT(&sc->sc_freeq);
+	TAILQ_INIT(&sc->sc_xferq);
+	sc->sc_dying = false;
+
 	const int ntask = __arraycount(sc->sc_task);
-	sc->sc_freeq = pcq_create(ntask, KM_SLEEP);
 	for (i = 0; i < ntask; i++) {
 		task = &sc->sc_task[i];
 		task->task_sc = sc;
 		callout_init(&task->task_restart_ch, CALLOUT_MPSAFE);
-		mutex_init(&task->task_lock, MUTEX_DEFAULT, IPL_NONE);
-		cv_init(&task->task_cv, "ldsdmmctask");
-		pcq_put(sc->sc_freeq, task);
+		TAILQ_INSERT_TAIL(&sc->sc_freeq, task, task_entry);
 	}
 
 	sc->sc_hwunit = 0;	/* always 0? */
@@ -231,19 +263,66 @@ ld_sdmmc_detach(device_t dev, int flags)
 {
 	struct ld_sdmmc_softc *sc = device_private(dev);
 	struct ld_softc *ld = &sc->sc_ld;
+	struct ld_sdmmc_task *task;
+	struct buf *bp;
 	int rv, i;
 
-	if ((rv = ldbegindetach(ld, flags)) != 0)
+	/* Block new xfers. */
+	sc->sc_dying = true;
+
+	/* Abort all pending tasks. */
+	mutex_enter(&sc->sc_lock);
+	while ((task = TAILQ_FIRST(&sc->sc_xferq)) != NULL) {
+		/*
+		 * Try to abort the callout.  If it already fired, it
+		 * may have scheduled the task, so we have to continue
+		 * anyway.
+		 */
+		(void)callout_halt(&task->task_restart_ch, &sc->sc_lock);
+
+		/* Try to abort the task.  If it already started, tough. */
+		if (!sdmmc_del_task(sc->sc_sf->sc, &task->task, &sc->sc_lock))
+			continue;
+
+		/*
+		 * We aborted the task while it was still pending.
+		 * Remove it from the queue and dissociate it from any
+		 * I/O xfer.
+		 */
+		bp = task->task_bp;
+		ld_sdmmc_task_put(sc, task);
+
+		/*
+		 * If the task was for an I/O xfer, fail the I/O xfer,
+		 * with the softc lock dropped since this is a callback
+		 * into arbitrary other subsystems.
+		 */
+		if (bp) {
+			mutex_exit(&sc->sc_lock);
+			bp->b_error = ENXIO;
+			bp->b_resid = bp->b_bcount;
+			lddone(&sc->sc_ld, bp);
+			mutex_enter(&sc->sc_lock);
+		}
+	}
+	mutex_exit(&sc->sc_lock);
+
+	/* Do the ld detach dance. */
+	if ((rv = ldbegindetach(ld, flags)) != 0) {
+		/* Detach failed -- back out. */
+		sc->sc_dying = false;
 		return rv;
+	}
 	ldenddetach(ld);
 
-	for (i = 0; i < __arraycount(sc->sc_task); i++) {
+	KASSERT(TAILQ_EMPTY(&sc->sc_xferq));
+
+	for (i = 0; i < __arraycount(sc->sc_task); i++)
 		callout_destroy(&sc->sc_task[i].task_restart_ch);
-		mutex_destroy(&sc->sc_task[i].task_lock);
-		cv_destroy(&sc->sc_task[i].task_cv);
-	}
-	pcq_destroy(sc->sc_freeq);
+
+	cv_destroy(&sc->sc_cv);
+	mutex_destroy(&sc->sc_lock);
 
 	evcnt_detach(&sc->sc_ev_discard);
 	evcnt_detach(&sc->sc_ev_discarderr);
 	evcnt_detach(&sc->sc_ev_discardbusy);
@@ -256,10 +335,14 @@ static int
 ld_sdmmc_start(struct ld_softc *ld, struct buf *bp)
 {
 	struct ld_sdmmc_softc *sc = device_private(ld->sc_dv);
-	struct ld_sdmmc_task *task = pcq_get(sc->sc_freeq);
+	struct ld_sdmmc_task *task;
+	int error;
 
-	if (task == NULL)
-		return EAGAIN;
+	mutex_enter(&sc->sc_lock);
+	if ((task = ld_sdmmc_task_get(sc)) == NULL) {
+		error = EAGAIN;
+		goto out;
+	}
 
 	task->task_bp = bp;
 	task->task_retries = 0;
@@ -267,7 +350,11 @@ ld_sdmmc_start(struct ld_softc *ld, stru
 
 	sdmmc_add_task(sc->sc_sf->sc, &task->task);
 
-	return 0;
+	/* Success!  The xfer is now queued. */
+	error = 0;
+
+out:	mutex_exit(&sc->sc_lock);
+	return error;
 }
 
 static void
@@ -335,7 +422,10 @@ ld_sdmmc_dobio(void *arg)
 	}
 
 done:
-	pcq_put(sc->sc_freeq, task);
+	/* Dissociate the task from the I/O xfer and release it. */
+	mutex_enter(&sc->sc_lock);
+	ld_sdmmc_task_put(sc, task);
+	mutex_exit(&sc->sc_lock);
 
 	lddone(&sc->sc_ld, bp);
 }
@@ -364,11 +454,14 @@ ld_sdmmc_dodiscard(void *arg)
 
 	/* An error from discard is non-fatal */
 	error = sdmmc_mem_discard(sc->sc_sf, sblkno, sblkno + nblks - 1);
-	if (error != 0)
+
+	mutex_enter(&sc->sc_lock);
+	if (error)
 		sc->sc_ev_discarderr.ev_count++;
 	else
 		sc->sc_ev_discard.ev_count++;
-	pcq_put(sc->sc_freeq, task);
+	ld_sdmmc_task_put(sc, task);
+	mutex_exit(&sc->sc_lock);
 
 	if (error)
 		bp->b_error = error;
@@ -380,61 +473,101 @@ static int
 ld_sdmmc_discard(struct ld_softc *ld, struct buf *bp)
 {
 	struct ld_sdmmc_softc *sc = device_private(ld->sc_dv);
-	struct ld_sdmmc_task *task = pcq_get(sc->sc_freeq);
+	struct ld_sdmmc_task *task;
+
+	mutex_enter(&sc->sc_lock);
 
-	if (task == NULL) {
+	/* Acquire a free task, or drop the request altogether. */
+	if ((task = ld_sdmmc_task_get(sc)) == NULL) {
 		sc->sc_ev_discardbusy.ev_count++;
-		return 0;
+		goto out;
 	}
 
+	/* Set up the task and schedule it. */
 	task->task_bp = bp;
-
 	sdmmc_init_task(&task->task, ld_sdmmc_dodiscard, task);
 	sdmmc_add_task(sc->sc_sf->sc, &task->task);
 
+	/* Nothing more to do -- fire and forget. */
+out:	mutex_exit(&sc->sc_lock);
 	return 0;
 }
 
+struct ld_sdmmc_cachesync {
+	int error;
+	bool done;
+	bool poll;
+};
+
 static void
 ld_sdmmc_docachesync(void *arg)
 {
 	struct ld_sdmmc_task *task = arg;
 	struct ld_sdmmc_softc *sc = task->task_sc;
-	const bool poll = (bool)task->task_data;
+	struct ld_sdmmc_cachesync *C = task->task_cookie;
+	int error;
 
-	task->task_data = sdmmc_mem_flush_cache(sc->sc_sf, poll);
+	/* Flush the cache. */
+	error = sdmmc_mem_flush_cache(sc->sc_sf, C->poll);
 
-	mutex_enter(&task->task_lock);
-	cv_signal(&task->task_cv);
-	mutex_exit(&task->task_lock);
+	/* Notify the other thread that we're done; pass on the error. */
+	mutex_enter(&sc->sc_lock);
+	C->error = error;
+	C->done = true;
+	cv_broadcast(&sc->sc_cv);
+	mutex_exit(&sc->sc_lock);
 }
 
 static int
 ld_sdmmc_cachesync(struct ld_softc *ld, bool poll)
 {
 	struct ld_sdmmc_softc *sc = device_private(ld->sc_dv);
-	struct ld_sdmmc_task *task = pcq_get(sc->sc_freeq);
-	int error = 0;
+	struct ld_sdmmc_task *task;
+	struct ld_sdmmc_cachesync cachesync = {
+		.poll = poll,
+		.done = false,
+		.error = -1,
+	}, *C = &cachesync;
+	int error;
 
-	if (task == NULL) {
+	mutex_enter(&sc->sc_lock);
+
+	/* Acquire a free task, or fail with EBUSY. */
+	if ((task = ld_sdmmc_task_get(sc)) == NULL) {
 		sc->sc_ev_cachesyncbusy.ev_count++;
-		return EBUSY;
+		error = EBUSY;
+		goto out;
 	}
 
+	/* Set up the task and schedule it. */
+	task->task_cookie = C;
 	sdmmc_init_task(&task->task, ld_sdmmc_docachesync, task);
-	task->task_data = poll;
 
-	mutex_enter(&task->task_lock);
 	sdmmc_add_task(sc->sc_sf->sc, &task->task);
-	error = cv_wait_sig(&task->task_cv, &task->task_lock);
-	mutex_exit(&task->task_lock);
+
+	/* Wait for the task to complete.  If interrupted, abort the task. */
+	error = 0;
+	while (!C->done) {
+		error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
+		if (error) {
+			sdmmc_del_task(sc->sc_sf->sc, &task->task,
+			    &sc->sc_lock);
+			break;
+		}
+	}
 
-	if (error == 0)
-		error = (int)task->task_data;
+	/*
+	 * If the task completed -- whether or not we were interrupted
+	 * -- get its error.
	 */
+	if (C->done)
+		error = C->error;
 
-	pcq_put(sc->sc_freeq, task);
+	/* Release the task for reuse. */
+	ld_sdmmc_task_put(sc, task);
+out:	mutex_exit(&sc->sc_lock);
 
 	return error;
 }
diff -r 21bda4d546c9 -r f7490c54d671 sys/dev/sdmmc/sdmmc.c
--- a/sys/dev/sdmmc/sdmmc.c	Thu May 21 16:50:25 2020 +0000
+++ b/sys/dev/sdmmc/sdmmc.c	Fri May 22 02:44:03 2020 +0000
@@ -152,7 +152,6 @@ sdmmc_attach(device_t parent, device_t s
 	mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE);
 	mutex_init(&sc->sc_tskq_mtx, MUTEX_DEFAULT, IPL_SDMMC);
 	mutex_init(&sc->sc_discover_task_mtx, MUTEX_DEFAULT, IPL_SDMMC);
-	mutex_init(&sc->sc_intr_task_mtx, MUTEX_DEFAULT, IPL_SDMMC);
 	cv_init(&sc->sc_tskq_cv, "mmctaskq");
 
 	evcnt_attach_dynamic(&sc->sc_ev_xfer, EVCNT_TYPE_MISC, NULL,
@@ -227,8 +226,10 @@ sdmmc_detach(device_t self, int flags)
 		callout_destroy(&sc->sc_card_detect_ch);
 	}
 
+	sdmmc_del_task(sc, &sc->sc_intr_task, NULL);
+	sdmmc_del_task(sc, &sc->sc_discover_task, NULL);
+
 	cv_destroy(&sc->sc_tskq_cv);
-	mutex_destroy(&sc->sc_intr_task_mtx);
 	mutex_destroy(&sc->sc_discover_task_mtx);
 	mutex_destroy(&sc->sc_tskq_mtx);
 	mutex_destroy(&sc->sc_mtx);
@@ -258,32 +259,59 @@ sdmmc_add_task(struct sdmmc_softc *sc, s
 {
 
 	mutex_enter(&sc->sc_tskq_mtx);
+	if (task->sc == sc) {
+		KASSERT(task->onqueue);
+		goto out;
+	}
+	KASSERT(task->sc == NULL);
+	KASSERT(!task->onqueue);
 	task->onqueue = 1;
 	task->sc = sc;
 	TAILQ_INSERT_TAIL(&sc->sc_tskq, task, next);
 	cv_broadcast(&sc->sc_tskq_cv);
-	mutex_exit(&sc->sc_tskq_mtx);
+out:	mutex_exit(&sc->sc_tskq_mtx);
 }
 
 static inline void
 sdmmc_del_task1(struct sdmmc_softc *sc, struct sdmmc_task *task)
 {
 
+	KASSERT(mutex_owned(&sc->sc_tskq_mtx));
+
 	TAILQ_REMOVE(&sc->sc_tskq, task, next);
 	task->sc = NULL;
 	task->onqueue = 0;
 }
 
-void
-sdmmc_del_task(struct sdmmc_task *task)
+bool
+sdmmc_del_task(struct sdmmc_softc *sc, struct sdmmc_task *task,
+    kmutex_t *interlock)
 {
-	struct sdmmc_softc *sc = (struct sdmmc_softc *)task->sc;
+	bool cancelled;
 
-	if (sc != NULL) {
-		mutex_enter(&sc->sc_tskq_mtx);
+	mutex_enter(&sc->sc_tskq_mtx);
+	if (task->sc == sc) {
+		KASSERT(task->onqueue);
+		KASSERT(sc->sc_curtask != task);
 		sdmmc_del_task1(sc, task);
-		mutex_exit(&sc->sc_tskq_mtx);
+		cancelled = true;
+	} else {
+		KASSERT(task->sc == NULL);
+		KASSERT(!task->onqueue);
+		if (interlock)	/* may be NULL, e.g. from sdmmc_detach */
+			mutex_exit(interlock);
+		while (sc->sc_curtask == task)
+			cv_wait(&sc->sc_tskq_cv, &sc->sc_tskq_mtx);
+		if (interlock && !mutex_tryenter(interlock)) {
+			mutex_exit(&sc->sc_tskq_mtx);
+			mutex_enter(interlock);
+			mutex_enter(&sc->sc_tskq_mtx);
+		}
+		cancelled = false;
 	}
+	mutex_exit(&sc->sc_tskq_mtx);
+
+	return cancelled;
 }
 
 static void
@@ -300,9 +327,12 @@ sdmmc_task_thread(void *arg)
 		task = TAILQ_FIRST(&sc->sc_tskq);
 		if (task != NULL) {
 			sdmmc_del_task1(sc, task);
+			sc->sc_curtask = task;
 			mutex_exit(&sc->sc_tskq_mtx);
 			(*task->func)(task->arg);
 			mutex_enter(&sc->sc_tskq_mtx);
+			sc->sc_curtask = NULL;
+			cv_broadcast(&sc->sc_tskq_cv);
 		} else {
 			/* Check for the exit condition. */
 			if (sc->sc_dying)
@@ -335,10 +365,7 @@ sdmmc_needs_discover(device_t dev)
 	if (!ISSET(sc->sc_flags, SMF_INITED))
 		return;
 
-	mutex_enter(&sc->sc_discover_task_mtx);
-	if (!sdmmc_task_pending(&sc->sc_discover_task))
-		sdmmc_add_task(sc, &sc->sc_discover_task);
-	mutex_exit(&sc->sc_discover_task_mtx);
+	sdmmc_add_task(sc, &sc->sc_discover_task);
 }
 
 static void
diff -r 21bda4d546c9 -r f7490c54d671 sys/dev/sdmmc/sdmmc_io.c
--- a/sys/dev/sdmmc/sdmmc_io.c	Thu May 21 16:50:25 2020 +0000
+++ b/sys/dev/sdmmc/sdmmc_io.c	Fri May 22 02:44:03 2020 +0000
@@ -804,10 +804,7 @@ sdmmc_card_intr(device_t dev)
 	if (sc->sc_sct->card_enable_intr == NULL)
 		return;
 
-	mutex_enter(&sc->sc_intr_task_mtx);
-	if (!sdmmc_task_pending(&sc->sc_intr_task))
-		sdmmc_add_task(sc, &sc->sc_intr_task);
-	mutex_exit(&sc->sc_intr_task_mtx);
+	sdmmc_add_task(sc, &sc->sc_intr_task);
 }
 
 void
diff -r 21bda4d546c9 -r f7490c54d671 sys/dev/sdmmc/sdmmcvar.h
--- a/sys/dev/sdmmc/sdmmcvar.h	Thu May 21 16:50:25 2020 +0000
+++ b/sys/dev/sdmmc/sdmmcvar.h	Fri May 22 02:44:03 2020 +0000
@@ -89,8 +89,6 @@ do {									\
 	(xtask)->sc = NULL;						\
 } while (/*CONSTCOND*/0)
 
-#define sdmmc_task_pending(xtask)	((xtask)->onqueue)
-
 struct sdmmc_command {
 	struct sdmmc_task c_task;	/* task queue entry */
 	uint16_t c_opcode;		/* SD or MMC command index */
@@ -267,6 +265,7 @@ struct sdmmc_softc {
 	TAILQ_HEAD(, sdmmc_task) sc_tskq;	/* task thread work queue */
 	struct kmutex sc_tskq_mtx;
 	struct kcondvar sc_tskq_cv;
+	struct sdmmc_task *sc_curtask;
 
 	/* discover task */
 	struct sdmmc_task sc_discover_task;	/* card attach/detach task */
@@ -274,7 +273,6 @@ struct sdmmc_softc {
 	/* interrupt task */
 	struct sdmmc_task sc_intr_task;		/* card interrupt task */
-	struct kmutex sc_intr_task_mtx;
 
 	TAILQ_HEAD(, sdmmc_intr_handler) sc_intrq; /* interrupt handlers */
 
 	u_int sc_clkmin;		/* host min bus clock */
@@ -327,7 +325,7 @@ extern int sdmmcdebug;
 #endif
 
 void	sdmmc_add_task(struct sdmmc_softc *, struct sdmmc_task *);
-void	sdmmc_del_task(struct sdmmc_task *);
+bool	sdmmc_del_task(struct sdmmc_softc *, struct sdmmc_task *, kmutex_t *);
 
 struct sdmmc_function *sdmmc_function_alloc(struct sdmmc_softc *);
 void	sdmmc_function_free(struct sdmmc_function *);