From 54c4acd9cce20f633071b4934bf42e5a024e9ceb Mon Sep 17 00:00:00 2001 From: Taylor R Campbell <riastradh@NetBSD.org> Date: Fri, 26 Aug 2022 10:30:23 +0000 Subject: [PATCH 1/2] autoconf(9): New diagnostic to detect double-detach. - Rename dv_detached -> dv_detach_committed. - Add dv_detach_done, asserted false and then set in config_detach. dv_detach_done may appear redundant with dv_del_gen, but dv_del_gen will be used to safely detect config_detach on two valid references to a device (e.g., a bus detaching its child concurrently with drvctl detaching the same child), while dv_detach_done is strictly a diagnostic to detect races in the config_detach API. Currently the config_detach API itself is unsafe, but we can add a config_detach_release function that simultaneously releases and detaches a referenced device_t; this will continue to use dv_del_gen to safely avoid multiple detach, and dv_detach_done to check for races in usage. --- sys/kern/subr_autoconf.c | 14 +++++++++----- sys/sys/device_impl.h | 3 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c index 94656285ad49..3bc700ef9955 100644 --- a/sys/kern/subr_autoconf.c +++ b/sys/kern/subr_autoconf.c @@ -2051,6 +2051,9 @@ config_detach(device_t dev, int flags) } else rv = EOPNOTSUPP; + KASSERTMSG(!dev->dv_detach_done, "%s detached twice, error=%d", + device_xname(dev), rv); + /* * If it was not possible to detach the device, then we either * panic() (for the forced but failed case), or return an error. @@ -2060,9 +2063,9 @@ config_detach(device_t dev, int flags) * Detach failed -- likely EOPNOTSUPP or EBUSY. Driver * must not have called config_detach_commit. */ - KASSERTMSG(!dev->dv_detached, - "%s committed to detaching and then backed out", - device_xname(dev)); + KASSERTMSG(!dev->dv_detach_committed, + "%s committed to detaching and then backed out, error=%d", + device_xname(dev), rv); if (flags & DETACH_FORCE) { panic("config_detach: forced detach of %s failed (%d)", device_xname(dev), rv); @@ -2073,6 +2076,7 @@ config_detach(device_t dev, int flags) /* * The device has now been successfully detached. */ + dev->dv_detach_done = true; /* * If .ca_detach didn't commit to detach, then do that for it. @@ -2193,7 +2197,7 @@ config_detach_commit(device_t dev) "lwp %ld [%s] @ %p detaching %s", (long)l->l_lid, (l->l_name ? l->l_name : l->l_proc->p_comm), l, device_xname(dev)); - dev->dv_detached = true; + dev->dv_detach_committed = true; cv_broadcast(&config_misc_cv); mutex_exit(&config_misc_lock); } @@ -2706,7 +2710,7 @@ device_lookup_acquire(cfdriver_t cd, int unit) retry: if (unit < 0 || unit >= cd->cd_ndevs || (dv = cd->cd_devs[unit]) == NULL || dv->dv_del_gen != 0 || - dv->dv_detached) { + dv->dv_detach_committed) { dv = NULL; } else { /* diff --git a/sys/sys/device_impl.h b/sys/sys/device_impl.h index b1d352ada311..7baf19f5f28d 100644 --- a/sys/sys/device_impl.h +++ b/sys/sys/device_impl.h @@ -147,7 +147,8 @@ struct device { struct lwp *dv_attaching; /* thread not yet finished in attach */ struct lwp *dv_detaching; /* detach lock (config_misc_lock/cv) */ - bool dv_detached; /* config_misc_lock */ + bool dv_detach_committed; /* config_misc_lock */ + bool dv_detach_done; /* dv_detaching */ size_t dv_activity_count; void (**dv_activity_handlers)(device_t, devactive_t); From f274717d0434d5dd879c433fea53c5b016d9c3ea Mon Sep 17 00:00:00 2001 From: Taylor R Campbell <riastradh@NetBSD.org> Date: Fri, 26 Aug 2022 10:35:08 +0000 Subject: [PATCH 2/2] autoconf(9): New functions for referenced attach/detach. New functions: - config_found_acquire(dev, aux, print, cfargs) - config_attach_acquire(parent, cf, aux, print, cfargs) - config_attach_pseudo_acquire(cf, aux) - config_detach_release(dev, flags) - device_acquire(dev) The config_*_acquire functions are like the non-acquire versions, but they return a referenced device_t, which is guaranteed to be safe to use until released. The device's detach function may run while it is referenced, but the device_t will not be freed and the parent's .ca_childdetached routine will not be called. => config_attach_pseudo_acquire additionally lets you pass an aux argument to the device's .ca_attach routine, unlike config_attach_pseudo which always passes NULL. => Eventually, config_found, config_attach, and config_attach_pseudo should be made to return void, because use of the device_t they return is fundamentally unsafe. config_detach_release is like device_release and then config_detach, but avoids the race inherent with that sequence. => Eventually, config_detach should be eliminated, because it's fundamentally MP-unsafe (and difficult to use safely even in a UP system or with the kernel lock). device_acquire acquires a reference to a device. It never fails. The caller is responsible for ensuring that the device_t cannot be freed. Typically this will be used in a read section (mutex, rwlock, pserialize, &c.) in a data structure lookup, with corresponding logic in the .ca_detach function to remove the device from the data structure and wait for all read sections to complete. --- sys/kern/subr_autoconf.c | 135 +++++++++++++++++++++++++++++++++++++-- sys/sys/device.h | 7 ++ 2 files changed, 136 insertions(+), 6 deletions(-) diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c index 3bc700ef9955..2f59be6eb429 100644 --- a/sys/kern/subr_autoconf.c +++ b/sys/kern/subr_autoconf.c @@ -1249,7 +1249,7 @@ static const char * const msgs[] = { * not configured, call the given `print' function and return NULL. */ device_t -config_found(device_t parent, void *aux, cfprint_t print, +config_found_acquire(device_t parent, void *aux, cfprint_t print, const struct cfargs * const cfargs) { cfdata_t cf; @@ -1276,6 +1276,29 @@ config_found(device_t parent, void *aux, cfprint_t print, return NULL; } +/* + * config_found(parent, aux, print, cfargs) + * + * Legacy entry point for callers whose use of the returned + * device_t is not delimited by device_release. This is + * inherently racy -- either they must ignore the return value, or + * they must be converted to config_found_acquire with a matching + * device_release. + */ +device_t +config_found(device_t parent, void *aux, cfprint_t print, + const struct cfargs * const cfargs) +{ + device_t dev; + + dev = config_found_acquire(parent, aux, print, cfargs); + if (dev == NULL) + return NULL; + device_release(dev); + + return dev; +} + /* * As above, but for root devices. */ @@ -1695,6 +1718,8 @@ config_add_attrib_dict(device_t dev) /* * Attach a found device. + * + * Returns the device referenced, to be released with device_release. */ static device_t config_attach_internal(device_t parent, cfdata_t cf, void *aux, cfprint_t print, @@ -1765,6 +1790,12 @@ config_attach_internal(device_t parent, cfdata_t cf, void *aux, cfprint_t print, */ config_pending_incr(dev); + /* + * Prevent concurrent detach from destroying the device_t until + * the caller has released the device. + */ + device_acquire(dev); + /* Call the driver's attach function. */ (*dev->dv_cfattach->ca_attach)(parent, dev, aux); @@ -1800,7 +1831,7 @@ config_attach_internal(device_t parent, cfdata_t cf, void *aux, cfprint_t print, } device_t -config_attach(device_t parent, cfdata_t cf, void *aux, cfprint_t print, +config_attach_acquire(device_t parent, cfdata_t cf, void *aux, cfprint_t print, const struct cfargs *cfargs) { struct cfargs_internal store; @@ -1811,6 +1842,29 @@ config_attach(device_t parent, cfdata_t cf, void *aux, cfprint_t print, cfargs_canonicalize(cfargs, &store)); } +/* + * config_attach(parent, cf, aux, print, cfargs) + * + * Legacy entry point for callers whose use of the returned + * device_t is not delimited by device_release. This is + * inherently racy -- either they must ignore the return value, or + * they must be converted to config_attach_acquire with a matching + * device_release. + */ +device_t +config_attach(device_t parent, cfdata_t cf, void *aux, cfprint_t print, + const struct cfargs *cfargs) +{ + device_t dev; + + dev = config_attach_acquire(parent, cf, aux, print, cfargs); + if (dev == NULL) + return NULL; + device_release(dev); + + return dev; +} + /* * As above, but for pseudo-devices. Pseudo-devices attached in this * way are silently inserted into the device tree, and their children @@ -1821,7 +1875,7 @@ config_attach(device_t parent, cfdata_t cf, void *aux, cfprint_t print, * name by the attach routine. */ device_t -config_attach_pseudo(cfdata_t cf) +config_attach_pseudo_acquire(cfdata_t cf, void *aux) { device_t dev; @@ -1854,8 +1908,14 @@ config_attach_pseudo(cfdata_t cf) */ config_pending_incr(dev); + /* + * Prevent concurrent detach from destroying the device_t until + * the caller has released the device. + */ + device_acquire(dev); + /* Call the driver's attach function. */ - (*dev->dv_cfattach->ca_attach)(ROOT, dev, NULL); + (*dev->dv_cfattach->ca_attach)(ROOT, dev, aux); /* * Allow other threads to acquire references to the device now @@ -1879,6 +1939,28 @@ out: KERNEL_UNLOCK_ONE(NULL); return dev; } +/* + * config_attach_pseudo(cf) + * + * Legacy entry point for callers whose use of the returned + * device_t is not delimited by device_release. This is + * inherently racy -- either they must ignore the return value, or + * they must be converted to config_attach_pseudo_acquire with a + * matching device_release. + */ +device_t +config_attach_pseudo(cfdata_t cf) +{ + device_t dev; + + dev = config_attach_pseudo_acquire(cf, NULL); + if (dev == NULL) + return dev; + device_release(dev); + + return dev; +} + /* * Caller must hold alldevs_lock. */ @@ -1987,9 +2069,12 @@ config_detach_exit(device_t dev) * Note that this code wants to be run from a process context, so * that the detach can sleep to allow processes which have a device * open to run and unwind their stacks. + * + * Caller must hold a reference with device_acquire or + * device_lookup_acquire. */ int -config_detach(device_t dev, int flags) +config_detach_release(device_t dev, int flags) { struct alldevs_foray af; struct cftable *ct; @@ -2018,6 +2103,7 @@ config_detach(device_t dev, int flags) * attached. */ rv = config_detach_enter(dev); + device_release(dev); if (rv) { KERNEL_UNLOCK_ONE(NULL); return rv; @@ -2172,6 +2258,22 @@ out: return rv; } +/* + * config_detach(dev, flags) + * + * Legacy entry point for callers that have not acquired a + * reference to dev. This is inherently racy -- you must convert + * such callers to acquire a reference and then use + * config_detach_release instead. + */ +int +config_detach(device_t dev, int flags) +{ + + localcount_acquire(dev->dv_localcount); + return config_detach_release(dev, flags); +} + /* * config_detach_commit(dev) * @@ -2735,10 +2837,31 @@ retry: if (unit < 0 || unit >= cd->cd_ndevs || return dv; } +/* + * device_acquire: + * + * Acquire a reference to a device. It is the caller's + * responsibility to ensure that the device's .ca_detach routine + * cannot return before calling this. Caller must release the + * reference with device_release or config_detach_release. + */ +void +device_acquire(device_t dv) +{ + + /* + * No lock because the caller has promised that this can't + * change concurrently with device_acquire. + */ + KASSERTMSG(!dv->dv_detach_done, "%s", + dv == NULL ? "(null)" : device_xname(dv)); + localcount_acquire(dv->dv_localcount); +} + /* * device_release: * - * Release a reference to a device acquired with + * Release a reference to a device acquired with device_acquire or * device_lookup_acquire. */ void diff --git a/sys/sys/device.h b/sys/sys/device.h index 2109a354cf8b..671f6f8c6be3 100644 --- a/sys/sys/device.h +++ b/sys/sys/device.h @@ -554,14 +554,20 @@ device_t config_found(device_t, void *, cfprint_t, const struct cfargs *); device_t config_rootfound(const char *, void *); device_t config_attach(device_t, cfdata_t, void *, cfprint_t, const struct cfargs *); +device_t config_found_acquire(device_t, void *, cfprint_t, + const struct cfargs *); +device_t config_attach_acquire(device_t, cfdata_t, void *, cfprint_t, + const struct cfargs *); int config_match(device_t, cfdata_t, void *); int config_probe(device_t, cfdata_t, void *); bool ifattr_match(const char *, const char *); device_t config_attach_pseudo(cfdata_t); +device_t config_attach_pseudo_acquire(cfdata_t, void *); int config_detach(device_t, int); +int config_detach_release(device_t, int); int config_detach_children(device_t, int flags); void config_detach_commit(device_t); bool config_detach_all(int); @@ -588,6 +594,7 @@ device_t device_lookup(cfdriver_t, int); void *device_lookup_private(cfdriver_t, int); device_t device_lookup_acquire(cfdriver_t, int); +void device_acquire(device_t); void device_release(device_t); void device_register(device_t, void *);