From 17cb7c47df125dbc6a3f115ebf385d8db3e67c7e Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Tue, 18 Jan 2022 01:11:50 +0000 Subject: [PATCH] driver(9): New devsw d_cancel op to interrupt I/O before close. If specified, when revoking a device node or closing its last open node, specfs will: 1. Call d_cancel, which should return promptly without blocking. 2. Wait for all concurrent d_read/write/ioctl/&c. to drain. 3. Call d_close. Otherwise, specfs will: 1. Call d_close. 2. Wait for all concurrent d_read/write/ioctl/&c. to drain. This fallback is problematic because often parts of d_close rely on concurrent devsw operations to have completed already, so it is up to each driver to have its own mechanism for waiting, and the extra step in (2) is almost redundant. But it is still important to ensure that devsw operations are not active by the time a module tries to invoke devsw_detach, because only d_open is protected against that. The signature of d_cancel matches d_close, mostly so we don't raise questions about `why is this different?'; the lwp argument is not useful but we should remove it from open/cancel/close all at the same time. The only way d_cancel should fail, if it does at all, is with ENODEV, meaning the driver doesn't support cancelling outstanding I/O, and will take responsibility for that in d_close. I would make it return void and only have bdev_cancel and cdev_cancel possibly return ENODEV so specfs can detect whether a driver supports it, but this would break the pattern around devsw operation types. Drivers are allowed to omit it from struct bdevsw, struct cdevsw -- if so, it is as if they used a function that just returns ENODEV. --- sys/kern/subr_devsw.c | 36 ++++++++++++++++++++++++++++++++++ sys/miscfs/specfs/spec_vnops.c | 16 +++++++++++++++ sys/sys/conf.h | 5 +++++ 3 files changed, 57 insertions(+) diff --git a/sys/kern/subr_devsw.c b/sys/kern/subr_devsw.c index a06d30e061bf..2cde467ef67b 100644 --- a/sys/kern/subr_devsw.c +++ b/sys/kern/subr_devsw.c @@ -935,6 +935,24 @@ bdev_open(dev_t dev, int flag, int devtype, lwp_t *l) return rv; } +int +bdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l) +{ + const struct bdevsw *d; + int rv, mpflag; + + if ((d = bdevsw_lookup(dev)) == NULL) + return ENXIO; + if (d->d_cancel == NULL) + return ENODEV; + + DEV_LOCK(d); + rv = (*d->d_cancel)(dev, flag, devtype, l); + DEV_UNLOCK(d); + + return rv; +} + int bdev_close(dev_t dev, int flag, int devtype, lwp_t *l) { @@ -1130,6 +1148,24 @@ cdev_open(dev_t dev, int flag, int devtype, lwp_t *l) return rv; } +int +cdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l) +{ + const struct cdevsw *d; + int rv, mpflag; + + if ((d = cdevsw_lookup(dev)) == NULL) + return ENXIO; + if (d->d_cancel == NULL) + return ENODEV; + + DEV_LOCK(d); + rv = (*d->d_cancel)(dev, flag, devtype, l); + DEV_UNLOCK(d); + + return rv; +} + int cdev_close(dev_t dev, int flag, int devtype, lwp_t *l) { diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index 58e054bfe513..dfe22506e794 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -1642,6 +1642,22 @@ spec_close(void *v) if (!(flags & FNONBLOCK)) VOP_UNLOCK(vp); + /* + * If we can cancel all outstanding I/O, then wait for it to + * drain before we call .d_close. Drivers that split up + * .d_cancel and .d_close this way need not have any internal + * mechanism for waiting in .d_close for I/O to drain. + */ + if (vp->v_type == VBLK) + error = bdev_cancel(dev, flags, mode, curlwp); + else + error = cdev_cancel(dev, flags, mode, curlwp); + if (error == 0) + spec_io_drain(sd); + else + KASSERTMSG(error == ENODEV, "cancel dev=0x%lx failed with %d", + (unsigned long)dev, error); + if (vp->v_type == VBLK) error = bdev_close(dev, flags, mode, curlwp); else diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 210a237b8c43..23c65819ddcf 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -70,6 +70,7 @@ struct vnode; */ struct bdevsw { int (*d_open)(dev_t, int, int, struct lwp *); + int (*d_cancel)(dev_t, int, int, struct lwp *); int (*d_close)(dev_t, int, int, struct lwp *); void (*d_strategy)(struct buf *); int (*d_ioctl)(dev_t, u_long, void *, int, struct lwp *); @@ -86,6 +87,7 @@ struct bdevsw { */ struct cdevsw { int (*d_open)(dev_t, int, int, struct lwp *); + int (*d_cancel)(dev_t, int, int, struct lwp *); int (*d_close)(dev_t, int, int, struct lwp *); int (*d_read)(dev_t, struct uio *, int); int (*d_write)(dev_t, struct uio *, int); @@ -115,6 +117,7 @@ devmajor_t bdevsw_lookup_major(const struct bdevsw *); devmajor_t cdevsw_lookup_major(const struct cdevsw *); #define dev_type_open(n) int n (dev_t, int, int, struct lwp *) +#define dev_type_cancel(n) int n (dev_t, int, int, struct lwp *) #define dev_type_close(n) int n (dev_t, int, int, struct lwp *) #define dev_type_read(n) int n (dev_t, struct uio *, int) #define dev_type_write(n) int n (dev_t, struct uio *, int) @@ -165,6 +168,7 @@ paddr_t nommap(dev_t, off_t, int); /* device access wrappers. */ dev_type_open(bdev_open); +dev_type_cancel(bdev_cancel); dev_type_close(bdev_close); dev_type_strategy(bdev_strategy); dev_type_ioctl(bdev_ioctl); @@ -175,6 +179,7 @@ dev_type_discard(bdev_discard); void bdev_detached(dev_t); dev_type_open(cdev_open); +dev_type_cancel(cdev_cancel); dev_type_close(cdev_close); dev_type_read(cdev_read); dev_type_write(cdev_write);