From 0a683110ce01e9ba2ac4cbe9577486d61fb90555 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 21 Jan 2022 12:09:14 +0000 Subject: [PATCH 25/37] specfs: Resolve a race between close and a failing reopen. --- sys/miscfs/specfs/spec_vnops.c | 71 +++++++++++++++++++++++++++++----- sys/miscfs/specfs/specdev.h | 1 + 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index 05b897dc5a13..b6e83ddefb15 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -258,6 +258,7 @@ spec_node_init(vnode_t *vp, dev_t rdev) sd->sd_refcnt = 1; sd->sd_opencnt = 0; sd->sd_bdevvp = NULL; + sd->sd_opened = false; sn->sn_dev = sd; sd = NULL; } else { @@ -519,6 +520,7 @@ spec_open(void *v) spec_ioctl_t ioctl; u_int gen; const char *name; + bool needclose = false; struct partinfo pi; l = curlwp; @@ -689,12 +691,9 @@ spec_open(void *v) * must fail with EBADF. * * Otherwise, if opening it failed, back out and release the - * open reference. - * - * XXX This is wrong -- we might release the last open - * reference here, but we don't close the device. If only this - * thread's call to open failed, that's fine, but we might - * have: + * open reference. If it was ever successfully opened and we + * got the last reference this way, it's now our job to close + * it. This might happen in the following scenario: * * Thread 1 Thread 2 * VOP_OPEN @@ -711,21 +710,54 @@ spec_open(void *v) * release vnode lock * acquire vnode lock * --sd_opencnt == 0 - * but no .d_close (***) + * + * We can't resolve this by making spec_close wait for .d_open + * to complete before examining sd_opencnt, because .d_open can + * hang indefinitely, e.g. for a tty. */ mutex_enter(&device_lock); if (sn->sn_gone) { if (error == 0) error = EBADF; - } else if (error != 0) { + } else if (error == 0) { + sd->sd_opened = true; + } else if (sd->sd_opencnt == 1 && sd->sd_opened) { + /* + * We're the last reference to a _previous_ open even + * though this one failed, so we have to close it. + * Don't decrement the reference count here -- + * spec_close will do that. + */ + KASSERT(sn->sn_opencnt == 1); + needclose = true; + } else { sd->sd_opencnt--; sn->sn_opencnt--; if (vp->v_type == VBLK) sd->sd_bdevvp = NULL; - } mutex_exit(&device_lock); + /* + * If this open failed, but the device was previously opened, + * and another thread concurrently closed the vnode while we + * were in the middle of reopening it, the other thread will + * see sd_opencnt > 0 and thus decide not to call .d_close -- + * it is now our responsibility to do so. + * + * XXX The flags passed to VOP_CLOSE here are wrong, but + * drivers can't rely on FREAD|FWRITE anyway -- e.g., consider + * a device opened by thread 0 with O_READ, then opened by + * thread 1 with O_WRITE, then closed by thread 0, and finally + * closed by thread 1; the last .d_close call will have FWRITE + * but not FREAD. We should just eliminate the FREAD/FWRITE + * parameter to .d_close altogether. + */ + if (needclose) { + KASSERT(error); + VOP_CLOSE(vp, FNONBLOCK, NOCRED); + } + /* If anything went wrong, we're done. */ if (error) return error; @@ -1342,6 +1374,25 @@ spec_close(void *v) * device. For block devices, the open reference count must be * 1 at this point. If the device's open reference count goes * to zero, we're the last one out so get the lights. + * + * We may find --sd->sd_opencnt gives zero, and yet + * sd->sd_opened is false. This happens if the vnode is + * revoked at the same time as it is being opened, which can + * happen when opening a tty blocks indefinitely. In that + * case, we still must call close -- it is the job of close to + * interrupt the open. Either way, the device will be no + * longer opened, so we have to clear sd->sd_opened; subsequent + * opens will have responsibility for issuing close. + * + * This has the side effect that the sequence of opens might + * happen out of order -- we might end up doing open, open, + * close, close, instead of open, close, open, close. This is + * unavoidable with the current devsw API, where open is + * allowed to block and close must be able to run concurrently + * to interrupt it. It is the driver's responsibility to + * ensure that close is idempotent so that this works. Drivers + * requiring per-open state and exact 1:1 correspondence + * between open and close can use fd_clone. */ mutex_enter(&device_lock); sn->sn_opencnt--; @@ -1351,6 +1402,8 @@ spec_close(void *v) count + 1); sd->sd_bdevvp = NULL; } + if (count == 0) + sd->sd_opened = false; mutex_exit(&device_lock); if (count != 0) diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h index 7e1dd84fe330..018679ffaef2 100644 --- a/sys/miscfs/specfs/specdev.h +++ b/sys/miscfs/specfs/specdev.h @@ -78,6 +78,7 @@ typedef struct specdev { u_int sd_opencnt; /* # of opens; close when ->0 */ u_int sd_refcnt; /* # of specnodes referencing this */ dev_t sd_rdev; + bool sd_opened; /* true if successfully opened */ } specdev_t; /*