From 3aca82d6045dcfe8d512026556f52230a3875ca6 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Sat, 29 Jan 2022 18:13:17 +0000
Subject: [PATCH 32/37] specfs: Let spec_node_lookup_by_dev wait for reclaim to
 finish.

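vdevgone relies on this to ensure that if there is a concurrent
revoke in progress, it will wait for that revoke to finish -- that
way, it can guarantee all I/O operations have completed and the
device is closed.

To support this, spec_node_lookup_by_dev takes a new flags argument:
vdevgone passes 0 so the lookup waits for a vnode that is being
reclaimed, while the other callers updated here pass VDEAD_NOWAIT to
keep the previous non-waiting behaviour.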
---
 sys/coda/coda_vfsops.c         |  2 +-
 sys/kern/vfs_mount.c           |  3 ++-
 sys/kern/vfs_subr.c            |  9 +++++++--
 sys/kern/vfs_vnode.c           |  3 ++-
 sys/miscfs/specfs/spec_vnops.c | 21 +++++++++++++++++++--
 sys/miscfs/specfs/specdev.h    |  2 +-
 6 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/sys/coda/coda_vfsops.c b/sys/coda/coda_vfsops.c
index 9e25704b6b2f..c0a1b98a9331 100644
--- a/sys/coda/coda_vfsops.c
+++ b/sys/coda/coda_vfsops.c
@@ -636,7 +636,7 @@ struct mount *devtomp(dev_t dev)
     struct mount *mp;
     struct vnode *vp;
 
-    if (spec_node_lookup_by_dev(VBLK, dev, &vp) == 0) {
+    if (spec_node_lookup_by_dev(VBLK, dev, VDEAD_NOWAIT, &vp) == 0) {
 	mp = spec_node_getmountedfs(vp);
 	vrele(vp);
     } else {
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 2d4d6c28c292..58d3dff41d57 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -1372,7 +1372,8 @@ vfs_mountedon(vnode_t *vp)
 		return ENOTBLK;
 	if (spec_node_getmountedfs(vp) != NULL)
 		return EBUSY;
-	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) {
+	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq)
+	    == 0) {
 		if (spec_node_getmountedfs(vq) != NULL)
 			error = EBUSY;
 		vrele(vq);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 10cb4e57e129..a64cffb97054 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -495,7 +495,7 @@ int
 vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
 {
 
-	return (spec_node_lookup_by_dev(type, dev, vpp) == 0);
+	return (spec_node_lookup_by_dev(type, dev, VDEAD_NOWAIT, vpp) == 0);
 }
 
 /*
@@ -511,7 +511,12 @@ vdevgone(int maj, int minl, int minh, enum vtype type)
 
 	for (mn = minl; mn <= minh; mn++) {
 		dev = makedev(maj, mn);
-		while (spec_node_lookup_by_dev(type, dev, &vp) == 0) {
+		/*
+		 * Passing 0 as flags, instead of VDEAD_NOWAIT, means
+		 * spec_node_lookup_by_dev will wait for vnodes it
+		 * finds concurrently being revoked before returning.
+		 */
+		while (spec_node_lookup_by_dev(type, dev, 0, &vp) == 0) {
 			VOP_REVOKE(vp, REVOKEALL);
 			vrele(vp);
 		}
diff --git a/sys/kern/vfs_vnode.c b/sys/kern/vfs_vnode.c
index 48065852a6ac..58db8e0e97e7 100644
--- a/sys/kern/vfs_vnode.c
+++ b/sys/kern/vfs_vnode.c
@@ -1234,7 +1234,8 @@ vrevoke(vnode_t *vp)
 		type = vp->v_type;
 		mutex_exit(vp->v_interlock);
 
-		while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
+		while (spec_node_lookup_by_dev(type, dev, VDEAD_NOWAIT, &vq)
+		    == 0) {
 			mp = vrevoke_suspend_next(mp, vq->v_mount);
 			vgone(vq);
 		}
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
index b4aea28ff948..c727a42de614 100644
--- a/sys/miscfs/specfs/spec_vnops.c
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -424,18 +424,35 @@ spec_node_init(vnode_t *vp, dev_t rdev)
  * Lookup a vnode by device number and return it referenced.
  */
 int
-spec_node_lookup_by_dev(enum vtype type, dev_t dev, vnode_t **vpp)
+spec_node_lookup_by_dev(enum vtype type, dev_t dev, int flags, vnode_t **vpp)
 {
 	int error;
 	vnode_t *vp;
 
-	mutex_enter(&device_lock);
+top:	mutex_enter(&device_lock);
 	for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
 		if (type == vp->v_type && dev == vp->v_rdev) {
 			mutex_enter(vp->v_interlock);
 			/* If clean or being cleaned, then ignore it. */
 			if (vdead_check(vp, VDEAD_NOWAIT) == 0)
 				break;
+			if ((flags & VDEAD_NOWAIT) == 0) {
+				mutex_exit(&device_lock);
+				/*
+				 * It may be being revoked as we speak,
+				 * and the caller wants to wait until
+				 * all revocation has completed.  Let
+				 * vcache_vget wait for it to finish
+				 * dying; as a side effect, vcache_vget
+				 * releases vp->v_interlock.  Note that
+				 * vcache_vget cannot succeed at this
+				 * point because vdead_check already
+				 * failed.
+				 */
+				error = vcache_vget(vp);
+				KASSERT(error);
+				goto top;
+			}
 			mutex_exit(vp->v_interlock);
 		}
 	}
diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h
index 8b44cf4339f4..c095cf24d2d5 100644
--- a/sys/miscfs/specfs/specdev.h
+++ b/sys/miscfs/specfs/specdev.h
@@ -95,7 +95,7 @@ typedef struct specdev {
  */
 void	spec_node_init(vnode_t *, dev_t);
 void	spec_node_destroy(vnode_t *);
-int	spec_node_lookup_by_dev(enum vtype, dev_t, vnode_t **);
+int	spec_node_lookup_by_dev(enum vtype, dev_t, int, vnode_t **);
 int	spec_node_lookup_by_mount(struct mount *, vnode_t **);
 struct mount *spec_node_getmountedfs(vnode_t *);
 void	spec_node_setmountedfs(vnode_t *, struct mount *);