/*
 * Vnode life cycle
 *
 * Vnodes exist in one of six states:
 *
 * - UNINITIALIZED
 * - ACTIVE
 * - REVOKING
 * - DEACTIVATING
 * - INACTIVE
 * - DESTROYING
 *
 * To get a file for a vnode on disk, a file system will do this:
 *
 * 1. Look it up in the inode table; if there, try vget and return.
 * 2. If not there or if vget fails, getnewvnode and allocate the data
 *    structures needed by the file system.
 * 3. Look it up again in the inode table; if there, try vget,
 *    ungetnewvnode, free the data structures, and return.
 * 4. If not there or if vget fails, initialize the new vnode, call
 *    vnode_ready, and store it in the inode table.
 *
 * This yields an ACTIVE vnode and stores it in the inode table.
 *
 * When done, either vrele or vrele_async the vnode.  If you held the
 * last reference (except for the inode table), then the vnode will
 * become DEACTIVATING, the file system's VOP_INACTIVE will be called,
 * and finally either
 *
 * (a) the vnode will become INACTIVE, or
 * (b) the vnode will become DESTROYING, the file system's VOP_RECLAIM
 *     will be called, and at last the vnode will be freed.
 *
 * If you have an ACTIVE vnode, you can revoke access to it with
 * vrevoke, usually called by VOP_REVOKE.  This causes it to enter the
 * REVOKING state temporarily while the file system's VOP_RECLAIM is
 * called, and then it changes identity to an ACTIVE vnode in deadfs.
 *
 * When a vnode is revoked, while it is temporarily REVOKING or when it
 * is ACTIVE in deadfs, vn_lock fails.  File systems should thus use
 * vn_lock to lock their own vnodes, and check for failure.
 *
 * Vnodes in the DEACTIVATING and DESTROYING states should be seen only
 * by VOP_INACTIVE and VOP_RECLAIM, respectively.
 *
 * The state is protected by v_interlock.  When a vnode is in the
 * REVOKING, DEACTIVATING, or DESTROYING state, one thread is
 * exclusively responsible for taking an action and transitioning the
 * vnode to another state, or freeing it in the case of DESTROYING.
 * State transitions from REVOKING and DEACTIVATING are signalled by
 * the condvar v_cv.  No other state transitions are signalled.
 *
 * File systems' responsibilities:
 *
 * - VOP_INACTIVE is called when a vnode is no longer referenced and is
 *   in the DEACTIVATING state.  It must tell the caller whether the
 *   vnode is still useful, in which case it will become INACTIVE, or
 *   not (e.g., because the last link on disk has been unlinked), in
 *   which case it will be reclaimed.  VOP_INACTIVE must release the
 *   vnode lock.
 *
 * - VOP_RECLAIM is called to release resources associated with a vnode
 *   when it is either being destroyed (DESTROYING) or revoked
 *   (REVOKING).  VOP_RECLAIM must remove the vnode from all file
 *   system data structures where inactive vnodes live, so that when it
 *   returns, the vnode is no longer available for someone to try to
 *   vget it.
 *
 * - VOP_PUTPAGES and VOP_FSYNC may be given ACTIVE, REVOKING, or
 *   DESTROYING vnodes.
 *
 * - VOP_BWRITE may also be given an ACTIVE, REVOKING, or DESTROYING
 *   vnode, but the possibility of REVOKING or DESTROYING looks like an
 *   artefact of an old version of the NFS code before NFS vnodes had
 *   locks.  (See the XXX in vinvalbuf from vfs_subr.c rev. 1.441.)
 *
 * - All other vops take ACTIVE vnodes.
 */
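
/*
 * As an illustration of the lookup protocol above (steps 1-4), here is
 * a sketch of what a file system's "get vnode by inode number" routine
 * might look like.  It is not part of this file's API: the tag
 * VT_MYFS, the vnode operations vector myfs_vnodeop_p, and the
 * myfs_hashlock/myfs_hash_*/myfs_inode_* names are all hypothetical,
 * and where step 3 says "try vget ... and return", this sketch simply
 * retries from the top, which has the same effect.
 *
 *	int
 *	myfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
 *	{
 *		struct myfs_inode *ip;
 *		struct vnode *vp;
 *		int error;
 *
 *	retry:	mutex_enter(&myfs_hashlock);
 *		vp = myfs_hash_lookup(mp, ino);
 *		if (vp != NULL) {
 *			mutex_enter(vp->v_interlock);
 *			mutex_exit(&myfs_hashlock);
 *			error = vget(vp, VGET_WAITOK);
 *			mutex_exit(vp->v_interlock);
 *			if (error)
 *				goto retry;
 *			*vpp = vp;
 *			return 0;
 *		}
 *		mutex_exit(&myfs_hashlock);
 *
 *		error = getnewvnode(VT_MYFS, mp, myfs_vnodeop_p, NULL, &vp);
 *		if (error)
 *			return error;
 *		ip = myfs_inode_alloc(mp, ino);
 *
 *		mutex_enter(&myfs_hashlock);
 *		if (myfs_hash_lookup(mp, ino) != NULL) {
 *			mutex_exit(&myfs_hashlock);
 *			myfs_inode_free(ip);
 *			ungetnewvnode(vp);
 *			goto retry;
 *		}
 *		vp->v_data = ip;
 *		vp->v_type = VREG;
 *		vnode_ready(vp);
 *		myfs_hash_insert(mp, ino, vp);
 *		mutex_exit(&myfs_hashlock);
 *
 *		*vpp = vp;
 *		return 0;
 *	}
 */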

/*
 * Vnode allocation
 */

/*
 * vnalloc: Allocate a vnode.  If mp is nonnull, this is a marker vnode
 * for it; otherwise, it is a normal vnode.  Must be freed with vnfree.
 */
struct vnode *
vnalloc(struct mount *mp)
{
	static const struct vnode zero_vnode;
	struct vnode *vp;

	if (mp == NULL)		/* not a marker */
		vdrain_vnode_created();

	vp = pool_cache_get(vnode_cache, PR_WAITOK);
	KASSERT(vp != NULL);

	*vp = zero_vnode;
	vp->v_state = VS_UNINITIALIZED;
	uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
	cv_init(&vp->v_cv, "vnode");
	LIST_INIT(&vp->v_nclist);
	LIST_INIT(&vp->v_dnclist);

	if (mp == NULL) {
		rw_init(&vp->v_lock);
	} else {
		vp->v_mount = mp;
		vp->v_type = VBAD;
		vp->v_iflag = VI_MARKER;
	}

	return vp;
}

/*
 * vnfree: Free a vnode allocated with vnalloc.
 *
 * - vp must be UNINITIALIZED or DESTROYING.
 */
void
vnfree(struct vnode *vp)
{
	bool marker;

	KASSERT((vp->v_state == VS_UNINITIALIZED) ||
	    (vp->v_state == VS_DESTROYING));
	KASSERT(vp->v_usecount == 0);

	marker = ISSET(vp->v_iflag, VI_MARKER);
	if (marker) {
		KASSERT(vp->v_type == VBAD);
	} else {
		rw_destroy(&vp->v_lock);
	}

	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	cv_destroy(&vp->v_cv);
	uvm_obj_destroy(&vp->v_uobj, true);

	/* Make use-after-free fail noisily.  */
	vp->v_state = VS_UNINITIALIZED;

	pool_cache_put(vnode_cache, vp);

	if (!marker)
		vdrain_vnode_destroyed();
}

/*
 * Vnode creation
 */

/*
 * getnewvnode: Create a new vnode.  It must either be released with
 * ungetnewvnode before it is initialized, or be initialized and made
 * ACTIVE with vnode_ready, after which it is eventually destroyed with
 * vnode_destroy.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    kmutex_t *interlock, struct vnode **vpp)
{
	struct vnode *vp = NULL;
	int error;

	if (mp != NULL) {
		error = vfs_busy(mp, NULL);
		if (error)
			return error;
	}

	vp = vnalloc(NULL);

	KASSERT(vp->v_mount == NULL);
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	vp->v_data = NULL;

	vp->v_writecount = 0;
	vp->v_holdcnt = 0;

	/* These should be set up by uvm_obj_init in vnalloc.  */
	KASSERT(vp->v_usecount == 0);
	KASSERT(vp->v_uobj.pgops == &uvm_vnodeops);
	KASSERT(vp->v_uobj.uo_npages == 0);
	KASSERT(TAILQ_FIRST(&vp->v_uobj.memq) == NULL);

	vp->v_size = vp->v_writesize = VSIZENOTSET;

	if (interlock) {
		mutex_obj_hold(interlock);
		uvm_obj_setlock(&vp->v_uobj, interlock);
		KASSERT(vp->v_interlock == interlock);
	}

	vfs_insmntque(vp, mp);

	if (mp != NULL) {
		if (ISSET(mp->mnt_iflag, IMNT_MPSAFE))
			vp->v_vflag |= VV_MPSAFE;
		/*
		 * XXX This should add a reference to mp, rather than
		 * using a kludgey boolean flag in vfs_unbusy.
		 */
		vfs_unbusy(mp, true, NULL);
	}

	*vpp = vp;
	return 0;
}

/*
 * ungetnewvnode: Undo a getnewvnode before it is initialized.
 *
 * - vp must be UNINITIALIZED.
 */
void
ungetnewvnode(struct vnode *vp)
{

	KASSERT(vp->v_state == VS_UNINITIALIZED);
	KASSERT(vp->v_type == VNON);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_writecount == 0);
	KASSERT(vp->v_holdcnt == 0);
	KASSERT(vp->v_usecount == 0);

	vfs_insmntque(vp, NULL);
	vnfree(vp);
}

/*
 * vnode_ready: Make a vnode from getnewvnode ACTIVE.
 *
 * - vp must be UNINITIALIZED.
 */
void
vnode_ready(struct vnode *vp)
{

	/*
	 * We ought to have the sole reference to vp here, because it
	 * is newly created, but getnewvnode has put it on the file
	 * system's vnode list, so some nefarious party trying to walk
	 * that list may get at it first.
	 */
	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_state == VS_UNINITIALIZED);
	KASSERT(vp->v_writecount == 0);
	KASSERT(vp->v_holdcnt == 0);
	KASSERT(vp->v_usecount == 0);
	vp->v_usecount = 1;
	vp->v_state = VS_ACTIVE;
	mutex_exit(vp->v_interlock);
}

/*
 * vnode_ready_p: Return true if someone has called vnode_ready(vp),
 * false if not.  To be used when walking struct mount::mnt_vnodelist
 * to skip entries still being initialized.
 *
 * - vp->v_interlock must be held.
 */
bool
vnode_ready_p(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));

	return (vp->v_state != VS_UNINITIALIZED);
}
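
/*
 * As an illustration, here is how a caller walking a mount's vnode
 * list might use vnode_ready_p to skip entries that getnewvnode has
 * already put on the list but that have not yet gone through
 * vnode_ready, and to skip marker vnodes.  The sketch only counts the
 * ready vnodes; a real caller would do per-vnode work instead.  The
 * names mntvnode_lock and v_mntvnodes are assumed from the traditional
 * struct mount/struct vnode layout and are not defined in this file.
 *
 *	struct vnode *vp;
 *	unsigned n_ready = 0;
 *
 *	mutex_enter(&mntvnode_lock);
 *	TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 *		if (ISSET(vp->v_iflag, VI_MARKER))
 *			continue;
 *		mutex_enter(vp->v_interlock);
 *		if (vnode_ready_p(vp))
 *			n_ready++;
 *		mutex_exit(vp->v_interlock);
 *	}
 *	mutex_exit(&mntvnode_lock);
 */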

/*
 * Activating vnodes
 */

/*
 * vget: Try to activate a vnode.  If it is already ACTIVE, add a
 * reference.  If INACTIVE, make it ACTIVE.  If DEACTIVATING, wait for
 * it to finish and retry, or return EBUSY if we can't wait.
 * Otherwise, fail.
 *
 * - vp may be in any state except UNINITIALIZED.
 * - vp->v_interlock must be held.
 */
int
vget(struct vnode *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(!ISSET(vp->v_iflag, VI_MARKER));
	KASSERT(!ISSET(flags, ~VGET_WAITOK));

retry:	switch (vp->v_state) {
	case VS_UNINITIALIZED:
		vnpanic(vp, "vget of uninitialized vnode");

	case VS_ACTIVE: {
		unsigned int uc;

		do {
			uc = vp->v_usecount;
			KASSERT(0 < uc);
			if (uc == UINT_MAX)
				return EBUSY;
		} while (atomic_cas_uint(&vp->v_usecount, uc, (uc + 1))
		    != uc);
		return 0;
	}

	case VS_DEACTIVATING:
		if (ISSET(flags, VGET_WAITOK)) {
			do {
				cv_wait(&vp->v_cv, vp->v_interlock);
			} while (vp->v_state == VS_DEACTIVATING);
			goto retry;
		}
		return EBUSY;

	case VS_INACTIVE:
		KASSERT(vp->v_usecount == 0);
		vnode_remove_from_freelist(vp);
		vp->v_usecount = 1;
		vp->v_state = VS_ACTIVE;
		return 0;

	case VS_REVOKING:
	case VS_DESTROYING:
		return ENOENT;

	default:
		vnpanic(vp, "vnode in invalid state: %d", (int)vp->v_state);
	}
}

/*
 * Use counts
 */

/*
 * vref: Bump vp's use count.
 *
 * - vp must be ACTIVE or REVOKING.
 *   (Otherwise use vget to get a reference.)
 */
void
vref(struct vnode *vp)
{
	unsigned int usecount;

#if DIAGNOSTIC
	{
		mutex_enter(vp->v_interlock);
		KASSERT((vp->v_state == VS_ACTIVE) ||
		    (vp->v_state == VS_REVOKING));
		KASSERT(0 < vp->v_usecount);
		mutex_exit(vp->v_interlock);
	}
#endif

	usecount = atomic_inc_uint_nv(&vp->v_usecount);
	if (__predict_true(2 <= usecount))
		return;
	if (usecount == 0)
		vnpanic(vp, "vnode reference count overflow");
	if (usecount == 1)
		vnpanic(vp, "vref of unreferenced vnode");
}

/*
 * vrele: Drop vp's use count.  If it drops to zero, call VOP_INACTIVE
 * and maybe VOP_RECLAIM.  May take and drop vp->v_interlock.
 *
 * Because this may call VOP_INACTIVE or VOP_RECLAIM synchronously, it
 * may take vp's vnode lock, so the caller must not hold any locks that
 * are out of order with vp's vnode lock.  If this is an issue, or if
 * calling VOP_INACTIVE or VOP_RECLAIM is otherwise an issue, use
 * vrele_async instead.
 *
 * - vp must be ACTIVE or REVOKING.
 */
void
vrele(struct vnode *vp)
{

	vrele_with(vp, &vnode_deactivate);
}

/*
 * vrele_async: Drop vp's use count.  If it drops to zero, schedule an
 * asynchronous call to VOP_INACTIVE and maybe VOP_RECLAIM.  May take
 * and drop vp->v_interlock.
 *
 * - vp must be ACTIVE or REVOKING.
 */
void
vrele_async(struct vnode *vp)
{

	vrele_with(vp, &vnode_deactivate_async);
}
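
/*
 * As an illustration of when vrele_async is the right call: a caller
 * that must drop vnode references while holding its own lock, which
 * may not be taken after the vnode lock, cannot use vrele, since vrele
 * may call VOP_INACTIVE and VOP_RECLAIM and so take the vnode lock.
 * This hypothetical teardown loop (myfs_table_lock, myfs_table, and
 * myfs_table_size are not real names) hands the references to the
 * vrele thread instead:
 *
 *	struct vnode *vp;
 *	size_t i;
 *
 *	mutex_enter(&myfs_table_lock);
 *	for (i = 0; i < myfs_table_size; i++) {
 *		vp = myfs_table[i];
 *		myfs_table[i] = NULL;
 *		if (vp != NULL)
 *			vrele_async(vp);
 *	}
 *	mutex_exit(&myfs_table_lock);
 */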

static inline void
vrele_with(struct vnode *vp, void (*deactivate)(struct vnode *vp))
{

	if (__predict_true(atomic_dec_uint_lock_if_zero(&vp->v_usecount,
		    vp->v_interlock)))
		return;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_state == VS_ACTIVE) || (vp->v_state == VS_REVOKING));
	KASSERT(vp->v_usecount == 0);

	vp->v_state = VS_DEACTIVATING;

	/*
	 * The deactivate callback releases vp->v_interlock; on the
	 * destroy path it may also free vp, so vp must not be touched
	 * after this point.
	 */
	(*deactivate)(vp);
}

static void
vnode_deactivate(struct vnode *vp)
{
	bool destroy;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_DEACTIVATING);
	mutex_exit(vp->v_interlock);

	VOP_LOCK(vp, LK_EXCLUSIVE);	/* XXX This is silly.  */
	VOP_INACTIVE(vp, &destroy);

	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_state == VS_DEACTIVATING);
	vp->v_state = VS_INACTIVE;
	cv_broadcast(&vp->v_cv);
	if (destroy) {
		/* vnode_destroy drops vp->v_interlock and frees vp.  */
		vnode_destroy(vp);
	} else {
		vnode_add_to_freelist(vp);
		mutex_exit(vp->v_interlock);
	}
}

static void
vnode_deactivate_async(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_DEACTIVATING);

	/* Pretend it's active until the vrele thread can get to it.  */
	vp->v_state = VS_ACTIVE;
	vp->v_usecount = 1;
	cv_broadcast(&vp->v_cv);

	mutex_enter(&vrele_lock);
	TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
	/* XXX Why delay?  */
	/* XXX Use a per-CPU workqueue instead?  */
	if ((vrele_pending == UINT_MAX) ||
	    (++vrele_pending > (desiredvnodes >> 8)))
		cv_signal(&vrele_cv);
	mutex_exit(&vrele_lock);

	mutex_exit(vp->v_interlock);
}

static void
vrele_thread(void *arg __unused)
{
	struct vnode *vp;

	for (;;) {
		mutex_enter(&vrele_lock);
		while (TAILQ_EMPTY(&vrele_list))
			cv_wait(&vrele_cv, &vrele_lock);
		vp = TAILQ_FIRST(&vrele_list);
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		/* May be zero if we overflowed, but that's OK.  */
		if (vrele_pending)
			vrele_pending--;
		mutex_exit(&vrele_lock);

		vrele(vp);
	}
}

/*
 * Vnode lock
 */

/*
 * vn_lock: Lock vp with VOP_LOCK.  Fail and leave it unlocked if it
 * has been revoked.  Return zero on success, error code on failure.
 *
 * - vp must be ACTIVE or REVOKING.
 */
int
vn_lock(struct vnode *vp, int flags)
{
	int error = 0;

	VOP_LOCK(vp, flags);
	mutex_enter(vp->v_interlock);
	if (__predict_false((vp->v_state == VS_REVOKING) ||
		(vp->v_op == dead_vnodeop_p)))
		error = ENOENT;
	else
		KASSERT(vp->v_state == VS_ACTIVE);
	mutex_exit(vp->v_interlock);
	if (error)
		VOP_UNLOCK(vp);

	return error;
}

/*
 * vn_lock_deadok: Lock vp with VOP_LOCK.  If it is being revoked, wait
 * until it has been revoked and then lock it as a deadfs node with
 * VOP_LOCK.
 *
 * - vp must be ACTIVE or REVOKING.
 */
void
vn_lock_deadok(struct vnode *vp, int flags)
{

retry:	VOP_LOCK(vp, flags);
	mutex_enter(vp->v_interlock);
	if (__predict_false(vp->v_state == VS_REVOKING)) {
		VOP_UNLOCK(vp);
		do {
			cv_wait(&vp->v_cv, vp->v_interlock);
		} while (vp->v_state == VS_REVOKING);
		/* Drop the interlock before taking the vnode lock again.  */
		mutex_exit(vp->v_interlock);
		goto retry;
	} else {
		KASSERT(vp->v_state == VS_ACTIVE);
	}
	mutex_exit(vp->v_interlock);
}

/*
 * vn_unlock: Unlock a vnode that was locked with vn_lock or
 * vn_lock_deadok.
 *
 * - vp's vnode lock must be held.
 * - vp must be ACTIVE or REVOKING.
 */
void
vn_unlock(struct vnode *vp)
{

	VOP_UNLOCK(vp);
}
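
/*
 * As an illustration of the rule stated at the top of this file, that
 * file systems should lock their own vnodes with vn_lock and check for
 * failure, here is the shape of a typical file system operation.  The
 * function myfs_update_flags and the struct myfs_inode with its
 * i_flags member are hypothetical.
 *
 *	int
 *	myfs_update_flags(struct vnode *vp, int flags)
 *	{
 *		struct myfs_inode *ip;
 *		int error;
 *
 *		error = vn_lock(vp, LK_EXCLUSIVE);
 *		if (error)
 *			return error;
 *		ip = vp->v_data;
 *		ip->i_flags |= flags;
 *		vn_unlock(vp);
 *		return 0;
 *	}
 */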

/*
 * Revocation and reclamation
 */

/*
 * vrevoke: Turn vp into a dead vnode, to implement VOP_REVOKE.  Will
 * take and drop vp->v_interlock.
 *
 * - vp must be ACTIVE.
 *
 * NOTE: If you hold the vnode lock on entry to this, either you must
 * use the same VOP_UNLOCK as deadfs afterward or you must arrange some
 * other way to have exclusive access.  (XXXRACE Currently genfs_revoke
 * does NOT guarantee exclusive access!)
 *
 * XXX The caller currently can't hold the vnode lock because
 * vnode_reclaim takes it.  We could do unlock/reclaim/lock if we made
 * all callers do lock/vrevoke/unlock, though.
 *
 * XXX The old vrevoke would replace v_op by spec_vnodeop_p in some
 * cases.  I have not yet disentangled what those cases are.  Help?
 */
void
vrevoke(struct vnode *vp)
{

#if notyet			/* XXX */
	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
#endif

	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_state == VS_ACTIVE);
	KASSERT(0 < vp->v_usecount);
	vp->v_state = VS_REVOKING;
	mutex_exit(vp->v_interlock);

#if notyet			/* XXX */
	VOP_UNLOCK(vp);
#endif
	vnode_reclaim(vp);
#if notyet			/* XXX */
	VOP_LOCK(vp, LK_EXCLUSIVE);
#endif

	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_state == VS_REVOKING);
	vp->v_state = VS_ACTIVE;
	/*
	 * XXX Pointer writes must be atomic; otherwise concurrent
	 * VOP_* trying to use v_op will fail horribly.
	 */
	vp->v_op = dead_vnodeop_p;	/* XXX What about specnodes? */
	vp->v_tag = VT_NON;
	KNOTE(&vp->v_klist, NOTE_REVOKE);
	cv_broadcast(&vp->v_cv);
	KASSERT(!ISSET(vp->v_iflag, VI_ONWORKLST));
	mutex_exit(vp->v_interlock);
}

/*
 * vrecycle: If vp is INACTIVE, release interlock (if nonnull), destroy
 * vp, and return true.  Otherwise, return false.  Takes
 * vp->v_interlock, which is dropped again on both paths (on the
 * success path by vnode_destroy).
 *
 * XXX Should this wait for vnodes in changing states?  The old
 * vrecycle didn't, but it's not clear whether that's a bug or what the
 * callers actually want.
 */
int
vrecycle(struct vnode *vp, kmutex_t *interlock)
{

	mutex_enter(vp->v_interlock);
	KASSERT(!ISSET(vp->v_iflag, VI_MARKER));

#if notyet			/* XXX */
	while ((vp->v_state == VS_REVOKING) ||
	    (vp->v_state == VS_DEACTIVATING))
		cv_wait(&vp->v_cv, vp->v_interlock);
#endif

	if (vp->v_state != VS_INACTIVE) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	if (interlock)
		mutex_exit(interlock);
	vnode_remove_from_freelist(vp);
	vnode_destroy(vp);
	return 1;
}

/*
 * vnode_destroy: Do bookkeeping, reclaim vp, and free vp.  Will drop
 * vp->v_interlock and will take and drop the vnode lock.
 *
 * - vp must be INACTIVE.
 * - vp->v_interlock must be held.
 *
 * VOP_RECLAIM guarantees we have the sole reference to vp when
 * vnode_reclaim returns, even after we drop vp->v_interlock.
 */
static void
vnode_destroy(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_INACTIVE);

	vp->v_state = VS_DESTROYING;

	/* XXX Begin crud cargo-culted from old vclean.  */
	if (ISSET(vp->v_iflag, VI_EXECMAP)) {
		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
	}
	/* XXX Old vrelel cleared VI_WRMAP; old vclean didn't.  Hmm.  */
	vp->v_iflag &= ~(VI_TEXT | VI_EXECMAP | VI_WRMAP);
	/* XXX End crud cargo-culted from old vclean.  */

	mutex_exit(vp->v_interlock);

	/* Nuke the file system's data structures.  */
	vnode_reclaim(vp);

	/*
	 * XXX How do we ensure nobody grabs onto a reference from
	 * mnt_vnodelist before we've taken it off here?  Can we do
	 * this before vnode_reclaim?
	 */
	vfs_insmntque(vp, NULL);

	vnfree(vp);
}
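
/*
 * As an illustration of what vnode_reclaim (below) expects of a file
 * system, here is the shape of a VOP_RECLAIM implementation: pull the
 * vnode out of every table a lookup could find it in, free the
 * per-vnode file system state, and leave v_data NULL.  The names
 * myfs_reclaim, myfs_hashlock, myfs_hash_remove, and myfs_inode_free
 * are hypothetical; the usual vop_reclaim_args shape is assumed.
 *
 *	int
 *	myfs_reclaim(void *v)
 *	{
 *		struct vop_reclaim_args *ap = v;
 *		struct vnode *vp = ap->a_vp;
 *		struct myfs_inode *ip = vp->v_data;
 *
 *		mutex_enter(&myfs_hashlock);
 *		myfs_hash_remove(ip);
 *		mutex_exit(&myfs_hashlock);
 *
 *		myfs_inode_free(ip);
 *		vp->v_data = NULL;
 *		return 0;
 *	}
 */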

/*
 * vnode_reclaim: Flush buffers associated with vp, call VOP_RECLAIM,
 * and purge caches related to vp in preparation for revoking or
 * destroying it.
 *
 * - vp must be DESTROYING or REVOKING.
 *
 * XXX It would be nice if it were not necessary to take the vnode lock
 * here, because then we could do reclamation synchronously in
 * getnewvnode.
 */
static void
vnode_reclaim(struct vnode *vp)
{
	int error;

#if DIAGNOSTIC
	{
		mutex_enter(vp->v_interlock);
		KASSERT((vp->v_state == VS_DESTROYING) ||
		    (vp->v_state == VS_REVOKING));
		mutex_exit(vp->v_interlock);
	}
#endif

	/* XXX Begin crud cargo-culted from old vclean.  */
	VOP_LOCK(vp, LK_EXCLUSIVE);
	/* XXX XXX Can vinvalbuf reactivate vp?  That would suck.  */
	error = vinvalbuf(vp, V_SAVE, NOCRED, 1, 0, 0);
	if (error) {
		if (wapbl_vphaswapbl(vp))
			WAPBL_DISCARD(wapbl_vptomp(vp));
		error = vinvalbuf(vp, 0, NOCRED, 1, 0, 0);
		KASSERT(error == 0);
	}
	KASSERT(!ISSET(vp->v_iflag, VI_ONWORKLST));
	VOP_UNLOCK(vp);
	/* XXX End crud cargo-culted from old vclean.  */

	VOP_RECLAIM(vp);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_uobj.uo_npages == 0);

	/* XXX Begin crud cargo-culted from old vclean.  */
	if ((vp->v_type == VREG) && (vp->v_ractx != NULL)) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}
	cache_purge(vp);
	/* XXX End crud cargo-culted from old vclean.  */
}

/*
 * Hold counts.  When there are buffers in the kernel (buffer cache or
 * uvm) for a vnode, we would prefer to destroy that vnode later.  The
 * hold count records how many such buffers there are.
 */

/*
 * vholdl: Bump vp's hold count.
 *
 * - vp must be INACTIVE, ACTIVE, or REVOKING.
 * - vp->v_interlock must be held.
 * - vp must not be a marker vnode.
 */
void
vholdl(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(!ISSET(vp->v_iflag, VI_MARKER));
	KASSERT((vp->v_state == VS_INACTIVE) ||
	    (vp->v_state == VS_ACTIVE) ||
	    (vp->v_state == VS_REVOKING));

	if (vp->v_holdcnt == UINT_MAX)
		vnpanic(vp, "vnode hold count overflow");
	vp->v_holdcnt++;

	if ((vp->v_state == VS_INACTIVE) && (vp->v_holdcnt == 1)) {
		/*
		 * If it was inactive and unheld, it is on the free
		 * list and should now be on the hold list, so move it.
		 */
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_free_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_hold_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}

/*
 * holdrelel: Drop vp's hold count.
 *
 * - vp must be INACTIVE, ACTIVE, or REVOKING.
 * - vp->v_interlock must be held.
 * - vp must not be a marker vnode.
 */
void
holdrelel(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_state == VS_INACTIVE) ||
	    (vp->v_state == VS_ACTIVE) ||
	    (vp->v_state == VS_REVOKING));
	KASSERT(!ISSET(vp->v_iflag, VI_MARKER));
	KASSERT(0 < vp->v_holdcnt);

	vp->v_holdcnt--;

	if ((vp->v_state == VS_INACTIVE) && (vp->v_holdcnt == 0)) {
		/*
		 * If it was inactive and this was the last hold, it
		 * was on the hold list and should now be on the free
		 * list, so move it.
		 */
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_free_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}
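
/*
 * As an illustration, this is roughly how code that attaches a buffer
 * to a vnode would maintain the hold count; it is loosely modeled on
 * bgetvp/brelvp in the buffer cache, which are not defined here.  On
 * attach:
 *
 *	mutex_enter(vp->v_interlock);
 *	bp->b_vp = vp;
 *	vholdl(vp);
 *	mutex_exit(vp->v_interlock);
 *
 * and on detach:
 *
 *	mutex_enter(vp->v_interlock);
 *	bp->b_vp = NULL;
 *	holdrelel(vp);
 *	mutex_exit(vp->v_interlock);
 *
 * While the hold count is nonzero, an INACTIVE vnode sits on the hold
 * list rather than the free list (see below).
 */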

/*
 * Freelists.  Vnodes that are not actively being used stay cached in
 * case someone wants them soon, but get queued up to be destroyed when
 * the number of vnodes in the system gets too high.  Vnodes not used
 * by buffers in the kernel are on the /free list/, and get destroyed
 * first; vnodes used by buffers in the kernel are on the /hold list/,
 * and get destroyed after everything in the free list.
 *
 * Destruction happens asynchronously, in the vdrain thread, so no file
 * system's VOP_RECLAIM may allocate or otherwise wait for resources
 * that allocating vnodes in any file system may require.  (Yikes!)
 */

/*
 * vnode_add_to_freelist: Add vp to the free list or hold list as
 * appropriate.
 *
 * - vp must be INACTIVE.
 * - vp->v_interlock must be held.
 * - vp must not be a marker vnode.
 */
static void
vnode_add_to_freelist(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_INACTIVE);
	KASSERT(!ISSET(vp->v_iflag, VI_MARKER));

	mutex_enter(&vnode_free_list_lock);
	vp->v_freelisthd = ((vp->v_holdcnt == 0) ?
	    &vnode_free_list : &vnode_hold_list);
	TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
	mutex_exit(&vnode_free_list_lock);
}

/*
 * vnode_remove_from_freelist: Remove vp from whichever freelist it is
 * on.
 *
 * - vp must be INACTIVE.
 * - vp->v_interlock must be held.
 * - vp must not be a marker vnode.
 */
static void
vnode_remove_from_freelist(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_INACTIVE);
	KASSERT(!ISSET(vp->v_iflag, VI_MARKER));

	mutex_enter(&vnode_free_list_lock);
	KASSERT(vp->v_freelisthd == ((vp->v_holdcnt == 0) ?
		&vnode_free_list : &vnode_hold_list));
	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);
}

static void
vdrain_vnode_created(void)
{

	mutex_enter(&vnode_free_list_lock);
	if (numvnodes == UINT_MAX)
		panic("too many vnodes");
	numvnodes++;
	if ((desiredvnodes + (desiredvnodes/10)) < numvnodes)
		cv_signal(&vdrain_cv);
	mutex_exit(&vnode_free_list_lock);
}

static void
vdrain_vnode_destroyed(void)
{

	mutex_enter(&vnode_free_list_lock);
	numvnodes--;
	mutex_exit(&vnode_free_list_lock);
}

static void
vdrain_thread(void *arg __unused)
{

	for (;;) {
		mutex_enter(&vnode_free_list_lock);
		while (numvnodes < desiredvnodes)
			cv_wait(&vdrain_cv, &vnode_free_list_lock);
		/* vdrain_1 drops vnode_free_list_lock for us.  */
		if (vdrain_1() != 0) {
			/* Nothing could be destroyed; try again soon.  */
			kpause("vdrain", false, hz, NULL);
		}
	}
}

static int
vdrain_1(void)
{
	static struct vnodelst *freelists[] = {
		&vnode_free_list,
		&vnode_hold_list,
	};
	size_t i;
	struct vnode *vp;
	struct mount *mp;
	int error = ENOENT;

	KASSERT(mutex_owned(&vnode_free_list_lock));

	for (i = 0; i < __arraycount(freelists); i++) {
		if (TAILQ_EMPTY(freelists[i]))
			continue;
		TAILQ_FOREACH(vp, freelists[i], v_freelist) {
			/*
			 * XXX Lock order reversal!  We can get rid of
			 * this by removing vp from the queue before
			 * taking its interlock and putting it back on
			 * the queue if the fstrans can't start.
			 * However, that also requires changing
			 * everything else that manages vnodes on the
			 * freelists to handle the case that vdrain may
			 * have taken the vnode off the freelist and
			 * may be about to put it back on.  That is
			 * more trouble than it is worth to avoid a
			 * single speculative grab of this vnode.
			 */
			if (!mutex_tryenter(vp->v_interlock)) {
				error = EBUSY;
				continue;
			}
			KASSERT(vp->v_state == VS_INACTIVE);
			KASSERT(vp->v_usecount == 0);
			KASSERT(vp->v_freelisthd == freelists[i]);
			mp = vp->v_mount;
			if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
				mutex_exit(vp->v_interlock);
				error = EBUSY;
				continue;
			}
			goto found;
		}
	}
	mutex_exit(&vnode_free_list_lock);
	return error;

found:	TAILQ_REMOVE(freelists[i], vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	vnode_destroy(vp);
	fstrans_done(mp);
	return 0;
}