/*	$NetBSD$	*/

/*
 * XXX WARNING WARNING WARNING XXX
 *
 * This code is not tested!  It is a draft of an idea.
 *
 * This is a sketch of adapting fstrans(9) to cache transaction records
 * CPU-locally and to add a state for unmounted file systems in which
 * fstrans_start immediately fails, with the vague intent of replacing
 * vfs_busy/vfs_unbusy by fstrans_start/fstrans_done and using fstrans
 * to implement forced unmount.
 *
 * This draft uses a pool cache per mount point to manage caching of
 * transaction records.
 */

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD$");

#define	_FSTRANS_API_PRIVATE

/*
 * XXX The angle-bracketed header names were lost in extraction; the
 * list below is reconstructed from what the code actually uses.
 */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/buf.h>
#include <sys/condvar.h>
#include <sys/fstrans.h>
#include <sys/lwp.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/pserialize.h>
#include <sys/queue.h>
#include <sys/specificdata.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <miscfs/specfs/specdev.h>

/* Data structures */

struct fstrans_mount_info;
struct fscow_handler;
struct fstrans_lwp_info;
struct fstrans_txn;

/*
 * struct fstrans_mount_info: Per-mount state.  Access to all fields is
 * serialized with fstrans.lock, except fmi_cow_handlers, which is read
 * under fscow_enter/exit and changed under fscow_change_enter/exit.
 */
struct fstrans_mount_info {
	enum fstrans_state		fmi_state;
	LIST_HEAD(, fstrans_txn)	fmi_txns;
	bool				fmi_cow_changing;
	LIST_HEAD(, fscow_handler)	fmi_cow_handlers;
	pool_cache_t			fmi_txn_pc;
};

/*
 * struct fscow_handler: A copy-on-write handler record.
 */
struct fscow_handler {
	LIST_ENTRY(fscow_handler)	ch_list;
	int			(*ch_func)(void *, struct buf *, bool);
	void			*ch_arg;
};

/*
 * struct fstrans_lwp_info: Per-LWP state.  Contains a stack of active
 * transaction records.  We wrap this up in a separate object rather
 * than simply using lwp_getspecific and lwp_setspecific to manage the
 * stack because lwp_setspecific requires taking a mutex, but we want
 * to avoid interprocessor synchronization.
 */
struct fstrans_lwp_info {
	struct fstrans_txn	*fli_stack;
};
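/*
 * Illustrative sketch, not part of the draft: how the per-LWP stack is
 * meant to behave from a caller's perspective.  A nested fstrans_start
 * on the same mount finds the record on curlwp's stack and merely bumps
 * its depth.  The guard macro and function name here are hypothetical.
 */
#ifdef FSTRANS_EXAMPLE
static int
fstrans_example_nested(struct mount *mp)
{
	int error;

	error = fstrans_start(mp, FSTRANS_LOCK_SHARED, 0);
	if (error)
		return error;

	/* Nested start: reuses the record via curlwp's stack. */
	error = fstrans_start(mp, FSTRANS_LOCK_SHARED, 0);
	if (error == 0)
		fstrans_done(mp);	/* decrements txn_depth back to 1 */

	fstrans_done(mp);		/* releases the record */
	return error;
}
#endif	/* FSTRANS_EXAMPLE */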
/*
 * struct fstrans_txn: Record for a transaction by a particular LWP on
 * a particular mount (or a nested set of such transactions).  Each LWP
 * has a stack of these that are active, each CPU has a cache of these
 * that are inactive, and each mount point has a list of these so that
 * it can find which ones are active and which ones are inactive.
 *
 *	:	stable
 *	f	access under fstrans.lock
 *	c	access by {curcpu, curlwp} only, if {inactive, active}
 *	l	write only by curlwp under pserialize_read_enter/exit,
 *		read by anyone
 */
struct fstrans_txn {
	struct mount		*txn_mount;	/* : */
	LIST_ENTRY(fstrans_txn)	txn_mnt_list;	/* f */
	struct fstrans_txn	*txn_next;	/* c */
	unsigned int		txn_depth;	/* l */
	unsigned int		txn_cow_depth;	/* l */
	enum fstrans_lock	txn_lock;	/* l */
};

/* Forward declarations */

static int	fstrans_grant_lock(enum fstrans_state, enum fstrans_lock);

static int	_fstrans_start(struct mount *, enum fstrans_lock, int,
		    struct fstrans_txn **);
static void	_fstrans_done(struct mount *, struct fstrans_txn *);
static bool	fstrans_invalid_active_transactions_p(struct mount *);

static void	fscow_change_enter(struct mount *);
static void	fscow_change_exit(struct mount *);
static bool	fscow_running_p(struct mount *);
static int	fscow_enter(struct mount *, struct fstrans_txn **);
static void	fscow_exit(struct mount *, struct fstrans_txn *);

static struct fstrans_txn *
		fstrans_txn_lwp_find(struct mount *);
static int	fstrans_txn_lwp_push(struct fstrans_txn *, int);
static void	fstrans_txn_lwp_pop(struct fstrans_txn *);
static void	fstrans_txn_lwp_dtor(void *);

static int	fstrans_txn_ctor(void *, void *, int);
static void	fstrans_txn_dtor(void *, void *);

/* Global state */

/* Key for per-LWP transaction stack. */
static specificdata_key_t fstrans_txn_perlwp __read_mostly;

/* Pool cache for struct fstrans_mount_info. */
static pool_cache_t fstrans_mount_pc __read_mostly;

/* Pool cache for struct fstrans_lwp_info. */
static pool_cache_t fstrans_lwp_pc __read_mostly;

/* Pool cache for struct fscow_handler. */
static pool_cache_t fscow_handler_pc __read_mostly;

/* Global transaction state.  (Could be per-mount instead.) */
struct {
	/* fstrans.lock: Serializes access to various state. */
	kmutex_t	lock;

	/*
	 * fstrans.state_cv: Notifies changes of file system state and
	 * cow-changing flag.
	 */
	kcondvar_t	state_cv;

	/*
	 * fstrans.done_cv: Notifies transaction completion, including
	 * fstrans_done and fscow_exit.
	 */
	kcondvar_t	done_cv;

	/*
	 * fstrans.psz: Waits for changes of file system state and
	 * cow-changing flag to be propagated.
	 */
	pserialize_t	psz;
} fstrans __cacheline_aligned;

/* Transaction lock matrix */

static int
fstrans_grant_lock(enum fstrans_state state, enum fstrans_lock lock)
{

	if (__predict_true(state == FSTRANS_STATE_NORMAL))
		return 0;

	switch (state) {
	case FSTRANS_STATE_NORMAL:
		switch (lock) {
		case FSTRANS_LOCK_LAZY:
			return 0;
		case FSTRANS_LOCK_SHARED:
			return 0;
		case FSTRANS_LOCK_EXCL:
			return 0;
		default:
			panic("bad fstrans lock: %d", (int)lock);
		}

	case FSTRANS_STATE_SUSPENDING:
		switch (lock) {
		case FSTRANS_LOCK_LAZY:
			return 0;
		case FSTRANS_LOCK_SHARED:
			return EBUSY;
		case FSTRANS_LOCK_EXCL:
			return 0;
		default:
			panic("bad fstrans lock: %d", (int)lock);
		}

	case FSTRANS_STATE_SUSPENDED:
		switch (lock) {
		case FSTRANS_LOCK_LAZY:
			return EBUSY;
		case FSTRANS_LOCK_SHARED:
			return EBUSY;
		case FSTRANS_LOCK_EXCL:
			return 0;
		default:
			panic("bad fstrans lock: %d", (int)lock);
		}

	case FSTRANS_STATE_UNMOUNTED:
		switch (lock) {
		case FSTRANS_LOCK_LAZY:
			return EIO;
		case FSTRANS_LOCK_SHARED:
			return EIO;
		case FSTRANS_LOCK_EXCL:
			return EIO;
		default:
			panic("bad fstrans lock: %d", (int)lock);
		}

	default:
		panic("bad fstrans state: %d", (int)state);
	}
}
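/*
 * Summary of the grant matrix above (rows are states, columns are lock
 * types; entries are the error returned, 0 meaning granted):
 *
 *			LAZY	SHARED	EXCL
 *	NORMAL		0	0	0
 *	SUSPENDING	0	EBUSY	0
 *	SUSPENDED	EBUSY	EBUSY	0
 *	UNMOUNTED	EIO	EIO	EIO
 *
 * EBUSY means `wait for a state change and retry', which is what the
 * wait loop in _fstrans_start does; EIO is permanent.
 */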
/* Initialization */

void
fstrans_init(void)
{
	int error;

	ASSERT_SLEEPABLE();

	error = lwp_specific_key_create(&fstrans_txn_perlwp,
	    &fstrans_txn_lwp_dtor);
	if (error)
		panic("unable to create lwp-specific fstrans key: %d", error);

	fstrans_mount_pc = pool_cache_init(sizeof(struct fstrans_mount_info),
	    0, 0, 0, "fstrans_mount_info", NULL, IPL_NONE, NULL, NULL, NULL);
	fstrans_lwp_pc = pool_cache_init(sizeof(struct fstrans_lwp_info),
	    0, 0, 0, "fstrans_lwp_info", NULL, IPL_NONE, NULL, NULL, NULL);
	fscow_handler_pc = pool_cache_init(sizeof(struct fscow_handler),
	    0, 0, 0, "fscow_handler", NULL, IPL_NONE, NULL, NULL, NULL);

	mutex_init(&fstrans.lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&fstrans.state_cv, "fstxstat");
	cv_init(&fstrans.done_cv, "fstxdone");
	fstrans.psz = pserialize_create();
}

/* Per-mount setup and teardown */

int
fstrans_mount(struct mount *mp)
{
	struct fstrans_mount_info *fmi;
	int error;

	ASSERT_SLEEPABLE();

	/* Make sure the mount point doesn't go away. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;

	/* Allocate per-mount transaction state. */
	fmi = pool_cache_get(fstrans_mount_pc, PR_WAITOK);
	fmi->fmi_state = FSTRANS_STATE_NORMAL;
	LIST_INIT(&fmi->fmi_txns);
	fmi->fmi_cow_changing = false;
	LIST_INIT(&fmi->fmi_cow_handlers);
	fmi->fmi_txn_pc = pool_cache_init(sizeof(struct fstrans_txn),
	    0, 0, 0, "fstrans_txn", NULL, IPL_NONE,
	    &fstrans_txn_ctor, &fstrans_txn_dtor, mp);

	/* Hook it up. */
	mp->mnt_transinfo = fmi;
	mp->mnt_iflag |= IMNT_HAS_TRANS;

	/* Unbusy the mount point but keep a reference. */
	vfs_unbusy(mp, true /*keepref*/, NULL);

	return 0;
}

void
fstrans_unmount(struct mount *mp)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;

	/* We may sleep to execute a cross-call. */
	ASSERT_SLEEPABLE();

	KASSERT(fmi != NULL);
	KASSERT(fmi->fmi_state == FSTRANS_STATE_UNMOUNTED);
	KASSERT(LIST_EMPTY(&fmi->fmi_cow_handlers));

	/* Destroy the pool cache of transactions. */
	pool_cache_destroy(fmi->fmi_txn_pc);

	/* There had better be none left now. */
	KASSERT(LIST_EMPTY(&fmi->fmi_txns));

	/* Unhook the per-mount transaction state and drop our reference. */
	mp->mnt_iflag &= ~IMNT_HAS_TRANS;
	mp->mnt_transinfo = NULL;
	vfs_destroy(mp);

	/* Free the per-mount transaction state. */
	pool_cache_put(fstrans_mount_pc, fmi);
}
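/*
 * Illustrative sketch, not part of the draft: the teardown ordering
 * that fstrans_unmount's assertions imply.  A hypothetical forced
 * unmount path would first drive the state to UNMOUNTED (via
 * SUSPENDING, per the transition comment in fstrans_setstate below)
 * and only then call fstrans_unmount.
 */
#ifdef FSTRANS_EXAMPLE
static void
fstrans_example_forced_unmount(struct mount *mp)
{

	/* Error handling elided for illustration. */
	(void)fstrans_setstate(mp, FSTRANS_STATE_SUSPENDING);
	(void)fstrans_setstate(mp, FSTRANS_STATE_UNMOUNTED);

	/* New fstrans_start calls now fail with EIO. */
	fstrans_unmount(mp);
}
#endif	/* FSTRANS_EXAMPLE */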
/* Transactions */

bool
fstrans_in_transaction(struct mount *mp)
{

	return fstrans_txn_lwp_find(mp) != NULL;
}

static int
_fstrans_start(struct mount *mp, enum fstrans_lock lock, int flags,
    struct fstrans_txn **txnp)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;
	struct fstrans_txn *txn;
	const int pool_flags = (ISSET(flags, FSTRANS_FLAG_NOWAIT)?
	    PR_NOWAIT : PR_WAITOK);
	int s, error;

	KASSERT(fmi != NULL);

	/* Look for an existing transaction record for this mount point. */
	txn = fstrans_txn_lwp_find(mp);
	if (txn) {
		/* If we're nested, just increase nesting depth. */
		KASSERT(0 < txn->txn_depth);
		KASSERT(lock != FSTRANS_LOCK_EXCL);
		if (txn->txn_depth == UINT_MAX) {
			error = EBUSY;	/* XXX What error code? */
			goto fail0;
		}
		txn->txn_depth++;
		goto win0;
	}

	/* Get a transaction record for this mount point. */
	txn = pool_cache_get(fmi->fmi_txn_pc, pool_flags);
	if (txn == NULL) {
		/*
		 * XXX Can't distinguish here between transient memory
		 * allocation failure and permanent unmounting failure.
		 */
		error = EBUSY;
		goto fail0;
	}

	/* Check whether a transaction is allowed to begin. */
	s = pserialize_read_enter();
	error = fstrans_grant_lock(fmi->fmi_state, lock);
	if (__predict_true(error == 0)) {
		txn->txn_depth = 1;
		txn->txn_lock = lock;
		pserialize_read_exit(s);
		goto win1;
	}
	pserialize_read_exit(s);

	/*
	 * Transaction not allowed.  If we must wait for suspension but
	 * the caller is not allowed to wait, or if the file system has
	 * been unmounted, fail now.
	 */
	KASSERT(error != 0);
	if ((error != EBUSY) || ISSET(flags, FSTRANS_FLAG_NOWAIT))
		goto fail1;

	/*
	 * Wait until either the lock is granted, the file system has
	 * been unmounted, or we have been interrupted.
	 */
	mutex_enter(&fstrans.lock);
	while ((error = fstrans_grant_lock(fmi->fmi_state, lock)) == EBUSY) {
		if (ISSET(flags, FSTRANS_FLAG_INTR)) {
			error = cv_wait_sig(&fstrans.state_cv, &fstrans.lock);
			if (error)
				break;
		} else {
			cv_wait(&fstrans.state_cv, &fstrans.lock);
		}
	}
	if (__predict_true(error == 0)) {
		txn->txn_depth = 1;
		txn->txn_lock = lock;
		mutex_exit(&fstrans.lock);
		goto win1;
	}
	mutex_exit(&fstrans.lock);

fail1:	/* Free new transaction on failure. */
	pool_cache_put(fmi->fmi_txn_pc, txn);

fail0:	KASSERT(error);
	return error;

win1:	/* Push new transaction on success. */
	KASSERT(txn != NULL);
	error = fstrans_txn_lwp_push(txn, flags);
	if (error)
		goto fail1;

win0:	KASSERT(txn != NULL);
	if (txnp)
		*txnp = txn;
	return 0;
}

static void
_fstrans_done(struct mount *mp, struct fstrans_txn *txn)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;
	int s;

	KASSERT(fmi != NULL);
	KASSERT(txn != NULL);
	KASSERT(0 < txn->txn_depth);

	/* If we're nested, just decrease nesting depth. */
	if (1 < txn->txn_depth) {
		txn->txn_depth -= 1;
		return;
	}

	/* Check whether anyone is waiting for transactions to drain. */
	s = pserialize_read_enter();
	if (__predict_true(fmi->fmi_state == FSTRANS_STATE_NORMAL)) {
		/* Nobody's waiting.  No need to notify anyone. */
		txn->txn_depth = 0;
		pserialize_read_exit(s);
		goto out;
	}
	pserialize_read_exit(s);

	/* Notify waiters that we're done. */
	mutex_enter(&fstrans.lock);
	txn->txn_depth = 0;
	cv_broadcast(&fstrans.done_cv);
	mutex_exit(&fstrans.lock);

out:	/* Remove the transaction from curlwp's stack and free it. */
	fstrans_txn_lwp_pop(txn);
	pool_cache_put(fmi->fmi_txn_pc, txn);
}

int
fstrans_start(struct mount *mp, enum fstrans_lock lock, int flags)
{

	/* We may take an adaptive lock even if FSTRANS_FLAG_NOWAIT is set. */
	ASSERT_SLEEPABLE();

	/* Feign success if mp doesn't exist (!?) or has no transactions. */
	if ((mp == NULL) || !ISSET(mp->mnt_iflag, IMNT_HAS_TRANS))
		return 0;

	return _fstrans_start(mp, lock, flags, NULL);
}

void
fstrans_done(struct mount *mp)
{
	struct fstrans_txn *txn;

	/* We may take an adaptive lock. */
	ASSERT_SLEEPABLE();

	/* Feign success if mp doesn't exist (!?) or has no transactions. */
	if (mp == NULL || !ISSET(mp->mnt_iflag, IMNT_HAS_TRANS))
		return;

	/* Find the transaction record for this mount point. */
	txn = fstrans_txn_lwp_find(mp);
	KASSERT(txn != NULL);

	_fstrans_done(mp, txn);
}
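/*
 * Illustrative sketch, not part of the draft: a non-blocking attempt
 * at a transaction, as a hypothetical caller that must not wait for
 * suspension might write it.  (It may still take adaptive locks, per
 * the ASSERT_SLEEPABLE in fstrans_start.)
 */
#ifdef FSTRANS_EXAMPLE
static bool
fstrans_example_try(struct mount *mp)
{

	/* Fails with EBUSY instead of waiting; EIO once unmounted. */
	if (fstrans_start(mp, FSTRANS_LOCK_LAZY, FSTRANS_FLAG_NOWAIT))
		return false;

	/* ...do the work... */
	fstrans_done(mp);
	return true;
}
#endif	/* FSTRANS_EXAMPLE */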
/* State changes */

enum fstrans_state
fstrans_getstate(struct mount *mp)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;

	KASSERT(fmi != NULL);

	return fmi->fmi_state;
}

int
fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;
	int error;

	/* We're gonna sleep big-time to wait for all transactions. */
	ASSERT_SLEEPABLE();

	KASSERT(fmi != NULL);

	const enum fstrans_state old_state = fmi->fmi_state;

	/* FSTRANS_STATE_UNMOUNTED is permanent. */
	KASSERT(old_state != FSTRANS_STATE_UNMOUNTED);

	/* Nothing to do if we're already there. */
	/* XXX When does this happen? */
	if (old_state == new_state)
		return 0;

	/* Change state and wait for every CPU to observe the state change. */
	mutex_enter(&fstrans.lock);
	fmi->fmi_state = new_state;
	pserialize_perform(fstrans.psz);

	/* Wait for any active transactions which are now invalid to drain. */
	while (fstrans_invalid_active_transactions_p(mp)) {
		error = cv_wait_sig(&fstrans.done_cv, &fstrans.lock);
		if (error) {
			/* Restore the old state, notify waiters, and fail. */
			fmi->fmi_state = old_state;
			cv_broadcast(&fstrans.state_cv);
			mutex_exit(&fstrans.lock);
			return error;
		}
	}

	/* Notify waiters that the state has changed. */
	cv_broadcast(&fstrans.state_cv);
	mutex_exit(&fstrans.lock);

	/*
	 * When transitioning NORMAL->SUSPENDING->SUSPENDED->NORMAL or
	 * NORMAL->SUSPENDING->UNMOUNTED, callers will typically need
	 * to start transactions to flush buffers.  To allow this, we
	 * bracket the intermediate transitions by an exclusive
	 * transaction; otherwise, the transactions to flush buffers
	 * would wait forever.
	 *
	 * XXX Is this really necessary?  Can't all transactions to
	 * flush buffers be FSTRANS_LOCK_LAZY so that they are safe
	 * during SUSPENDING?  Alternatively, could we just eliminate
	 * fstrans_setstate altogether in favour of exposing
	 * FSTRANS_LOCK_EXCL and using fstrans_start/fstrans_done for
	 * it?
	 */
	if (old_state == FSTRANS_STATE_NORMAL)
		fstrans_start(mp, FSTRANS_LOCK_EXCL, 0);
	else if ((new_state == FSTRANS_STATE_NORMAL) ||
	    (new_state == FSTRANS_STATE_UNMOUNTED))
		fstrans_done(mp);

	return 0;
}

/*
 * Scan through mp's transaction records and return true if any are
 * currently active and invalid.  New transactions which are not valid
 * in mp's current state cannot begin.
 */
static bool
fstrans_invalid_active_transactions_p(struct mount *mp)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;
	struct fstrans_txn *txn;

	KASSERT(fmi != NULL);
	KASSERT(mutex_owned(&fstrans.lock));

	LIST_FOREACH(txn, &fmi->fmi_txns, txn_mnt_list) {
		if (txn->txn_depth == 0)
			continue;
		if (fstrans_grant_lock(fmi->fmi_state, txn->txn_lock) == 0)
			continue;
		return true;
	}

	return false;
}
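/*
 * Illustrative sketch, not part of the draft: the suspend/resume
 * bracket fstrans_setstate is designed for.  Error recovery is
 * simplified for illustration; the function name is hypothetical.
 */
#ifdef FSTRANS_EXAMPLE
static int
fstrans_example_suspend_resume(struct mount *mp)
{
	int error;

	error = fstrans_setstate(mp, FSTRANS_STATE_SUSPENDING);
	if (error)
		return error;
	error = fstrans_setstate(mp, FSTRANS_STATE_SUSPENDED);
	if (error)
		return error;

	/* ...file system is quiescent: take a snapshot, say... */

	return fstrans_setstate(mp, FSTRANS_STATE_NORMAL);
}
#endif	/* FSTRANS_EXAMPLE */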
/* Copy-on-write handlers */

/*
 * Management of the list of copy-on-write handlers is complicated so
 * that fscow_run need not touch global state except while someone is
 * trying to change the list of copy-on-write handlers.  If someone is
 * already trying to change it, fscow_run must block until they're
 * done; if someone starts to try to change it while fscow_run is in
 * progress, fscow_run must notify them when done.
 */

int
fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg, struct fscow_handler **handlerp)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;

	/* We're gonna sleep big-time to wait for the cows to come home. */
	ASSERT_SLEEPABLE();

	/* Can't use this unless you've got transactions. */
	if (!ISSET(mp->mnt_iflag, IMNT_HAS_TRANS))
		return EINVAL;
	KASSERT(fmi != NULL);

	/* Allocate a new cow handler. */
	struct fscow_handler *const handler =
	    pool_cache_get(fscow_handler_pc, PR_WAITOK);
	handler->ch_func = func;
	handler->ch_arg = arg;

	/* Carefully install it. */
	fscow_change_enter(mp);
	LIST_INSERT_HEAD(&fmi->fmi_cow_handlers, handler, ch_list);
	fscow_change_exit(mp);

	/* Success! */
	*handlerp = handler;
	return 0;
}

void
fscow_disestablish(struct mount *mp, struct fscow_handler *handler)
{

	/* We're gonna sleep big-time to wait for the cows to come home. */
	ASSERT_SLEEPABLE();

	/* Carefully remove the cow handler. */
	fscow_change_enter(mp);
	LIST_REMOVE(handler, ch_list);
	fscow_change_exit(mp);

	/* Free it. */
	pool_cache_put(fscow_handler_pc, handler);
}

static void
fscow_change_enter(struct mount *mp)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;

	KASSERT(fmi != NULL);

	/* Wait for anyone else to finish changing the cow handlers. */
	mutex_enter(&fstrans.lock);
	while (fmi->fmi_cow_changing)
		cv_wait(&fstrans.state_cv, &fstrans.lock);

	/* Announce that we want to change the cow handlers. */
	fmi->fmi_cow_changing = true;

	/* Wait for our announcement to reach all CPUs. */
	pserialize_perform(fstrans.psz);

	/* Wait for fscow_run calls to drain. */
	while (fscow_running_p(mp))
		cv_wait(&fstrans.done_cv, &fstrans.lock);
}

static void
fscow_change_exit(struct mount *mp)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;

	KASSERT(fmi != NULL);
	KASSERT(mutex_owned(&fstrans.lock));

	/* Notify everyone that we're done changing the cow handlers. */
	fmi->fmi_cow_changing = false;
	cv_broadcast(&fstrans.state_cv);
	mutex_exit(&fstrans.lock);
}

static bool
fscow_running_p(struct mount *mp)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;
	struct fstrans_txn *txn;

	KASSERT(fmi != NULL);
	KASSERT(mutex_owned(&fstrans.lock));
	KASSERT(fmi->fmi_cow_changing);

	/*
	 * Go through all transactions with the mount point.  If any is
	 * in the middle of fscow_run, we must wait to change the cow
	 * handler list.
	 */
	LIST_FOREACH(txn, &fmi->fmi_txns, txn_mnt_list) {
		if (txn->txn_cow_depth)
			return true;
	}

	return false;
}
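/*
 * Illustrative sketch, not part of the draft: registering a
 * copy-on-write handler, roughly as a snapshot driver would.  The
 * handler, its logic, and both function names are hypothetical
 * placeholders.
 */
#ifdef FSTRANS_EXAMPLE
static int
fstrans_example_cow_handler(void *arg, struct buf *bp, bool data_valid)
{

	/* ...copy the block about to be overwritten to the snapshot... */
	return 0;
}

static int
fstrans_example_snapshot_attach(struct mount *mp,
    struct fscow_handler **handlerp)
{

	return fscow_establish(mp, &fstrans_example_cow_handler, NULL,
	    handlerp);
}
#endif	/* FSTRANS_EXAMPLE */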
int
fscow_run(struct buf *bp, bool data_valid)
{
	struct mount *mp;
	struct fstrans_mount_info *fmi;
	struct fstrans_txn *txn;
	struct fscow_handler *handler;
	int error = 0;

	/* We may sleep on an adaptive lock or to wait for handler changes. */
	ASSERT_SLEEPABLE();

	/* If it's already done, nothing to do. */
	if (ISSET(bp->b_flags, B_COWDONE))
		goto out;

	/* If there is no vnode, there is no mount, so do nothing. */
	if (bp->b_vp == NULL)
		goto out;

	/* !?!? */
	if (bp->b_vp->v_type == VBLK)
		mp = spec_node_getmountedfs(bp->b_vp);
	else
		mp = bp->b_vp->v_mount;

	/*
	 * If there is no mounted file system, or if it has no
	 * transactions, nothing to do.
	 */
	if (mp == NULL || !ISSET(mp->mnt_iflag, IMNT_HAS_TRANS))
		goto out;

	fmi = mp->mnt_transinfo;
	KASSERT(fmi != NULL);

	/* Enter a cow transaction; block changes to the cow handler list. */
	error = fscow_enter(mp, &txn);
	if (error)
		goto out;

	/* Run the cow handlers in order until one fails. */
	LIST_FOREACH(handler, &fmi->fmi_cow_handlers, ch_list) {
		error = (*handler->ch_func)(handler->ch_arg, bp, data_valid);
		if (error)
			break;
	}

	/* All done.  Let the cow handler list change once more. */
	fscow_exit(mp, txn);

out:	if (error)
		return error;

	/* Mark it done. */
	bp->b_flags |= B_COWDONE;

	/* Success! */
	return 0;
}

static int
fscow_enter(struct mount *mp, struct fstrans_txn **txnp)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;
	struct fstrans_txn *txn;
	int s, error;

	KASSERT(fmi != NULL);

	/* Start a transaction. */
	error = _fstrans_start(mp, FSTRANS_LOCK_LAZY, 0, &txn);
	if (error)
		goto fail0;
	KASSERT(txn != NULL);

	/* If we are recursively handling cows, just increase depth. */
	if (0 < txn->txn_cow_depth) {
		if (txn->txn_cow_depth == UINT_MAX) {
			error = EBUSY;
			goto fail1;
		}
		txn->txn_cow_depth++;
		goto win;
	}

	/* Check whether anyone wants to change the cow handlers. */
	s = pserialize_read_enter();
	if (__predict_false(fmi->fmi_cow_changing)) {
		pserialize_read_exit(s);

		/* Wait for cow handler changes to complete. */
		mutex_enter(&fstrans.lock);
		while (fmi->fmi_cow_changing)
			cv_wait(&fstrans.state_cv, &fstrans.lock);

		/* Done.  Block out cow handler changes until we're done. */
		txn->txn_cow_depth = 1;
		mutex_exit(&fstrans.lock);
		goto win;
	}

	/* Nope.  Block out cow handler changes until we're done. */
	txn->txn_cow_depth = 1;
	pserialize_read_exit(s);

win:	KASSERT(txn != NULL);
	*txnp = txn;
	return 0;

fail1:	_fstrans_done(mp, txn);
fail0:	KASSERT(error);
	return error;
}

static void
fscow_exit(struct mount *mp, struct fstrans_txn *txn)
{
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;
	int s;

	KASSERT(fmi != NULL);
	KASSERT(txn != NULL);
	KASSERT(mp == txn->txn_mount);
	KASSERT(0 < txn->txn_cow_depth);

	/* If we were recursively handling cows, just decrease depth. */
	if (1 < txn->txn_cow_depth) {
		txn->txn_cow_depth--;
		goto out;
	}

	/* Check whether anyone is waiting to change the cow handlers. */
	s = pserialize_read_enter();
	if (__predict_false(fmi->fmi_cow_changing)) {
		pserialize_read_exit(s);

		/* Yes.  Notify them that we're done. */
		mutex_enter(&fstrans.lock);
		txn->txn_cow_depth = 0;
		cv_broadcast(&fstrans.done_cv);
		mutex_exit(&fstrans.lock);
		goto out;
	}

	/* No.  Let future cow handler changes happen. */
	txn->txn_cow_depth = 0;
	pserialize_read_exit(s);

	/* Done with the transaction we started in fscow_enter. */
out:	_fstrans_done(mp, txn);
}
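/*
 * Illustrative sketch, not part of the draft: a hypothetical write
 * path running the copy-on-write handlers before issuing I/O, which
 * is the intended use of fscow_run.
 */
#ifdef FSTRANS_EXAMPLE
static int
fstrans_example_write(struct buf *bp)
{
	int error;

	/* Let any snapshot handlers copy the old contents first. */
	error = fscow_run(bp, true /*data_valid*/);
	if (error)
		return error;

	/* ...now issue the write, e.g. via VOP_STRATEGY... */
	return 0;
}
#endif	/* FSTRANS_EXAMPLE */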
/* Per-LWP transaction stack */

static inline struct fstrans_txn *
fstrans_txn_lwp_find(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_txn *txn;

	fli = lwp_getspecific(fstrans_txn_perlwp);
	if (__predict_false(fli == NULL))
		return NULL;

	txn = fli->fli_stack;
	if (__predict_false(txn == NULL))
		return NULL;
	if (__predict_true(txn->txn_mount == mp))
		return txn;

	while ((txn = txn->txn_next) != NULL)
		if (txn->txn_mount == mp)
			return txn;

	return NULL;
}

static inline int
fstrans_txn_lwp_push(struct fstrans_txn *txn, int flags)
{
	struct fstrans_lwp_info *fli;

	fli = lwp_getspecific(fstrans_txn_perlwp);
	if (__predict_false(fli == NULL)) {
		const int pool_flags = (ISSET(flags, FSTRANS_FLAG_NOWAIT)?
		    PR_NOWAIT : PR_WAITOK);
		fli = pool_cache_get(fstrans_lwp_pc, pool_flags);
		if (fli == NULL)
			return EBUSY;
		lwp_setspecific(fstrans_txn_perlwp, fli);
	}
	KASSERT(fli != NULL);

	txn->txn_next = fli->fli_stack;
	fli->fli_stack = txn;

	return 0;
}

static inline void
fstrans_txn_lwp_pop(struct fstrans_txn *txn)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_txn **txnp;

	KASSERT(txn->txn_depth == 0);
	KASSERT(txn->txn_cow_depth == 0);

	fli = lwp_getspecific(fstrans_txn_perlwp);
	KASSERT(fli != NULL);

	txnp = &fli->fli_stack;
	if (__predict_false(*txnp != txn)) {
		for (; *txnp != NULL; txnp = &(*txnp)->txn_next) {
			if (*txnp == txn)
				break;
		}
	}

	KASSERT(*txnp == txn);
	*txnp = txn->txn_next;
	txn->txn_next = NULL;	/* paranoia */
}

static void
fstrans_txn_lwp_dtor(void *arg)
{
	struct fstrans_lwp_info *const fli = arg;

	KASSERT(fli != NULL);

	if (fli->fli_stack != NULL)
		panic("lwp exiting with pending vfs transactions: %p",
		    fli->fli_stack);

	pool_cache_put(fstrans_lwp_pc, fli);
}

/* Transaction record construction and destruction */

/*
 * Pool cache constructors and destructors for struct fstrans_txn.
 * pool_cache(9) takes care of judiciously deciding when to call these
 * and when to cache struct fstrans_txns CPU-locally.
 */

static int
fstrans_txn_ctor(void *vmp, void *vtxn, int flags __unused)
{
	struct mount *const mp = vmp;
	struct fstrans_txn *const txn = vtxn;
	struct fstrans_mount_info *const fmi = mp->mnt_transinfo;
	static const struct fstrans_txn zero_txn;

	KASSERT(fmi != NULL);

	*txn = zero_txn;
	txn->txn_mount = mp;
	txn->txn_next = NULL;
	txn->txn_depth = 0;
	txn->txn_cow_depth = 0;

	mutex_enter(&fstrans.lock);
	if (fmi->fmi_state == FSTRANS_STATE_UNMOUNTED) {
		mutex_exit(&fstrans.lock);
		return EIO;
	}
	LIST_INSERT_HEAD(&fmi->fmi_txns, txn, txn_mnt_list);
	mutex_exit(&fstrans.lock);

	return 0;
}

static void
fstrans_txn_dtor(void *vmp, void *vtxn)
{
	struct mount *const mp __diagused = vmp;
	struct fstrans_txn *const txn = vtxn;
	static const struct fstrans_txn zero_txn;

	KASSERT(txn->txn_mount == mp);
	KASSERT(txn->txn_next == NULL);
	KASSERT(txn->txn_depth == 0);
	KASSERT(txn->txn_cow_depth == 0);

	mutex_enter(&fstrans.lock);
	LIST_REMOVE(txn, txn_mnt_list);
	mutex_exit(&fstrans.lock);

	*txn = zero_txn;
}