Index: arch/aarch64/aarch64/pmap.c
===================================================================
RCS file: /cvsroot/src/sys/arch/aarch64/aarch64/pmap.c,v
retrieving revision 1.75
diff -u -p -r1.75 pmap.c
--- arch/aarch64/aarch64/pmap.c	15 May 2020 05:39:15 -0000	1.75
+++ arch/aarch64/aarch64/pmap.c	1 Jun 2020 20:36:59 -0000
@@ -35,6 +35,10 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.7
 #include "opt_pmap.h"
 #include "opt_uvmhist.h"
 
+/* borrow unused space from kmutex_t to keep vm_page <= 128 bytes. */
+#define	__MUTEX_PRIVATE
+#define	pp_flags pp_pvlock.mtx_pad
+
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/kmem.h>
@@ -102,8 +106,9 @@ PMAP_COUNTER(pdp_alloc_boot, "page table
 PMAP_COUNTER(pdp_alloc, "page table page allocate (uvm_pagealloc)");
 PMAP_COUNTER(pdp_free, "page table page free (uvm_pagefree)");
 
-PMAP_COUNTER(pv_enter, "pv_entry allocate and link");
-PMAP_COUNTER(pv_remove, "pv_entry free and unlink");
+PMAP_COUNTER(pv_enter, "pv_entry fill");
+PMAP_COUNTER(pv_remove_dyn, "pv_entry free and unlink dynamic");
+PMAP_COUNTER(pv_remove_emb, "pv_entry clear embedded");
 PMAP_COUNTER(pv_remove_nopv, "no pv_entry found when removing pv");
 
 PMAP_COUNTER(activate, "pmap_activate call");
@@ -188,15 +193,6 @@ PMAP_COUNTER(unwire_failure, "pmap_unwir
 
 #define VM_PAGE_TO_PP(pg)	(&(pg)->mdpage.mdpg_pp)
 
-struct pv_entry {
-	LIST_ENTRY(pv_entry) pv_link;
-	struct pmap *pv_pmap;
-	vaddr_t pv_va;
-	paddr_t pv_pa;		/* debug */
-	pt_entry_t *pv_ptep;	/* for fast pte lookup */
-};
-#define pv_next	pv_link.le_next
-
 #define L3INDEXMASK	(L3_SIZE * Ln_ENTRIES - 1)
 #define PDPSWEEP_TRIGGER	512
 
@@ -208,7 +204,7 @@ static void _pmap_remove(struct pmap *, 
     struct pv_entry **);
 static int _pmap_enter(struct pmap *, vaddr_t, paddr_t, vm_prot_t, u_int, bool);
 
-static struct pmap kernel_pmap;
+static struct pmap kernel_pmap __cacheline_aligned;
 
 struct pmap * const kernel_pmap_ptr = &kernel_pmap;
 static vaddr_t pmap_maxkvaddr;
@@ -227,27 +223,48 @@ static inline void
 pmap_pv_lock(struct pmap_page *pp)
 {
 
-	mutex_enter(&pp->pp_pvlock);
+	mutex_spin_enter(&pp->pp_pvlock);
 }
 
 static inline void
 pmap_pv_unlock(struct pmap_page *pp)
 {
 
-	mutex_exit(&pp->pp_pvlock);
+	mutex_spin_exit(&pp->pp_pvlock);
 }
 
 
 static inline void
 pm_lock(struct pmap *pm)
 {
-	mutex_enter(&pm->pm_lock);
+	mutex_spin_enter(&pm->pm_lock);
 }
 
 static inline void
 pm_unlock(struct pmap *pm)
 {
-	mutex_exit(&pm->pm_lock);
+	mutex_spin_exit(&pm->pm_lock);
+}
+
+static bool
+pm_reverse_lock(struct pmap *pm, struct pmap_page *pp)
+{
+
+	KASSERT(mutex_owned(&pp->pp_pvlock));
+
+	if (__predict_true(mutex_tryenter(&pm->pm_lock)))
+		return true;
+
+	if (pm != pmap_kernel())
+		pmap_reference(pm);
+	mutex_spin_exit(&pp->pp_pvlock);
+	mutex_spin_enter(&pm->pm_lock);
+	/* nothing, just wait for lock */
+	mutex_spin_exit(&pm->pm_lock);
+	if (pm != pmap_kernel())
+		pmap_destroy(pm);
+	mutex_spin_enter(&pp->pp_pvlock);
+	return false;
 }
 
 static inline struct pmap_page *
@@ -470,14 +487,22 @@ pmap_bootstrap(vaddr_t vstart, vaddr_t v
 
 	CTASSERT(sizeof(kpm->pm_stats.wired_count) == sizeof(long));
 	CTASSERT(sizeof(kpm->pm_stats.resident_count) == sizeof(long));
-#define PMSTAT_INC_WIRED_COUNT(pm)	\
-	atomic_inc_ulong(&(pm)->pm_stats.wired_count)
-#define PMSTAT_DEC_WIRED_COUNT(pm)	\
-	atomic_dec_ulong(&(pm)->pm_stats.wired_count)
-#define PMSTAT_INC_RESIDENT_COUNT(pm)	\
-	atomic_inc_ulong(&(pm)->pm_stats.resident_count)
-#define PMSTAT_DEC_RESIDENT_COUNT(pm)	\
-	atomic_dec_ulong(&(pm)->pm_stats.resident_count)
+#define PMSTAT_INC_WIRED_COUNT(pm) do { \
+	KASSERT(mutex_owned(&(pm)->pm_lock)); \
+	(pm)->pm_stats.wired_count++; \
+} while (/* CONSTCOND */ 0)
+#define PMSTAT_DEC_WIRED_COUNT(pm) do { \
+	KASSERT(mutex_owned(&(pm)->pm_lock)); \
+	(pm)->pm_stats.wired_count--; \
+} while (/* CONSTCOND */ 0)
+#define PMSTAT_INC_RESIDENT_COUNT(pm) do { \
+	KASSERT(mutex_owned(&(pm)->pm_lock)); \
+	(pm)->pm_stats.resident_count++; \
+} while (/* CONSTCOND */ 0)
+#define PMSTAT_DEC_RESIDENT_COUNT(pm) do { \
+	KASSERT(mutex_owned(&(pm)->pm_lock)); \
+	(pm)->pm_stats.resident_count--; \
+} while (/* CONSTCOND */ 0)
 }
 
 inline static int
@@ -504,11 +529,14 @@ void
 pmap_init(void)
 {
 
+	/* align to coherency_unit to minimise cache misses */
 	pool_cache_bootstrap(&_pmap_cache, sizeof(struct pmap),
-	    0, 0, 0, "pmappl", NULL, IPL_NONE, _pmap_pmap_ctor, NULL, NULL);
-	pool_cache_bootstrap(&_pmap_pv_pool, sizeof(struct pv_entry),
-	    0, 0, 0, "pvpl", NULL, IPL_VM, _pmap_pv_ctor, NULL, NULL);
+	    coherency_unit, 0, 0, "pmappl", NULL, IPL_NONE, _pmap_pmap_ctor,
+	    NULL, NULL);
 
+	pool_cache_bootstrap(&_pmap_pv_pool, sizeof(struct pv_entry),
+	    32, 0, PR_LARGECACHE, "pvpl", NULL, IPL_NONE, _pmap_pv_ctor,
+	    NULL, NULL);
 }
 
 void
@@ -588,7 +616,7 @@ pmap_alloc_pdp(struct pmap *pm, struct v
 			return POOL_PADDR_INVALID;
 		}
 
-		LIST_INSERT_HEAD(&pm->pm_vmlist, pg, mdpage.mdpg_vmlist);
+		LIST_INSERT_HEAD(&pm->pm_vmlist, pg, pageq.list);
 		pg->flags &= ~PG_BUSY;	/* never busy */
 		pg->wire_count = 1;	/* max = 1 + Ln_ENTRIES = 513 */
 		pa = VM_PAGE_TO_PHYS(pg);
@@ -618,12 +646,15 @@ pmap_alloc_pdp(struct pmap *pm, struct v
 static void
 pmap_free_pdp(struct pmap *pm, struct vm_page *pg)
 {
-	LIST_REMOVE(pg, mdpage.mdpg_vmlist);
-	pg->flags |= PG_BUSY;
+
+	/* pdp pages are always zeroed.  inform the VM system. */
+	LIST_REMOVE(pg, pageq.list);
+	pg->flags |= PG_ZERO;
 	pg->wire_count = 0;
 
 	struct pmap_page *pp __diagused = VM_PAGE_TO_PP(pg);
-	KASSERT(LIST_EMPTY(&pp->pp_pvhead));
+	KASSERT(pp->pp_pv.pv_pmap == NULL);
+	KASSERT(pp->pp_pv.pv_next == NULL);
 
 	uvm_pagefree(pg);
 	PMAP_COUNT(pdp_free);
@@ -639,8 +670,10 @@ _pmap_sweep_pdp(struct pmap *pm)
 	int nsweep;
 	uint16_t wirecount __diagused;
 
+	KASSERT(mutex_owned(&pm->pm_lock));
+
 	nsweep = 0;
-	LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, mdpage.mdpg_vmlist, tmp) {
+	LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, pageq.list, tmp) {
 		if (pg->wire_count != 1)
 			continue;
 
@@ -659,7 +692,7 @@ _pmap_sweep_pdp(struct pmap *pm)
 		/* unlink from parent */
 		opte = atomic_swap_64(ptep_in_parent, 0);
 		KASSERT(lxpde_valid(opte));
-		wirecount = atomic_add_32_nv(&pg->wire_count, -1); /* 1 -> 0 */
+		wirecount = --(pg->wire_count); /* 1 -> 0 */
 		KASSERT(wirecount == 0);
 		pmap_free_pdp(pm, pg);
 		nsweep++;
@@ -674,12 +707,12 @@ _pmap_sweep_pdp(struct pmap *pm)
 		KASSERTMSG(pg->wire_count >= 1,
 		    "wire_count=%d", pg->wire_count);
 		/* decrement wire_count of parent */
-		wirecount = atomic_add_32_nv(&pg->wire_count, -1);
+		wirecount = --(pg->wire_count);
 		KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
 		    "pm=%p[%d], pg=%p, wire_count=%d",
 		    pm, pm->pm_asid, pg, pg->wire_count);
 	}
-	atomic_swap_uint(&pm->pm_idlepdp, 0);
+	pm->pm_idlepdp = 0;
 
 	return nsweep;
 }
@@ -687,9 +720,9 @@ _pmap_sweep_pdp(struct pmap *pm)
 static void
 _pmap_free_pdp_all(struct pmap *pm)
 {
-	struct vm_page *pg, *tmp;
+	struct vm_page *pg;
 
-	LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, mdpage.mdpg_vmlist, tmp) {
+	while ((pg = LIST_FIRST(&pm->pm_vmlist)) != NULL) {
 		pmap_free_pdp(pm, pg);
 	}
 }
@@ -1019,9 +1052,10 @@ _pmap_pte_adjust_cacheflags(pt_entry_t p
 }
 
 static struct pv_entry *
-_pmap_remove_pv(struct pmap_page *pp, struct pmap *pm, vaddr_t va, pt_entry_t pte)
+_pmap_remove_pv(struct pmap_page *pp, struct pmap *pm, vaddr_t va,
+    pt_entry_t pte)
 {
-	struct pv_entry *pv;
+	struct pv_entry *pv, *ppv;
 
 	UVMHIST_FUNC(__func__);
 	UVMHIST_CALLED(pmaphist);
@@ -1029,18 +1063,26 @@ _pmap_remove_pv(struct pmap_page *pp, st
 	UVMHIST_LOG(pmaphist, "pp=%p, pm=%p, va=%llx, pte=%llx",
 	    pp, pm, va, pte);
 
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
-		if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
-			LIST_REMOVE(pv, pv_link);
-			PMAP_COUNT(pv_remove);
+	KASSERT(mutex_owned(&pp->pp_pvlock));
+
+	for (ppv = NULL, pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+		if (pv->pv_pmap == pm && pv->pv_va == va) {
 			break;
 		}
+		ppv = pv;
 	}
-#ifdef PMAPCOUNTERS
-	if (pv == NULL) {
+	if (ppv == NULL) {
+		/* embedded in pmap_page */
+		pv->pv_pmap = NULL;
+		pv = NULL;
+		PMAP_COUNT(pv_remove_emb);
+	} else if (pv != NULL) {
+		/* dynamically allocated */
+		ppv->pv_next = pv->pv_next;
+		PMAP_COUNT(pv_remove_dyn);
+	} else {
 		PMAP_COUNT(pv_remove_nopv);
 	}
-#endif
 
 	return pv;
 }
@@ -1094,15 +1136,21 @@ pv_dump(struct pmap_page *pp, void (*pr)
 	pr(" pp->pp_flags=%08x %s\n", pp->pp_flags,
 	    str_vmflags(pp->pp_flags));
 
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+	for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+		if (pv->pv_pmap == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+			continue;
+		}
 		pr("  pv[%d] pv=%p\n",
 		    i, pv);
 		pr("    pv[%d].pv_pmap = %p (asid=%d)\n",
 		    i, pv->pv_pmap, pv->pv_pmap->pm_asid);
 		pr("    pv[%d].pv_va   = %016lx (color=%d)\n",
 		    i, pv->pv_va, _pmap_color(pv->pv_va));
+#ifdef XXXAD
 		pr("    pv[%d].pv_pa   = %016lx (color=%d)\n",
 		    i, pv->pv_pa, _pmap_color(pv->pv_pa));
+#endif
 		pr("    pv[%d].pv_ptep = %p\n",
 		    i, pv->pv_ptep);
 		i++;
@@ -1122,14 +1170,15 @@ _pmap_enter_pv(struct pmap_page *pp, str
 	UVMHIST_LOG(pmaphist, "pp=%p, pm=%p, va=%llx, pa=%llx", pp, pm, va, pa);
 	UVMHIST_LOG(pmaphist, "ptep=%p, flags=%08x", ptep, flags, 0, 0);
 
-	/* pv is already registered? */
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
-		if ((pm == pv->pv_pmap) && (va == pv->pv_va)) {
-			break;
-		}
-	}
+	KASSERT(mutex_owned(&pp->pp_pvlock));
 
-	if (pv == NULL) {
+	/*
+	 * mapping cannot be already registered at this VA.
+	 */
+	if (pp->pp_pv.pv_pmap == NULL) {
+		/* claim pv_entry embedded in pmap_page. */
+		pv = &pp->pp_pv;
+	} else {
 		/*
 		 * create and link new pv.
 		 * pv is already allocated at beginning of _pmap_enter().
@@ -1138,23 +1187,18 @@ _pmap_enter_pv(struct pmap_page *pp, str
 		if (pv == NULL)
 			return ENOMEM;
 		*pvp = NULL;
-
-		pv->pv_pmap = pm;
-		pv->pv_va = va;
-		pv->pv_pa = pa;
-		pv->pv_ptep = ptep;
-
-		LIST_INSERT_HEAD(&pp->pp_pvhead, pv, pv_link);
-		PMAP_COUNT(pv_enter);
+		pv->pv_next = pp->pp_pv.pv_next;
+		pp->pp_pv.pv_next = pv;
+	}
+	pv->pv_pmap = pm;
+	pv->pv_va = va;
+	pv->pv_ptep = ptep;
+	PMAP_COUNT(pv_enter);
 
 #ifdef PMAP_PV_DEBUG
-		if (!LIST_EMPTY(&pp->pp_pvhead)){
-			printf("pv %p alias added va=%016lx -> pa=%016lx\n",
-			    pv, va, pa);
-			pv_dump(pp, printf);
-		}
+	printf("pv %p alias added va=%016lx -> pa=%016lx\n", pv, va, pa);
+	pv_dump(pp, printf);
 #endif
-	}
 
 	return 0;
 }
@@ -1162,18 +1206,14 @@ _pmap_enter_pv(struct pmap_page *pp, str
 void
 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
 {
-	int s;
 
-	s = splvm();
 	_pmap_enter(pmap_kernel(), va, pa, prot, flags | PMAP_WIRED, true);
-	splx(s);
 }
 
 void
 pmap_kremove(vaddr_t va, vsize_t size)
 {
 	struct pmap *kpm = pmap_kernel();
-	int s;
 
 	UVMHIST_FUNC(__func__);
 	UVMHIST_CALLED(pmaphist);
@@ -1186,11 +1226,9 @@ pmap_kremove(vaddr_t va, vsize_t size)
 	KDASSERT(!IN_KSEG_ADDR(va));
 	KDASSERT(IN_RANGE(va, VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS));
 
-	s = splvm();
 	pm_lock(kpm);
 	_pmap_remove(kpm, va, va + size, true, NULL);
 	pm_unlock(kpm);
-	splx(s);
 }
 
 static void
@@ -1205,13 +1243,12 @@ _pmap_protect_pv(struct pmap_page *pp, s
 	UVMHIST_CALLED(pmaphist);
 
 	UVMHIST_LOG(pmaphist, "pp=%p, pv=%p, prot=%08x", pp, pv, prot, 0);
+	KASSERT(mutex_owned(&pv->pv_pmap->pm_lock));
 
 	/* get prot mask from referenced/modified */
 	mdattr = pp->pp_flags &
 	    (VM_PROT_READ | VM_PROT_WRITE);
 
-	pm_lock(pv->pv_pmap);
-
 	ptep = pv->pv_ptep;
 	pte = *ptep;
 
@@ -1228,8 +1265,6 @@ _pmap_protect_pv(struct pmap_page *pp, s
 	pte = _pmap_pte_adjust_prot(pte, prot & pteprot, mdattr, user);
 	atomic_swap_64(ptep, pte);
 	AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid, pv->pv_va, true);
-
-	pm_unlock(pv->pv_pmap);
 }
 
 void
@@ -1501,8 +1536,7 @@ _pmap_pdp_addref(struct pmap *pm, paddr_
 		pg = PHYS_TO_VM_PAGE(pdppa);
 	KASSERT(pg != NULL);
 
-	CTASSERT(sizeof(pg->wire_count) == sizeof(uint32_t));
-	atomic_add_32(&pg->wire_count, 1);
+	pg->wire_count++;
 
 	KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
 	    "pg=%p, wire_count=%d", pg, pg->wire_count);
@@ -1521,6 +1555,8 @@ _pmap_pdp_delref(struct pmap *pm, paddr_
 	bool removed;
 	uint16_t wirecount;
 
+	KASSERT(mutex_owned(&pm->pm_lock));
+
 	/* kernel L0-L3 page will be never freed */
 	if (pm == pmap_kernel())
 		return false;
@@ -1531,7 +1567,7 @@ _pmap_pdp_delref(struct pmap *pm, paddr_
 	pg = PHYS_TO_VM_PAGE(pdppa);
 	KASSERT(pg != NULL);
 
-	wirecount = atomic_add_32_nv(&pg->wire_count, -1);
+	wirecount = --(pg->wire_count);
 
 	if (!do_free_pdp) {
 		/*
@@ -1540,7 +1576,7 @@ _pmap_pdp_delref(struct pmap *pm, paddr_
 		 * pmap_enter(), but useful hint to just sweep.
 		 */
 		if (wirecount == 1)
-			atomic_inc_uint(&pm->pm_idlepdp);
+			pm->pm_idlepdp++;
 		return false;
 	}
 
@@ -1598,7 +1634,7 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 	unsigned int idx;
 	int error = 0;
 	const bool user = (pm != pmap_kernel());
-	bool need_sync_icache, need_update_pv;
+	bool need_sync_icache, need_enter_pv;
 	bool l3only = true;
 
 	UVMHIST_FUNC(__func__);
@@ -1663,10 +1699,10 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		 * pool_cache_get() may call pmap_kenter() internally.
 		 */
 		spv = pool_cache_get(&_pmap_pv_pool, PR_NOWAIT);
-		need_update_pv = true;
+		need_enter_pv = true;
 	} else {
 		spv = NULL;
-		need_update_pv = false;
+		need_enter_pv = false;
 	}
 
 	pm_lock(pm);
@@ -1768,12 +1804,12 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		bool need_remove_pv;
 
 		KASSERT(!kenter);	/* pmap_kenter_pa() cannot override */
-#ifdef PMAPCOUNTERS
-		PMAP_COUNT(remappings);
 		if (opte & LX_BLKPAG_OS_WIRED) {
 			PMSTAT_DEC_WIRED_COUNT(pm);
 		}
 		PMSTAT_DEC_RESIDENT_COUNT(pm);
+#ifdef PMAPCOUNTERS
+		PMAP_COUNT(remappings);
 		if (user) {
 			PMAP_COUNT(user_mappings_changed);
 		} else {
@@ -1788,7 +1824,7 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		if (pa == l3pte_pa(opte)) {
 			/* old and new pte have same pa, no need to update pv */
 			need_remove_pv = (pp == NULL);
-			need_update_pv = false;
+			need_enter_pv = false;
 			if (need_sync_icache && l3pte_executable(opte, user))
 				need_sync_icache = false;
 		} else {
@@ -1835,7 +1871,7 @@ _pmap_enter(struct pmap *pm, vaddr_t va,
 		flags |= VM_PROT_READ;
 
 	mdattr = VM_PROT_READ | VM_PROT_WRITE;
-	if (need_update_pv) {
+	if (need_enter_pv) {
 		error = _pmap_enter_pv(pp, pm, &spv, va, ptep, pa, flags);
 		if (error != 0) {
 			/*
@@ -2032,37 +2068,53 @@ pmap_page_remove(struct pmap_page *pp, v
 {
 	struct pv_entry *pv, *pvtmp;
 	struct pv_entry *pvtofree = NULL;
+	struct pmap *pm;
 	pt_entry_t opte;
 
-		/* remove all pages reference to this physical page */
-		pmap_pv_lock(pp);
-		LIST_FOREACH_SAFE(pv, &pp->pp_pvhead, pv_link, pvtmp) {
-
-			opte = atomic_swap_64(pv->pv_ptep, 0);
-			if (lxpde_valid(opte)) {
-				_pmap_pdp_delref(pv->pv_pmap,
-				    AARCH64_KVA_TO_PA(trunc_page(
-				    (vaddr_t)pv->pv_ptep)), false);
-				AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid,
-				    pv->pv_va, true);
+	/* remove all pages reference to this physical page */
+	pmap_pv_lock(pp);
+	for (pv = &pp->pp_pv; pv != NULL;) {
+		if ((pm = pv->pv_pmap) == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+			pv = pp->pp_pv.pv_next;
+			continue;
+		}
+		if (!pm_reverse_lock(pm, pp)) {
+			/* now retry */
+			pv = &pp->pp_pv;
+			continue;
+		}
+		opte = atomic_swap_64(pv->pv_ptep, 0);
+		if (lxpde_valid(opte)) {
+			_pmap_pdp_delref(pv->pv_pmap,
+			    AARCH64_KVA_TO_PA(trunc_page(
+			    (vaddr_t)pv->pv_ptep)), false);
+			AARCH64_TLBI_BY_ASID_VA(pv->pv_pmap->pm_asid,
+			    pv->pv_va, true);
 
-				if ((opte & LX_BLKPAG_OS_WIRED) != 0) {
-					PMSTAT_DEC_WIRED_COUNT(pv->pv_pmap);
-				}
-				PMSTAT_DEC_RESIDENT_COUNT(pv->pv_pmap);
+			if ((opte & LX_BLKPAG_OS_WIRED) != 0) {
+				PMSTAT_DEC_WIRED_COUNT(pv->pv_pmap);
 			}
-			LIST_REMOVE(pv, pv_link);
-			PMAP_COUNT(pv_remove);
-
+			PMSTAT_DEC_RESIDENT_COUNT(pv->pv_pmap);
+		}
+		pvtmp = _pmap_remove_pv(pp, pm, pv->pv_va, opte);
+		if (pvtmp == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+		} else {
+			KASSERT(pv == pvtmp);
+			pp->pp_pv.pv_next = pv->pv_next;
 			pv->pv_next = pvtofree;
 			pvtofree = pv;
 		}
-		pmap_pv_unlock(pp);
+		pm_unlock(pm);
+		pv = pp->pp_pv.pv_next;
+	}
+	pmap_pv_unlock(pp);
 
-		for (pv = pvtofree; pv != NULL; pv = pvtmp) {
-			pvtmp = pv->pv_next;
-			pool_cache_put(&_pmap_pv_pool, pv);
-		}
+	for (pv = pvtofree; pv != NULL; pv = pvtmp) {
+		pvtmp = pv->pv_next;
+		pool_cache_put(&_pmap_pv_pool, pv);
+	}
 }
 
 #ifdef __HAVE_PMAP_PV_TRACK
@@ -2091,6 +2143,7 @@ pmap_page_protect(struct vm_page *pg, vm
 {
 	struct pv_entry *pv;
 	struct pmap_page *pp;
+	struct pmap *pm;
 
 	KASSERT((prot & VM_PROT_READ) || !(prot & VM_PROT_WRITE));
 
@@ -2102,13 +2155,32 @@ pmap_page_protect(struct vm_page *pg, vm
 	UVMHIST_LOG(pmaphist, "pg=%p, pp=%p, pa=%016lx, prot=%08x",
 	    pg, pp, VM_PAGE_TO_PHYS(pg), prot);
 
+	/* do an unlocked check first */
+	if (atomic_load_relaxed(&pp->pp_pv.pv_pmap) == NULL &&
+	    atomic_load_relaxed(&pp->pp_pv.pv_next) == NULL) {
+		return;
+	}
+
 	if ((prot & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
 	    VM_PROT_NONE) {
 		pmap_page_remove(pp, prot);
 	} else {
 		pmap_pv_lock(pp);
-		LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+		pv = &pp->pp_pv;
+		while (pv != NULL) {
+			if ((pm = pv->pv_pmap) == NULL) {
+				KASSERT(pv == &pp->pp_pv);
+				pv = pv->pv_next;
+				continue;
+			}
+			if (!pm_reverse_lock(pm, pp)) {
+				/* retry */
+				pv = &pp->pp_pv;
+				continue;
+			}
 			_pmap_protect_pv(pp, pv, prot);
+			pm_unlock(pm);
+			pv = pv->pv_next;
 		}
 		pmap_pv_unlock(pp);
 	}
@@ -2291,6 +2363,19 @@ pmap_clear_modify(struct vm_page *pg)
 	UVMHIST_LOG(pmaphist, "pg=%p, pp_flags=%08x",
 	    pg, pp->pp_flags, 0, 0);
 
+	PMAP_COUNT(clear_modify);
+
+	/*
+	 * if this is a new page, assert it has no mappings and simply zap
+	 * the stored attributes without taking any locks.
+	 */
+	if ((pg->flags & PG_FAKE) != 0) {
+		KASSERT(atomic_load_relaxed(&pp->pp_pv.pv_pmap) == NULL);
+		KASSERT(atomic_load_relaxed(&pp->pp_pv.pv_next) == NULL);
+		atomic_store_relaxed(&pp->pp_flags, 0);
+		return false;
+	}
+
 	pmap_pv_lock(pp);
 
 	if ((pp->pp_flags & VM_PROT_WRITE) == 0) {
@@ -2300,8 +2385,12 @@ pmap_clear_modify(struct vm_page *pg)
 
 	pp->pp_flags &= ~VM_PROT_WRITE;
 
-	PMAP_COUNT(clear_modify);
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+	for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+		if (pv->pv_pmap == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+			continue;
+		}
+
 		PMAP_COUNT(clear_modify_pages);
 
 		va = pv->pv_va;
@@ -2357,7 +2446,12 @@ pmap_clear_reference(struct vm_page *pg)
 	pp->pp_flags &= ~VM_PROT_READ;
 
 	PMAP_COUNT(clear_reference);
-	LIST_FOREACH(pv, &pp->pp_pvhead, pv_link) {
+	for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
+		if (pv->pv_pmap == NULL) {
+			KASSERT(pv == &pp->pp_pv);
+			continue;
+		}
+
 		PMAP_COUNT(clear_reference_pages);
 
 		va = pv->pv_va;
Index: arch/aarch64/include/pmap.h
===================================================================
RCS file: /cvsroot/src/sys/arch/aarch64/include/pmap.h,v
retrieving revision 1.39
diff -u -p -r1.39 pmap.h
--- arch/aarch64/include/pmap.h	14 May 2020 07:59:03 -0000	1.39
+++ arch/aarch64/include/pmap.h	1 Jun 2020 20:36:59 -0000
@@ -84,34 +84,36 @@ struct pmap {
 	bool pm_activated;
 };
 
-struct pv_entry;
+struct pv_entry {
+	struct pv_entry *pv_next;
+	struct pmap *pv_pmap;
+	vaddr_t pv_va;
+	void *pv_ptep;	/* pointer for fast pte lookup */
+};
 
 struct pmap_page {
 	kmutex_t pp_pvlock;
-	LIST_HEAD(, pv_entry) pp_pvhead;
-
-	/* VM_PROT_READ means referenced, VM_PROT_WRITE means modified */
-	uint32_t pp_flags;
+	struct pv_entry pp_pv;
 };
 
 struct vm_page_md {
-	LIST_ENTRY(vm_page) mdpg_vmlist;	/* L[0123] table vm_page list */
-	pd_entry_t *mdpg_ptep_parent;	/* for page descriptor page only */
-
 	struct pmap_page mdpg_pp;
 };
+/* for page descriptor page only */
+#define	mdpg_ptep_parent	mdpg_pp.pp_pv.pv_ptep
 
 #define VM_MDPAGE_INIT(pg)					\
 	do {							\
-		(pg)->mdpage.mdpg_ptep_parent = NULL;		\
 		PMAP_PAGE_INIT(&(pg)->mdpage.mdpg_pp);		\
 	} while (/*CONSTCOND*/ 0)
 
 #define PMAP_PAGE_INIT(pp)						\
 	do {								\
 		mutex_init(&(pp)->pp_pvlock, MUTEX_NODEBUG, IPL_VM);	\
-		LIST_INIT(&(pp)->pp_pvhead);				\
-		(pp)->pp_flags = 0;					\
+		(pp)->pp_pv.pv_next = NULL;				\
+		(pp)->pp_pv.pv_pmap = NULL;				\
+		(pp)->pp_pv.pv_va = 0;					\
+		(pp)->pp_pv.pv_ptep = NULL;				\
 	} while (/*CONSTCOND*/ 0)
 
 /* saved permission bit for referenced/modified emulation */
Index: arch/arm/include/mutex.h
===================================================================
RCS file: /cvsroot/src/sys/arch/arm/include/mutex.h,v
retrieving revision 1.23
diff -u -p -r1.23 mutex.h
--- arch/arm/include/mutex.h	5 Mar 2020 17:58:08 -0000	1.23
+++ arch/arm/include/mutex.h	1 Jun 2020 20:36:59 -0000
@@ -63,6 +63,9 @@ struct kmutex {
 			ipl_cookie_t		mtxs_ipl;
 			__cpu_simple_lock_t	mtxs_lock;
 			volatile uint8_t	mtxs_unused;
+#ifdef _LP64
+			uint32_t		mtxs_pad;
+#endif
 		} s;
 #endif
 	} u;
@@ -73,6 +76,9 @@ struct kmutex {
 #define	mtx_owner		u.mtxa_owner
 #define	mtx_ipl			u.s.mtxs_ipl
 #define	mtx_lock		u.s.mtxs_lock
+#ifdef _LP64
+#define	mtx_pad			u.s.mtxs_pad
+#endif
 
 #if 0
 #define	__HAVE_MUTEX_STUBS		1
Index: arch/evbarm/conf/ARMADAXP
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/ARMADAXP,v
retrieving revision 1.27
diff -u -p -r1.27 ARMADAXP
--- arch/evbarm/conf/ARMADAXP	18 Apr 2020 11:00:38 -0000	1.27
+++ arch/evbarm/conf/ARMADAXP	1 Jun 2020 20:36:59 -0000
@@ -115,7 +115,7 @@ options 	LOCKDEBUG
 options 	DIAGNOSTIC		# internal consistency checks
 #options 	DEBUG
 #options 	UVMHIST		# kernhist for uvm/pmap subsystems
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 #options 	VERBOSE_INIT_ARM	# verbose bootstraping messages
 options 	DDB			# in-kernel debugger
 options 	DDB_ONPANIC=1
Index: arch/evbarm/conf/ARMADILLO-IOT-G3
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/ARMADILLO-IOT-G3,v
retrieving revision 1.22
diff -u -p -r1.22 ARMADILLO-IOT-G3
--- arch/evbarm/conf/ARMADILLO-IOT-G3	18 Apr 2020 11:00:38 -0000	1.22
+++ arch/evbarm/conf/ARMADILLO-IOT-G3	1 Jun 2020 20:36:59 -0000
@@ -19,7 +19,7 @@ options 	CPU_CORTEX
 options 	CPU_CORTEXA7
 options 	IMX7
 options 	MULTIPROCESSOR
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 
 options 	MEMSIZE=512
 options 	MEMSIZE_RESERVED=32	# only reserved 32M for Cortex-M4 core
Index: arch/evbarm/conf/BCM5301X
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/BCM5301X,v
retrieving revision 1.35
diff -u -p -r1.35 BCM5301X
--- arch/evbarm/conf/BCM5301X	18 Apr 2020 11:00:38 -0000	1.35
+++ arch/evbarm/conf/BCM5301X	1 Jun 2020 20:36:59 -0000
@@ -26,7 +26,7 @@ options 	BCMETH_COUNTERS
 options 	CPU_CORTEXA9
 #options 	MEMSIZE=256
 options 	BCM5301X
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 options 	BUSDMA_COUNTERS
 makeoptions	KERNEL_BASE_PHYS="0x80100000"
 
Index: arch/evbarm/conf/DUOVERO
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/DUOVERO,v
retrieving revision 1.17
diff -u -p -r1.17 DUOVERO
--- arch/evbarm/conf/DUOVERO	18 Apr 2020 11:00:38 -0000	1.17
+++ arch/evbarm/conf/DUOVERO	1 Jun 2020 20:36:59 -0000
@@ -19,7 +19,7 @@ options 	OMAP_4430
 options 	MULTIPROCESSOR
 options 	 __HAVE_CPU_UAREA_ALLOC_IDLELWP	# need for MULTIPROCESSOR
 options 	FPU_VFP
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 options 	ARM_HAS_VBAR
 options 	__HAVE_MM_MD_DIRECT_MAPPED_PHYS
 makeoptions	CPUFLAGS="-mcpu=cortex-a9 -mfpu=neon"
Index: arch/evbarm/conf/GENERIC.common
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/GENERIC.common,v
retrieving revision 1.36
diff -u -p -r1.36 GENERIC.common
--- arch/evbarm/conf/GENERIC.common	18 Apr 2020 11:00:38 -0000	1.36
+++ arch/evbarm/conf/GENERIC.common	1 Jun 2020 20:36:59 -0000
@@ -14,7 +14,7 @@ options 	RTC_OFFSET=0	# hardware clock i
 options 	NTP		# NTP phase/frequency locked loop
 
 # CPU options
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 options 	BUSDMA_COUNTERS
 
 # Architecture options
Index: arch/evbarm/conf/N900
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/N900,v
retrieving revision 1.34
diff -u -p -r1.34 N900
--- arch/evbarm/conf/N900	18 Apr 2020 11:00:38 -0000	1.34
+++ arch/evbarm/conf/N900	1 Jun 2020 20:36:59 -0000
@@ -20,7 +20,7 @@ options 	RTC_OFFSET=0	# hardware clock i
 #options 	UVMHIST,UVMHIST_PRINT
 options 	CPU_CORTEXA8
 options 	OMAP_3430
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 
 # Architecture options
 
Index: arch/evbarm/conf/OMAP5EVM
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/OMAP5EVM,v
retrieving revision 1.18
diff -u -p -r1.18 OMAP5EVM
--- arch/evbarm/conf/OMAP5EVM	23 May 2020 14:51:49 -0000	1.18
+++ arch/evbarm/conf/OMAP5EVM	1 Jun 2020 20:36:59 -0000
@@ -19,7 +19,7 @@ options 	RTC_OFFSET=0	# hardware clock i
 
 options 	CPU_CORTEXA15
 options 	OMAP_5430
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 options 	MULTIPROCESSOR
 # no options	__HAVE_MM_MD_DIRECT_MAPPED_PHYS # too much memory to direct map
 
Index: arch/evbarm/conf/PANDABOARD
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/PANDABOARD,v
retrieving revision 1.31
diff -u -p -r1.31 PANDABOARD
--- arch/evbarm/conf/PANDABOARD	18 Apr 2020 11:00:38 -0000	1.31
+++ arch/evbarm/conf/PANDABOARD	1 Jun 2020 20:36:59 -0000
@@ -19,7 +19,7 @@ options 	RTC_OFFSET=0	# hardware clock i
 
 options 	CPU_CORTEXA9
 options 	OMAP_4430
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 
 # Architecture options
 
Index: arch/evbarm/conf/PEPPER
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/PEPPER,v
retrieving revision 1.15
diff -u -p -r1.15 PEPPER
--- arch/evbarm/conf/PEPPER	18 Apr 2020 11:00:38 -0000	1.15
+++ arch/evbarm/conf/PEPPER	1 Jun 2020 20:36:59 -0000
@@ -17,7 +17,7 @@ maxusers	32
 options 	CPU_CORTEXA8
 options 	TI_AM335X
 options 	FPU_VFP
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 options 	ARM_HAS_VBAR
 options 	__HAVE_MM_MD_DIRECT_MAPPED_PHYS
 makeoptions	CPUFLAGS="-mcpu=cortex-a8 -mfpu=neon"
Index: arch/evbarm/conf/TISDP2420
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/TISDP2420,v
retrieving revision 1.41
diff -u -p -r1.41 TISDP2420
--- arch/evbarm/conf/TISDP2420	18 Apr 2020 11:00:38 -0000	1.41
+++ arch/evbarm/conf/TISDP2420	1 Jun 2020 20:36:59 -0000
@@ -19,7 +19,7 @@ options 	RTC_OFFSET=0	# hardware clock i
 
 options 	CPU_ARM1136
 options 	OMAP_2420
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 
 # Architecture options
 
Index: arch/evbarm/conf/VTC100
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/conf/VTC100,v
retrieving revision 1.22
diff -u -p -r1.22 VTC100
--- arch/evbarm/conf/VTC100	18 Apr 2020 11:00:38 -0000	1.22
+++ arch/evbarm/conf/VTC100	1 Jun 2020 20:36:59 -0000
@@ -19,7 +19,7 @@ options 	RTC_OFFSET=0	# hardware clock i
 
 options 	CPU_CORTEXA8
 options 	TI_AM335X
-options 	PMAPCOUNTERS
+#options 	PMAPCOUNTERS
 
 # XXX The Cortex PMC delay() doesn't seem to work.
 #no options	CORTEX_PMC