# HG changeset patch
# User Taylor R Campbell <riastradh@NetBSD.org>
# Date 1600962862 0
#      Thu Sep 24 15:54:22 2020 +0000
# Branch trunk
# Node ID 91c2ddbf51ba73551b047345e87b7534a75bbb84
# Parent  51ced2f627032474a14910b0b77beebb45663cf6
# EXP-Topic riastradh-wg
tprof: Use percpu rather than a MAXCPUS-element array.

diff -r 51ced2f62703 -r 91c2ddbf51ba sys/dev/tprof/tprof.c
--- a/sys/dev/tprof/tprof.c	Wed Sep 23 01:03:48 2020 +0000
+++ b/sys/dev/tprof/tprof.c	Thu Sep 24 15:54:22 2020 +0000
@@ -33,14 +33,15 @@
 #include <sys/systm.h>
 #include <sys/kernel.h>
 
-#include <sys/cpu.h>
+#include <sys/callout.h>
 #include <sys/conf.h>
-#include <sys/callout.h>
+#include <sys/cpu.h>
 #include <sys/kmem.h>
 #include <sys/module.h>
+#include <sys/percpu.h>
 #include <sys/proc.h>
+#include <sys/queue.h>
 #include <sys/workqueue.h>
-#include <sys/queue.h>
 
 #include <dev/tprof/tprof.h>
 #include <dev/tprof/tprof_ioctl.h>
@@ -97,7 +98,7 @@ static lwp_t *tprof_owner;
 static STAILQ_HEAD(, tprof_buf) tprof_list; /* L: global buffer list */
 static u_int tprof_nbuf_on_list;	/* L: # of buffers on tprof_list */
 static struct workqueue *tprof_wq;
-static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
+static struct percpu *tprof_cpus __read_mostly;	/* tprof_cpu_t * */
 static u_int tprof_samples_per_buf;
 
 static tprof_backend_t *tprof_backend;	/* S: */
@@ -116,8 +117,20 @@ static struct tprof_stat tprof_stat;	/* 
 static tprof_cpu_t *
 tprof_cpu(struct cpu_info *ci)
 {
+	tprof_cpu_t **cp, *c;	/* cp: ci's percpu slot; c: its content */
 
-	return &tprof_cpus[cpu_index(ci)];
+	/*
+	 * As long as xcalls are blocked -- e.g., by kpreempt_disable
+	 * -- the percpu object will not be swapped and destroyed.  We
+	 * can't write to it, because the data may have already been
+	 * moved to a new buffer, but we can safely read from it.
+	 */
+	kpreempt_disable();
+	cp = percpu_getptr_remote(tprof_cpus, ci);
+	c = *cp;	/* copy the pointer out while preemption is off */
+	kpreempt_enable();
+
+	return c;	/* the copied pointer, not the slot itself */
 }
 
 static tprof_cpu_t *
@@ -132,7 +145,7 @@ tprof_buf_alloc(void)
 {
 	tprof_buf_t *new;
 	u_int size = tprof_samples_per_buf;
-	
+
 	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
 	new->b_used = 0;
 	new->b_size = size;
@@ -682,9 +695,31 @@ tprofattach(int nunits)
 MODULE(MODULE_CLASS_DRIVER, tprof, NULL);
 
 static void
+tprof_cpu_init(void *vcp, void *vcookie, struct cpu_info *ci)
+{
+	tprof_cpu_t **cp = vcp, *c;	/* slot to fill; vcookie is unused */
+
+	c = kmem_zalloc(sizeof(*c), KM_SLEEP);
+	c->c_buf = NULL;	/* start with no buffer */
+	c->c_cpuid = cpu_index(ci);	/* record ci's index */
+	*cp = c;	/* store the new record in the slot */
+}
+
+static void
+tprof_cpu_fini(void *vcp, void *vcookie, struct cpu_info *ci)
+{
+	tprof_cpu_t **cp = vcp, *c;	/* slot to empty; vcookie is unused */
+
+	c = *cp;
+	KASSERT(c->c_cpuid == cpu_index(ci));	/* record matches ci */
+	KASSERT(c->c_buf == NULL);	/* no buffer still attached */
+	kmem_free(c, sizeof(*c));
+	*cp = NULL;	/* don't leave a dangling pointer */
+}
+
+static void
 tprof_driver_init(void)
 {
-	unsigned int i;
 
 	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
 	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
@@ -692,18 +727,15 @@ tprof_driver_init(void)
 	cv_init(&tprof_cv, "tprof");
 	cv_init(&tprof_reader_cv, "tprof_rd");
 	STAILQ_INIT(&tprof_list);
-	for (i = 0; i < __arraycount(tprof_cpus); i++) {
-		tprof_cpu_t * const c = &tprof_cpus[i];
-
-		c->c_buf = NULL;
-		c->c_cpuid = i;
-	}
+	tprof_cpus = percpu_create(sizeof(tprof_cpu_t *),
+	    tprof_cpu_init, tprof_cpu_fini, NULL);
 }
 
 static void
 tprof_driver_fini(void)
 {
 
+	percpu_free(tprof_cpus, sizeof(tprof_cpu_t *));
 	mutex_destroy(&tprof_lock);
 	mutex_destroy(&tprof_reader_lock);
 	mutex_destroy(&tprof_startstop_lock);