From d67053a5facc7047c9ba103ac9ed1554cd1a4fd0 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell
Date: Sat, 4 Mar 2023 12:45:23 +0000
Subject: [PATCH] aarch64/kobj_machdep: More closely follow Arm ARM icache sequence.

While here, add comments with references and nix a sketchy function
pointer cast.
---
 sys/arch/aarch64/aarch64/kobj_machdep.c | 54 ++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 5 deletions(-)

diff --git a/sys/arch/aarch64/aarch64/kobj_machdep.c b/sys/arch/aarch64/aarch64/kobj_machdep.c
index 1330829567d0..229f8f8bab46 100644
--- a/sys/arch/aarch64/aarch64/kobj_machdep.c
+++ b/sys/arch/aarch64/aarch64/kobj_machdep.c
@@ -362,9 +362,11 @@ kobj_reloc(kobj_t ko, uintptr_t relocbase, const void *data,
 }
 
 static void
-kobj_idcache_wbinv_all(void)
+kobj_icache_sync_range_xc(void *base, void *vsize)
 {
-	cpu_idcache_wbinv_all();
+	size_t size = (size_t)(uintptr_t)vsize;
+
+	aarch64_icache_sync_range((vaddr_t)base, size);
 }
 
 int
@@ -373,11 +375,53 @@ kobj_machdep(kobj_t ko, void *base, size_t size, bool load)
 	uint64_t where;
 
 	if (load) {
+		/*
+		 * For uniprocessor, after writing to instruction
+		 * memory, need:
+		 *
+		 *	1. DC CVAU on each affected address
+		 *	2. DSB ISH
+		 *	3. IC IVAU on each affected address
+		 *	4. DSB ISH
+		 *	5. ISB
+		 *
+		 * For multiprocessor, after that, need ISB on every
+		 * other CPU.
+		 *
+		 *	Arm ARM, DDI 0487F.b, ID040120, Sec. K11.5.2
+		 *	Instruction cache maintenance instructions.
+		 *
+		 * aarch64_icache_sync_range does the uniprocessor
+		 * part. Unfortunately, we have no way to know which
+		 * processor did the write to instruction memory, and
+		 * it's not clear whether there's any way to avoid
+		 * having to issue the DC CVAU and IC IVAU on the
+		 * _same_ processor.
+		 *
+		 * So instead we will issue two xcalls. This way, on
+		 * each CPU, we will have the following order, in two
+		 * separate xcalls (1) and (2):
+		 *
+		 *	1. (a) DC CVAU on each affected address
+		 *	   (b) DSB ISH
+		 *	2. (a) IC IVAU on each affected address
+		 *	   (b) DSB ISH
+		 *	   (c) ISB
+		 *
+		 * Using aarch64_icache_sync_range in both xcalls will
+		 * issue an unnecessary IC IVAU in xcall (1) and an
+		 * unnecessary DC CVAU and ISB in xcall (2), but
+		 * presumably there's no harm in that, and this is not
+		 * performance-critical logic.
+		 */
 		if (cold) {
-			kobj_idcache_wbinv_all();
+			aarch64_icache_sync_range((vaddr_t)base, size);
 		} else {
-			where = xc_broadcast(0,
-			    (xcfunc_t)kobj_idcache_wbinv_all, NULL, NULL);
+			where = xc_broadcast(0, kobj_icache_sync_range_xc,
+			    base, (void *)(uintptr_t)size);
+			xc_wait(where);
+			where = xc_broadcast(0, kobj_icache_sync_range_xc,
+			    base, (void *)(uintptr_t)size);
 			xc_wait(where);
 		}
 	}