From fb54393def336b69125f5202b890b34afcafdcd8 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell
Date: Fri, 19 Aug 2022 19:49:48 +0000
Subject: [PATCH] WIP: x86: Support EFI runtime services.

The current implementation uses a 1:1 P/V mapping with a special pmap,
efi_runtime_pmap, and special mappings flagged PMAP_MD_EFIRT so that
they don't have PTE_U set even if they lie in what would normally be
user VM -- this way we don't fall afoul of SMAP/SMEP.

Currently this patch disables SVS, because I haven't worked the special
efi_runtime_pmap into SVS yet, so SVS would try to touch the
nonexistent UTLS page.  To be fixed.

Not sure pmap_activate/deactivate_sync are correct; they need more
review from an x86 wizard.

Also, in qemu this seems to return EFI_INVALID_PARAMETER for GetTime
and GetNextVariableName.  Not sure what's up...
---
 sys/arch/amd64/amd64/trap.c         |  10 +
 sys/arch/amd64/include/efi.h        |   3 +
 sys/arch/x86/conf/files.x86         |   2 +
 sys/arch/x86/include/pmap_private.h |  12 +
 sys/arch/x86/x86/efi_machdep.c      | 343 ++++++++++++++++++++++++++++
 sys/arch/x86/x86/pmap.c             |  87 ++++++-
 sys/arch/x86/x86/svs.c              |   8 +
 7 files changed, 464 insertions(+), 1 deletion(-)
 create mode 100644 sys/arch/amd64/include/efi.h

diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c
index 6e555545ecd4..3abf6fb1a567 100644
--- a/sys/arch/amd64/amd64/trap.c
+++ b/sys/arch/amd64/amd64/trap.c
@@ -254,6 +254,7 @@ doubletrap(struct trapframe *frame)
  * jump directly into the code in x86/fpu.c so they get processed
  * without interrupts being enabled.
  */
+#include
 void
 trap(struct trapframe *frame)
 {
@@ -517,6 +518,15 @@ pagefltcommon:
 		/* Fault the original page in. */
 		onfault = pcb->pcb_onfault;
 		pcb->pcb_onfault = NULL;
+#ifdef EFI_RUNTIME
+		/* XXX */
+		extern void efi_runtime_onfault(void);
+		if (onfault == efi_runtime_onfault) {
+			db_stacktrace();
+			onfault_restore(frame, onfault, EFAULT);
+			return;
+		}
+#endif
 		error = uvm_fault(map, va, ftype);
 		pcb->pcb_onfault = onfault;
 		if (error == 0) {
diff --git a/sys/arch/amd64/include/efi.h b/sys/arch/amd64/include/efi.h
new file mode 100644
index 000000000000..b612111c32bf
--- /dev/null
+++ b/sys/arch/amd64/include/efi.h
@@ -0,0 +1,3 @@
+/* $NetBSD$ */
+
+#include
diff --git a/sys/arch/x86/conf/files.x86 b/sys/arch/x86/conf/files.x86
index f30c12fb1ccf..6def63d6832b 100644
--- a/sys/arch/x86/conf/files.x86
+++ b/sys/arch/x86/conf/files.x86
@@ -21,6 +21,8 @@ defflag opt_xen.h DO_NOT_DEFINE
 # Option to have a static kernel memory layout
 defflag	opt_kaslr.h	NO_X86_ASLR
 
+defflag	opt_efi.h	EFI_RUNTIME
+
 defflag	SVS
 
 defflag	PCPU_IDT
diff --git a/sys/arch/x86/include/pmap_private.h b/sys/arch/x86/include/pmap_private.h
index f54e10c4a62d..de7e6e52ab47 100644
--- a/sys/arch/x86/include/pmap_private.h
+++ b/sys/arch/x86/include/pmap_private.h
@@ -377,4 +377,16 @@ extern struct pcpu_area *pcpuarea;
 
 void	svs_quad_copy(void *, void *, long);
 
+/* XXX move me to pmap.h?
*/ +#define PMAP_MD_EFIRT 0x01000000u /* EFI runtime mapping */ + +#ifdef _KERNEL_OPT +#include "opt_efi.h" +#endif + +#ifdef EFI_RUNTIME +void * pmap_activate_sync(struct pmap *); +void pmap_deactivate_sync(struct pmap *, void *); +#endif + #endif /* _X86_PMAP_PRIVATE_H_ */ diff --git a/sys/arch/x86/x86/efi_machdep.c b/sys/arch/x86/x86/efi_machdep.c index ac670ee0d595..f00c17977726 100644 --- a/sys/arch/x86/x86/efi_machdep.c +++ b/sys/arch/x86/x86/efi_machdep.c @@ -29,6 +29,9 @@ #include __KERNEL_RCSID(0, "$NetBSD: efi.c,v 1.22 2021/10/07 12:52:27 msaitoh Exp $"); +#include "efi.h" +#include "opt_efi.h" + #include #include #include @@ -37,6 +40,8 @@ __KERNEL_RCSID(0, "$NetBSD: efi.c,v 1.22 2021/10/07 12:52:27 msaitoh Exp $"); #include #include +#include + #include #include #include @@ -67,6 +72,28 @@ static struct efi_e820memmap { struct bi_memmap_entry entry[VM_PHYSSEG_MAX - 1]; } efi_e820memmap; +#ifdef EFI_RUNTIME + +#define EFI_RUNTIME_PMAP 1 + +#include + +#include + +static kmutex_t efi_runtime_lock __cacheline_aligned; +static label_t efi_runtime_label; +#ifdef EFI_RUNTIME_PMAP +static struct pmap *efi_runtime_pmap __read_mostly; +#endif +static struct efi_rt efi_rt __read_mostly; +#if NEFI > 0 +static struct efi_ops efi_runtime_ops __read_mostly; +#endif + +static void efi_runtime_init(void); + +#endif + /* * Map a physical address (PA) to a newly allocated virtual address (VA). * The VA must be freed using efi_relva(). @@ -408,6 +435,10 @@ efi_init(void) #if NPCI > 0 pci_mapreg_map_enable_decode = true; /* PR port-amd64/53286 */ #endif + +#ifdef EFI_RUNTIME + efi_runtime_init(); +#endif } bool @@ -548,3 +579,315 @@ efi_get_e820memmap(void) efi_e820memmap.bim.common.type = BTINFO_MEMMAP; return &efi_e820memmap.bim; } + +#ifdef EFI_RUNTIME + +#ifdef _LP64 +#define EFIERR(x) (0x8000000000000000ul | (x)) +#else +#define EFIERR(x) (0x80000000ul | (x)) +#endif + +#define EFI_UNSUPPORTED EFIERR(3) +#define EFI_DEVICE_ERROR EFIERR(7) + +static void +efi_runtime_init(void) +{ + struct efi_systbl *systbl; + struct btinfo_efimemmap *efimm; + uint32_t i; + + if (efi_is32x64) { + aprint_debug("%s: 32x64 runtime services not supported\n", + __func__); + return; + } + + systbl = efi_getsystbl(); + if (systbl->st_rt == NULL) { + aprint_debug("%s: no runtime\n", __func__); + return; + } + if ((efimm = lookup_bootinfo(BTINFO_EFIMEMMAP)) == NULL) { + aprint_debug("%s: no efi memmap\n", __func__); + return; + } +#ifdef EFI_RUNTIME_PMAP + efi_runtime_pmap = pmap_create(); + void *const cookie = pmap_activate_sync(efi_runtime_pmap); +#endif + for (i = 0; i < efimm->num; i++) { + struct efi_md *md = (void *)(efimm->memmap + efimm->size * i); + uint64_t j; + int prot = VM_PROT_READ, flags = PMAP_MD_EFIRT; + + if ((md->md_attr & EFI_MD_ATTR_RT) == 0) + continue; + + switch (md->md_type) { + case EFI_MD_TYPE_RT_CODE: + prot |= VM_PROT_EXECUTE; + break; + case EFI_MD_TYPE_RT_DATA: + case EFI_MD_TYPE_IOMEM: + prot |= VM_PROT_WRITE; + break; + } + + if (md->md_attr & EFI_MD_ATTR_UC) + flags |= PMAP_NOCACHE; + if (md->md_attr & EFI_MD_ATTR_WC) + flags |= PMAP_WRITE_COMBINE; + if (md->md_attr & EFI_MD_ATTR_RO) + prot &= ~VM_PROT_WRITE; + if (md->md_attr & EFI_MD_ATTR_XP) + prot &= ~VM_PROT_EXECUTE; + + aprint_debug("%s: map %zu pages at %#"PRIxPADDR + " type %"PRIu32" attrs 0x%08"PRIx64"\n", + __func__, (size_t)md->md_pages, (paddr_t)md->md_phys, + md->md_type, md->md_attr); + if (md->md_virt != 0 && md->md_virt != md->md_phys) { + aprint_error("%s: already mapped at 0x%"PRIxVADDR"\n", + __func__, 
(vaddr_t)md->md_virt); + goto fail; + } + for (j = 0; j < md->md_pages; j++) { +#ifdef EFI_RUNTIME_PMAP + const int error = pmap_enter(efi_runtime_pmap, + md->md_phys + j*PAGE_SIZE, + md->md_phys + j*PAGE_SIZE, + prot, flags); + if (error) { + aprint_error("%s: failed to map va %"PRIxVADDR + "to pa %"PRIxPADDR" for efi runtime: %d\n", + __func__, + (vaddr_t)md->md_phys + j*PAGE_SIZE, + (paddr_t)md->md_phys + j*PAGE_SIZE, + error); + goto fail; + } +#else + pmap_kenter_pa(md->md_phys + j*PAGE_SIZE, + md->md_phys + j*PAGE_SIZE, + prot, flags); +#endif + } + } +#ifdef EFI_RUNTIME_PMAP + pmap_update(efi_runtime_pmap); +#else + pmap_update(pmap_kernel()); +#endif + + memcpy(&efi_rt, systbl->st_rt, sizeof(efi_rt)); + +#ifdef EFI_RUNTIME_PMAP + pmap_deactivate_sync(efi_runtime_pmap, cookie); +#endif + + mutex_init(&efi_runtime_lock, MUTEX_DEFAULT, IPL_VM); + +#if NEFI > 0 + efi_register_ops(&efi_runtime_ops); +#endif + + return; + +fail: +#ifdef EFI_RUNTIME_PMAP + pmap_deactivate_sync(efi_runtime_pmap, cookie); + pmap_destroy(efi_runtime_pmap); + efi_runtime_pmap = NULL; +#endif + return; +} + +/* + * XXX OOPS -- this doesn't work because trap first tries uvm_fault, + * which takes an rwlock, which is forbidden while we hold a spin lock. + * Either we need to change that (e.g., create a new hook into the + * onfault business), or run efi rt at lower ipl so it can block. + */ +void efi_runtime_onfault(void); +void +efi_runtime_onfault(void) +{ + + printf("%s:%d\n", __func__, __LINE__); + longjmp(&efi_runtime_label); +} + +struct efi_runtime_cookie { + void *erc_pmap_cookie; +}; + +static void +efi_runtime_enter(struct efi_runtime_cookie *cookie) +{ + struct pcb *pcb = lwp_getpcb(curlwp); + + KASSERT(pcb->pcb_onfault == NULL); + + mutex_enter(&efi_runtime_lock); + fpu_kern_enter(); +#ifdef EFI_RUNTIME_PMAP + cookie->erc_pmap_cookie = pmap_activate_sync(efi_runtime_pmap); +#else + cookie->erc_pmap_cookie = NULL; +#endif + pcb->pcb_onfault = &efi_runtime_onfault; +} + +static void +efi_runtime_exit(struct efi_runtime_cookie *cookie) +{ + struct pcb *pcb = lwp_getpcb(curlwp); + + KASSERT(pcb->pcb_onfault == &efi_runtime_onfault || + pcb->pcb_onfault == NULL); + + pcb->pcb_onfault = NULL; +#ifdef EFI_RUNTIME_PMAP + pmap_deactivate_sync(efi_runtime_pmap, cookie->erc_pmap_cookie); +#endif + fpu_kern_leave(); + mutex_exit(&efi_runtime_lock); +} + +static efi_status +efi_runtime_gettime(struct efi_tm *tm, struct efi_tmcap *tmcap) +{ + efi_status status; + struct efi_runtime_cookie cookie; + + if (efi_rt.rt_gettime == NULL) + return EFI_UNSUPPORTED; + + printf("%s: enter %p\n", __func__, efi_rt.rt_gettime); + + efi_runtime_enter(&cookie); + if (setjmp(&efi_runtime_label) == 0) + status = efi_rt.rt_gettime(tm, tmcap); + else + status = EFI_DEVICE_ERROR; + efi_runtime_exit(&cookie); + + return status; +} + +static efi_status +efi_runtime_settime(struct efi_tm *tm) +{ + efi_status status; + struct efi_runtime_cookie cookie; + + if (efi_rt.rt_settime == NULL) + return EFI_UNSUPPORTED; + + efi_runtime_enter(&cookie); + if (setjmp(&efi_runtime_label) == 0) + status = efi_rt.rt_settime(tm); + else + status = EFI_DEVICE_ERROR; + efi_runtime_exit(&cookie); + + return status; +} + +static efi_status +efi_runtime_getvar(efi_char *name, struct uuid *vendor, uint32_t *attrib, + unsigned long *datasize, void *data) +{ + efi_status status; + struct efi_runtime_cookie cookie; + + if (efi_rt.rt_getvar == NULL) + return EFI_UNSUPPORTED; + + efi_runtime_enter(&cookie); + if (setjmp(&efi_runtime_label) == 0) { + status = 
efi_rt.rt_getvar(name, vendor, attrib, datasize, + data); + } else { + status = EFI_DEVICE_ERROR; + } + efi_runtime_exit(&cookie); + + return status; +} + +static efi_status +efi_runtime_nextvar(unsigned long *namesize, efi_char *name, + struct uuid *vendor) +{ + efi_status status; + struct efi_runtime_cookie cookie; + + if (efi_rt.rt_scanvar == NULL) + return EFI_UNSUPPORTED; + +{ + struct efi_tm tm = {0}; + struct efi_tmcap tmcap = {0}; + efi_status status1 = efi_runtime_gettime(&tm, &tmcap); + printf("%s: gettime: status=%lx\n", __func__, status1); + hexdump(printf, "tm", &tm, sizeof(tm)); + hexdump(printf, "tmcap", &tmcap, sizeof(tmcap)); +} + + printf("%s: enter %p\n", __func__, efi_rt.rt_scanvar); + + efi_runtime_enter(&cookie); + if (setjmp(&efi_runtime_label) == 0) { + printf("%s:%d namesize=%lu@%p name@%p vendor@%p\n", + __func__, __LINE__, + *namesize, namesize, name, vendor); + hexdump(printf, "name", name, *namesize); + status = efi_rt.rt_scanvar(namesize, name, vendor); + printf("%s:%d status=0x%lx\n", __func__, __LINE__, status); + } else { + printf("%s:%d -- faulted\n", __func__, __LINE__); + status = EFI_DEVICE_ERROR; + } + efi_runtime_exit(&cookie); + + return status; +} + +static efi_status +efi_runtime_setvar(efi_char *name, struct uuid *vendor, uint32_t attrib, + unsigned long datasize, void *data) +{ + efi_status status; + struct efi_runtime_cookie cookie; + + if (efi_rt.rt_setvar == NULL) + return EFI_UNSUPPORTED; + + efi_runtime_enter(&cookie); + if (setjmp(&efi_runtime_label) == 0) { + status = efi_rt.rt_setvar(name, vendor, attrib, datasize, + data); + } else { + status = EFI_DEVICE_ERROR; + } + efi_runtime_exit(&cookie); + + return status; +} + +#if NEFI > 0 + +static struct efi_ops efi_runtime_ops = { + .efi_gettime = efi_runtime_gettime, + .efi_settime = efi_runtime_settime, + .efi_getvar = efi_runtime_getvar, + .efi_setvar = efi_runtime_setvar, + .efi_nextvar = efi_runtime_nextvar, +}; + +#endif + +#endif /* EFI_RUNTIME */ diff --git a/sys/arch/x86/x86/pmap.c b/sys/arch/x86/x86/pmap.c index 43d000f83495..87c3d84717e9 100644 --- a/sys/arch/x86/x86/pmap.c +++ b/sys/arch/x86/x86/pmap.c @@ -138,6 +138,7 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.415 2022/05/13 09:39:40 riastradh Exp $") #include "opt_xen.h" #include "opt_svs.h" #include "opt_kaslr.h" +#include "opt_efi.h" #define __MUTEX_PRIVATE /* for assertions */ @@ -3741,6 +3742,90 @@ pmap_deactivate(struct lwp *l) ci->ci_tlbstate = TLBSTATE_LAZY; } +#ifdef EFI_RUNTIME + +/* + * pmap_activate_sync: synchronously activate specified pmap. + * + * => Must be called with kernel preemption disabled (high IPL is enough). + * => Must not sleep before pmap_deactivate_sync. 
+ */ +void * +pmap_activate_sync(struct pmap *pmap) +{ + struct cpu_info *ci = curcpu(); + struct pmap *oldpmap = ci->ci_pmap; + unsigned cid = cpu_index(ci); + + KASSERT(kpreempt_disabled()); + KASSERT(pmap != pmap_kernel()); + + KASSERT(!kcpuset_isset(pmap->pm_cpus, cid)); + KASSERT(!kcpuset_isset(pmap->pm_kernel_cpus, cid)); + + if (oldpmap) { + KASSERT_PDIRPA(oldpmap); + kcpuset_atomic_clear(oldpmap->pm_cpus, cid); + kcpuset_atomic_clear(oldpmap->pm_kernel_cpus, cid); + } + + ci->ci_tlbstate = TLBSTATE_VALID; + kcpuset_atomic_set(pmap->pm_cpus, cid); + kcpuset_atomic_set(pmap->pm_kernel_cpus, cid); + ci->ci_pmap = pmap; + +#if defined(SVS) && defined(USER_LDT) + if (svs_enabled) { + svs_ldt_sync(pmap); + } else +#endif + lldt(pmap->pm_ldt_sel); + + cpu_load_pmap(pmap, oldpmap); + + return oldpmap; +} + +void +pmap_deactivate_sync(struct pmap *pmap, void *cookie) +{ + struct cpu_info *ci = curcpu(); + struct pmap *oldpmap = cookie; + unsigned cid = cpu_index(ci); + + KASSERT(kpreempt_disabled()); + KASSERT(pmap != pmap_kernel()); + KASSERT(ci->ci_pmap == pmap); + + KASSERT_PDIRPA(pmap); + + KASSERT(kcpuset_isset(pmap->pm_cpus, cid)); + KASSERT(kcpuset_isset(pmap->pm_kernel_cpus, cid)); + + pmap_tlb_shootnow(); + + kcpuset_atomic_clear(pmap->pm_cpus, cid); + kcpuset_atomic_clear(pmap->pm_kernel_cpus, cid); + + ci->ci_tlbstate = TLBSTATE_LAZY; + ci->ci_pmap = oldpmap; + if (oldpmap) { + kcpuset_atomic_set(oldpmap->pm_cpus, cid); + kcpuset_atomic_set(oldpmap->pm_kernel_cpus, cid); +#if defined(SVS) && defined(USER_LDT) + if (svs_enabled) { + svs_ldt_sync(oldpmap); + } else +#endif + lldt(oldpmap->pm_ldt_sel); + cpu_load_pmap(oldpmap, pmap); + } else { + lcr3(pmap_pdirpa(pmap_kernel(), 0)); + } +} + +#endif /* EFI_RUNTIME */ + /* * some misc. functions */ @@ -4893,7 +4978,7 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa, npte |= pmap_pat_flags(flags); if (wired) npte |= PTE_WIRED; - if (va < VM_MAXUSER_ADDRESS) + if (va < VM_MAXUSER_ADDRESS && (flags & PMAP_MD_EFIRT) == 0) npte |= PTE_U; if (pmap == pmap_kernel()) diff --git a/sys/arch/x86/x86/svs.c b/sys/arch/x86/x86/svs.c index 7d379c9516fe..f8a125d7a930 100644 --- a/sys/arch/x86/x86/svs.c +++ b/sys/arch/x86/x86/svs.c @@ -733,6 +733,14 @@ svs_init(void) { uint64_t msr; + /* + * XXX doesn't work yet with efi runtime -- need to figure out + * what to do in svs_pdir_switch when switching to the efi + * runtime pmap, which has no utls page because it's not a user + * pmap even though it's also not a kernel pmap + */ + return; + if (cpu_vendor != CPUVENDOR_INTEL) { return; }