From 0d6f8549b01d05194f3d22034a11f45b77f2cf93 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Mon, 18 Mar 2024 18:11:16 +0000 Subject: [PATCH 1/3] acpi(4): New iattr `apeibus' for attaching an APEI driver. APEI is the ACPI Platform Error Interface, a standard (if very complicated) interface for reporting hardware errors to the OS. Firmware support for APEI is presented through the ACPI tables BERT (Boot Error Record Table), ERST (Error Record Serialization Table), EINJ (Error Injection Table), and HEST (Hardware Error Source Table), rather than through nodes in the ACPI device tree, so it can't just attach through the existing acpinodebus iattr and instead requires a special pseudo-bus like acpiwdrt(4). No driver yet -- this is just the hook to attach one in a module. The new member sc_apei of struct acpi_softc is placed at the end of the structure so that this change can be safely pulled up to release branches without risk to ABI compatibility in existing modules such as acpiverbose.kmod which may rely on the layout (but not size) of struct acpi_softc. 
PR kern/58046 --- sys/dev/acpi/acpi.c | 8 ++++++++ sys/dev/acpi/acpivar.h | 7 +++++++ sys/dev/acpi/files.acpi | 1 + 3 files changed, 16 insertions(+) diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c index 4ade9c47efa3..24e023373e0b 100644 --- a/sys/dev/acpi/acpi.c +++ b/sys/dev/acpi/acpi.c @@ -636,6 +636,9 @@ acpi_childdet(device_t self, device_t child) if (sc->sc_wdrt == child) sc->sc_wdrt = NULL; + if (sc->sc_apei == child) + sc->sc_apei = NULL; + SIMPLEQ_FOREACH(ad, &sc->sc_head, ad_list) { if (ad->ad_device == child) @@ -923,6 +926,11 @@ acpi_rescan(device_t self, const char *ifattr, const int *locators) CFARGS(.iattr = "acpiwdrtbus")); } + if (ifattr_match(ifattr, "apeibus") && sc->sc_apei == NULL) { + sc->sc_apei = config_found(sc->sc_dev, NULL, NULL, + CFARGS(.iattr = "apeibus")); + } + return 0; } diff --git a/sys/dev/acpi/acpivar.h b/sys/dev/acpi/acpivar.h index 9dafef7f7ac5..ddce8cdd9c18 100644 --- a/sys/dev/acpi/acpivar.h +++ b/sys/dev/acpi/acpivar.h @@ -177,6 +177,13 @@ struct acpi_softc { struct sysmon_pswitch sc_smpsw_sleep; SIMPLEQ_HEAD(, acpi_devnode) sc_head; + + /* + * Move this section to the other pseudo-bus child pointers + * after pullup -- putting it here avoids potential ABI + * compatibility issues with kernel modules. 
+ */ + device_t sc_apei; /* apei(4) pseudo-bus */ }; /* diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi index ead9bab1e9bd..383b347f38d8 100644 --- a/sys/dev/acpi/files.acpi +++ b/sys/dev/acpi/files.acpi @@ -14,6 +14,7 @@ define acpiwdrtbus { } define acpisdtbus { } define acpigtdtbus { } define acpimadtbus { } +define apeibus { } device acpi: acpica, acpiapmbus, acpinodebus, acpiecdtbus, acpisdtbus, acpigtdtbus, acpimadtbus, acpihpetbus, acpiwdrtbus, sysmon_power, sysmon_taskq attach acpi at acpibus From 9d6cff207a5cc3f4be670f70e7ad6c1d75884cf1 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sun, 17 Mar 2024 10:34:13 +0000 Subject: [PATCH 2/3] apei(4): New driver for ACPI Platform Error Interfaces. For now it is wired up only in x86 ALL kernels, and built as a module for x86 and Arm. Once it gets a little more testing on machines with APEI, I would like to flip it on by default. PR kern/58046 --- distrib/sets/lists/debug/module.md.amd64 | 2 + distrib/sets/lists/debug/module.md.i386 | 2 + distrib/sets/lists/modules/md.amd64 | 2 + distrib/sets/lists/modules/md.i386 | 2 + share/man/man4/apei.4 | 132 +++ sys/arch/amd64/conf/ALL | 1 + sys/arch/i386/conf/ALL | 1 + sys/dev/acpi/apei.c | 929 ++++++++++++++++++++ sys/dev/acpi/apei_bert.c | 138 +++ sys/dev/acpi/apei_bertvar.h | 48 + sys/dev/acpi/apei_cper.h | 234 +++++ sys/dev/acpi/apei_einj.c | 851 ++++++++++++++++++ sys/dev/acpi/apei_einjvar.h | 48 + sys/dev/acpi/apei_erst.c | 577 ++++++++++++ sys/dev/acpi/apei_erstvar.h | 49 ++ sys/dev/acpi/apei_hed.h | 34 + sys/dev/acpi/apei_hest.c | 1017 ++++++++++++++++++++++ sys/dev/acpi/apei_hestvar.h | 85 ++ sys/dev/acpi/apei_interp.c | 382 ++++++++ sys/dev/acpi/apei_interp.h | 55 ++ sys/dev/acpi/apei_mapreg.c | 201 +++++ sys/dev/acpi/apei_mapreg.h | 46 + sys/dev/acpi/apei_reg.c | 103 +++ sys/dev/acpi/apei_reg.h | 40 + sys/dev/acpi/apeivar.h | 81 ++ sys/dev/acpi/files.acpi | 12 + sys/modules/Makefile | 1 + sys/modules/apei/Makefile | 20 + 
sys/modules/apei/apei.ioconf | 11 + 29 files changed, 5104 insertions(+) create mode 100644 share/man/man4/apei.4 create mode 100644 sys/dev/acpi/apei.c create mode 100644 sys/dev/acpi/apei_bert.c create mode 100644 sys/dev/acpi/apei_bertvar.h create mode 100644 sys/dev/acpi/apei_cper.h create mode 100644 sys/dev/acpi/apei_einj.c create mode 100644 sys/dev/acpi/apei_einjvar.h create mode 100644 sys/dev/acpi/apei_erst.c create mode 100644 sys/dev/acpi/apei_erstvar.h create mode 100644 sys/dev/acpi/apei_hed.h create mode 100644 sys/dev/acpi/apei_hest.c create mode 100644 sys/dev/acpi/apei_hestvar.h create mode 100644 sys/dev/acpi/apei_interp.c create mode 100644 sys/dev/acpi/apei_interp.h create mode 100644 sys/dev/acpi/apei_mapreg.c create mode 100644 sys/dev/acpi/apei_mapreg.h create mode 100644 sys/dev/acpi/apei_reg.c create mode 100644 sys/dev/acpi/apei_reg.h create mode 100644 sys/dev/acpi/apeivar.h create mode 100644 sys/modules/apei/Makefile create mode 100644 sys/modules/apei/apei.ioconf diff --git a/distrib/sets/lists/debug/module.md.amd64 b/distrib/sets/lists/debug/module.md.amd64 index fa740faa50b5..ac5758bbd180 100644 --- a/distrib/sets/lists/debug/module.md.amd64 +++ b/distrib/sets/lists/debug/module.md.amd64 @@ -35,6 +35,8 @@ ./usr/libdata/debug/@MODULEDIR@/amdtemp/amdtemp.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/amdzentemp modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/amdzentemp/amdzentemp.kmod.debug modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/apei modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/apei/apei.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/aps modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/aps/aps.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/asus modules-base-kernel kmod,debug diff --git a/distrib/sets/lists/debug/module.md.i386 b/distrib/sets/lists/debug/module.md.i386 index 
431139a0991f..dcba7e40fce7 100644 --- a/distrib/sets/lists/debug/module.md.i386 +++ b/distrib/sets/lists/debug/module.md.i386 @@ -35,6 +35,8 @@ ./usr/libdata/debug/@MODULEDIR@/amdtemp/amdtemp.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/amdzentemp modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/amdzentemp/amdzentemp.kmod.debug modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/apei modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/apei/apei.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/aps modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/aps/aps.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/asus modules-base-kernel kmod,debug diff --git a/distrib/sets/lists/modules/md.amd64 b/distrib/sets/lists/modules/md.amd64 index 6a04608c36e7..8b7c99c65e56 100644 --- a/distrib/sets/lists/modules/md.amd64 +++ b/distrib/sets/lists/modules/md.amd64 @@ -36,6 +36,8 @@ ./@MODULEDIR@/amdtemp/amdtemp.kmod modules-base-kernel kmod ./@MODULEDIR@/amdzentemp modules-base-kernel kmod ./@MODULEDIR@/amdzentemp/amdzentemp.kmod modules-base-kernel kmod +./@MODULEDIR@/apei modules-base-kernel kmod +./@MODULEDIR@/apei/apei.kmod modules-base-kernel kmod ./@MODULEDIR@/aps modules-base-kernel kmod ./@MODULEDIR@/aps/aps.kmod modules-base-kernel kmod ./@MODULEDIR@/asus modules-base-kernel kmod diff --git a/distrib/sets/lists/modules/md.i386 b/distrib/sets/lists/modules/md.i386 index 6536f8ad8936..a65537e9e260 100644 --- a/distrib/sets/lists/modules/md.i386 +++ b/distrib/sets/lists/modules/md.i386 @@ -36,6 +36,8 @@ ./@MODULEDIR@/amdtemp/amdtemp.kmod modules-base-kernel kmod ./@MODULEDIR@/amdzentemp modules-base-kernel kmod ./@MODULEDIR@/amdzentemp/amdzentemp.kmod modules-base-kernel kmod +./@MODULEDIR@/apei modules-base-kernel kmod +./@MODULEDIR@/apei/apei.kmod modules-base-kernel kmod ./@MODULEDIR@/aps modules-base-kernel kmod ./@MODULEDIR@/aps/aps.kmod 
modules-base-kernel kmod ./@MODULEDIR@/asus modules-base-kernel kmod diff --git a/share/man/man4/apei.4 b/share/man/man4/apei.4 new file mode 100644 index 000000000000..7441361e04af --- /dev/null +++ b/share/man/man4/apei.4 @@ -0,0 +1,132 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2024 +.Dt APEI 4 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm apei +.Nd ACPI Platform Error Interfaces +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.Cd "apei* at apeibus?" 
+.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +.Nm +reports hardware errors discovered through +.Tn APEI , +the +.Tn ACPI +Platform Error Interfaces. +.Pp +.Nm +also supports injecting errors. +.\" .Nm +.\" also supports reading/writing/clearing error records in a persistent +.\" firmware store (XXX not yet: nothing uses the ERST). +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DIAGNOSTICS +When the hardware detects an error and reports it to +.Nm , +it will print information about the error to the console. +.Pp +Example of a correctable memory error, automatically corrected by the +system, with no further intervention needed: +.Bd -literal +apei0: error source 1 reported hardware error: severity=corrected nentries=1 status=0x12 +apei0: error source 1 entry 0: SectionType={0xa5bc1114,0x6f64,0x4ede,0xb8b8,{0x3e,0x83,0xed,0x7c,0x83,0xb1}} (memory error) +apei0: error source 1 entry 0: ErrorSeverity=2 (corrected) +apei0: error source 1 entry 0: Revision=0x201 +apei0: error source 1 entry 0: Flags=0x1 +apei0: error source 1 entry 0: FruText=CorrectedErr +apei0: error source 1 entry 0: MemoryErrorType=8 (PARITY_ERROR) +.Ed +Example of a fatal uncorrectable memory error: +.Bd -literal +apei0: error source 0 reported hardware error: severity=fatal nentries=1 status=0x11 +apei0: error source 0 entry 0: SectionType={0xa5bc1114,0x6f64,0x4ede,0xb8b8,{0x3e,0x83,0xed,0x7c,0x83,0xb1}} (memory error) +apei0: error source 0 entry 0: ErrorSeverity=1 (fatal) +apei0: error source 0 entry 0: Revision=0x201 +apei0: error source 0 entry 0: Flags=0x1 +apei0: error source 0 entry 0: FruText=UncorrectedErr +apei0: error source 0 entry 0: ErrorStatus=0x400 +apei0: error source 0 entry 0: Node=0x0 +apei0: error source 0 entry 0: Module=0x0 +apei0: error source 0 entry 0: Device=0x0 +panic: fatal hardware error +.Ed +.Pp +Details of the hardware error sources can be dumped with +.Xr acpidump 8 . 
+.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr acpi 4 , +.Xr acpidump 8 +.Rs +.%B ACPI Specification 6.5 +.%O Chapter 18: ACPI Platform Error Interfaces (APEI) +.%U https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +driver first appeared in +.Nx 11.0 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh AUTHORS +The +.Nm +driver was written by +.An Taylor R Campbell Aq Mt riastradh@NetBSD.org . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh BUGS +No sysctl interface to read BERT after boot. +.Pp +No simple sysctl interface to inject errors with EINJ, or any way to +inject errors at physical addresses in pages allocated for testing. +Perhaps there should be a separate kernel module for that. +.Pp +Nothing reads, writes, or clears ERST. +.Nx +could use it to store dmesg or other diagnostic information on panic. +.Pp +Many hardware error source types in the HEST are missing, such as +.Tn PCIe +errors. +.Pp +.Nm +is not wired to any machine-dependent machine check exception +notifications. +.Pp +No formal log format or sysctl/device interface that programs can +reliably act on. +.Pp +.Nx +makes no attempt to recover from uncorrectable but recoverable errors, +such as discarding a clean cached page where an uncorrectable memory +error has occurred. diff --git a/sys/arch/amd64/conf/ALL b/sys/arch/amd64/conf/ALL index 950f3a011cb4..3304f50c4941 100644 --- a/sys/arch/amd64/conf/ALL +++ b/sys/arch/amd64/conf/ALL @@ -379,6 +379,7 @@ acpivga* at acpi? # ACPI Display Adapter acpiout* at acpivga? # ACPI Display Output Device acpiwdrt* at acpi? # ACPI Watchdog Resource Table acpiwmi* at acpi? # ACPI WMI Mapper +apei* at apeibus? # ACPI Platform Error Interfaces # Mainboard devices aibs* at acpi? 
# ASUSTeK AI Booster hardware monitor diff --git a/sys/arch/i386/conf/ALL b/sys/arch/i386/conf/ALL index 6b419fba1cce..60ea5675ecef 100644 --- a/sys/arch/i386/conf/ALL +++ b/sys/arch/i386/conf/ALL @@ -366,6 +366,7 @@ acpivga* at acpi? # ACPI Display Adapter acpiout* at acpivga? # ACPI Display Output Device acpiwdrt* at acpi? # ACPI Watchdog Resource Table acpiwmi* at acpi? # ACPI WMI Mapper +apei* at apeibus? # ACPI Platform Error Interfaces # Mainboard devices aibs* at acpi? # ASUSTeK AI Booster hardware monitor diff --git a/sys/dev/acpi/apei.c b/sys/dev/acpi/apei.c new file mode 100644 index 000000000000..fa6865db67d7 --- /dev/null +++ b/sys/dev/acpi/apei.c @@ -0,0 +1,929 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI: ACPI Platform Error Interface + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html + * + * XXX dtrace probes + * + * XXX call _OSC appropriately to announce to the platform that we, the + * OSPM, support APEI + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("apei") + +static int apei_match(device_t, cfdata_t, void *); +static void apei_attach(device_t, device_t, void *); +static int apei_detach(device_t, int); + +static void apei_get_tables(struct apei_tab *); +static void apei_put_tables(struct apei_tab *); + +static void apei_identify(struct apei_softc *, const char *, + const ACPI_TABLE_HEADER *); + +CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc), + apei_match, apei_attach, apei_detach, NULL); + +static int +apei_match(device_t parent, cfdata_t match, void *aux) +{ + struct apei_tab tab; + int prio = 0; + + /* + * If we have any of the APEI tables, match. 
+ */ + apei_get_tables(&tab); + if (tab.bert || tab.einj || tab.erst || tab.hest) + prio = 1; + apei_put_tables(&tab); + + return prio; +} + +static void +apei_attach(device_t parent, device_t self, void *aux) +{ + struct apei_softc *sc = device_private(self); + const struct sysctlnode *sysctl_hw_acpi; + int error; + + aprint_naive("\n"); + aprint_normal(": ACPI Platform Error Interface\n"); + + pmf_device_register(self, NULL, NULL); + + sc->sc_dev = self; + apei_get_tables(&sc->sc_tab); + + /* + * Get the sysctl hw.acpi node. This should already be created + * but I don't see an easy way to get at it. If this fails, + * something is seriously wrong, so let's stop here. + */ + error = sysctl_createv(&sc->sc_sysctllog, 0, + NULL, &sysctl_hw_acpi, 0, + CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0, + CTL_HW, CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, + "failed to create sysctl hw.acpi: %d\n", error); + return; + } + + /* + * Create sysctl hw.acpi.apei. + */ + error = sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_hw_acpi, &sc->sc_sysctlroot, 0, + CTLTYPE_NODE, "apei", + SYSCTL_DESCR("ACPI Platform Error Interface"), + NULL, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, + "failed to create sysctl hw.acpi.apei: %d\n", error); + return; + } + + /* + * Set up BERT, EINJ, ERST, and HEST. + */ + if (sc->sc_tab.bert) { + apei_identify(sc, "BERT", &sc->sc_tab.bert->Header); + apei_bert_attach(sc); + } + if (sc->sc_tab.einj) { + apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header); + apei_einj_attach(sc); + } + if (sc->sc_tab.erst) { + apei_identify(sc, "ERST", &sc->sc_tab.erst->Header); + apei_erst_attach(sc); + } + if (sc->sc_tab.hest) { + apei_identify(sc, "HEST", &sc->sc_tab.hest->Header); + apei_hest_attach(sc); + } +} + +static int +apei_detach(device_t self, int flags) +{ + struct apei_softc *sc = device_private(self); + int error; + + /* + * Detach children. 
We don't currently have any but this is + * harmless without children and mandatory if we ever sprouted + * them, so let's just leave it here for good measure. + * + * After this point, we are committed to detaching; failure is + * forbidden. + */ + error = config_detach_children(self, flags); + if (error) + return error; + + /* + * Tear down all the sysctl nodes first, before the software + * state backing them goes away. + */ + sysctl_teardown(&sc->sc_sysctllog); + sc->sc_sysctlroot = NULL; + + /* + * Detach the software state for the APEI tables. + */ + if (sc->sc_tab.hest) + apei_hest_detach(sc); + if (sc->sc_tab.erst) + apei_erst_detach(sc); + if (sc->sc_tab.einj) + apei_einj_detach(sc); + if (sc->sc_tab.bert) + apei_bert_detach(sc); + + /* + * Release the APEI tables and we're done. + */ + apei_put_tables(&sc->sc_tab); + pmf_device_deregister(self); + return 0; +} + +/* + * apei_get_tables(tab) + * + * Get references to whichever APEI-related tables -- BERT, EINJ, + * ERST, HEST -- are available in the system. + */ +static void +apei_get_tables(struct apei_tab *tab) +{ + ACPI_STATUS rv; + + /* + * Probe the BERT -- Boot Error Record Table. + */ + rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert); + if (ACPI_FAILURE(rv)) + tab->bert = NULL; + + /* + * Probe the EINJ -- Error Injection Table. + */ + rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj); + if (ACPI_FAILURE(rv)) + tab->einj = NULL; + + /* + * Probe the ERST -- Error Record Serialization Table. + */ + rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst); + if (ACPI_FAILURE(rv)) + tab->erst = NULL; + + /* + * Probe the HEST -- Hardware Error Source Table. + */ + rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest); + if (ACPI_FAILURE(rv)) + tab->hest = NULL; +} + +/* + * apei_put_tables(tab) + * + * Release the tables acquired by apei_get_tables. 
+ */ +static void +apei_put_tables(struct apei_tab *tab) +{ + + if (tab->bert != NULL) { + AcpiPutTable(&tab->bert->Header); + tab->bert = NULL; + } + if (tab->einj != NULL) { + AcpiPutTable(&tab->einj->Header); + tab->einj = NULL; + } + if (tab->erst != NULL) { + AcpiPutTable(&tab->erst->Header); + tab->erst = NULL; + } + if (tab->hest != NULL) { + AcpiPutTable(&tab->hest->Header); + tab->hest = NULL; + } +} + +/* + * apei_identify(sc, name, header) + * + * Identify the APEI-related table header for dmesg. + */ +static void +apei_identify(struct apei_softc *sc, const char *name, + const ACPI_TABLE_HEADER *h) +{ + + aprint_normal_dev(sc->sc_dev, "%s:" + " OemId <%6.6s,%8.8s,%08x>" + " AslId <%4.4s,%08x>\n", + name, + h->OemId, h->OemTableId, h->OemRevision, + h->AslCompilerId, h->AslCompilerRevision); +} + +/* + * apei_cper_guid_dec(buf, uuid) + * + * Decode a Common Platform Error Record UUID/GUID from an ACPI + * table at buf into a sys/uuid.h struct uuid. + */ +static void +apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid) +{ + + uuid_dec_le(buf, uuid); +} + +/* + * apei_format_guid(uuid, guidstr) + * + * Format a UUID as a string. This uses C initializer notation, + * not UUID notation, in order to match the text in the UEFI + * specification. 
+ */ +static void +apei_format_guid(const struct uuid *uuid, char guidstr[static 69]) +{ + + snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x," + "0x%02x%02x," + "{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}", + uuid->time_low, uuid->time_mid, uuid->time_hi_and_version, + uuid->clock_seq_hi_and_reserved, uuid->clock_seq_hi_and_reserved, + uuid->node[0], uuid->node[1], uuid->node[2], + uuid->node[3], uuid->node[4], uuid->node[5]); +} + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section + */ + +static const char *const cper_memory_error_type[] = { +#define F(LN, SN, V) [LN] = #SN, + CPER_MEMORY_ERROR_TYPES(F) +#undef F +}; + +/* + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block + * + * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with this + * but are designated as being intended for Generic Error Data Entries + * rather than Generic Error Status Blocks. + */ +static const char *const apei_gesb_severity[] = { + [0] = "recoverable", + [1] = "fatal", + [2] = "corrected", + [3] = "none", +}; + +/* + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry + */ +static const char *const apei_gede_severity[] = { + [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable", + [ACPI_HEST_GEN_ERROR_FATAL] = "fatal", + [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected", + [ACPI_HEST_GEN_ERROR_NONE] = "none", +}; + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section + */ +static const struct uuid CPER_MEMORY_ERROR_SECTION = + {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}}; + +static void +apei_cper_memory_error_report(struct apei_softc *sc, const void *buf, + size_t len, const char *ctx) +{ + const struct cper_memory_error *ME = buf; + char bitbuf[1024]; + + snprintb(bitbuf, sizeof(bitbuf), + CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits); + 
aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf); + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) { + /* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status + */ + /* XXX define this format somewhere */ + snprintb(bitbuf, sizeof(bitbuf), "\177\020" + "f\010\010" "ErrorType\0" + "=\001" "ERR_INTERNAL\0" + "=\004" "ERR_MEM\0" + "=\005" "ERR_TLB\0" + "=\006" "ERR_CACHE\0" + "=\007" "ERR_FUNCTION\0" + "=\010" "ERR_SELFTEST\0" + "=\011" "ERR_FLOW\0" + "=\020" "ERR_BUS\0" + "=\021" "ERR_MAP\0" + "=\022" "ERR_IMPROPER\0" + "=\023" "ERR_UNIMPL\0" + "=\024" "ERR_LOL\0" + "=\025" "ERR_RESPONSE\0" + "=\026" "ERR_PARITY\0" + "=\027" "ERR_PROTOCOL\0" + "=\030" "ERR_ERROR\0" + "=\031" "ERR_TIMEOUT\0" + "=\032" "ERR_POISONED\0" + "b\020" "AddressError\0" + "b\021" "ControlError\0" + "b\022" "DataError\0" + "b\023" "ResponderDetected\0" + "b\024" "RequesterDetected\0" + "b\025" "FirstError\0" + "b\026" "Overflow\0" + "\0", ME->ErrorStatus); + device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) { + device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n", + ctx, ME->PhysicalAddress); + } + if (ME->ValidationBits & + CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) { + device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64 + "\n", ctx, ME->PhysicalAddressMask); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) { + device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx, + ME->Node); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) { + device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx, + ME->Card); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) { + device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx, + ME->Module); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) { + device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx, + ME->Bank); + } + if 
(ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) { + device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx, + ME->Device); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) { + device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx, + ME->Row); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) { + device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx, + ME->Column); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) { + device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n", + ctx, ME->BitPosition); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) { + device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n", + ctx, ME->RequestorId); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) { + device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n", + ctx, ME->ResponderId); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) { + device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n", + ctx, ME->TargetId); + } + if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) { + const uint8_t t = ME->MemoryErrorType; + const char *n = t < __arraycount(cper_memory_error_type) + ? cper_memory_error_type[t] : NULL; + + if (n) { + device_printf(sc->sc_dev, "%s: MemoryErrorType=%d" + " (%s)\n", ctx, t, n); + } else { + device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n", + ctx, t); + } + } +} + +/* + * apei_cper_reports + * + * Table of known Common Platform Error Record types, symbolic + * names, minimum data lengths, and functions to report them. 
+ * + * The section types and corresponding section layouts are listed + * at: + * + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html + */ +static const struct apei_cper_report { + const char *name; + const struct uuid *type; + size_t minlength; + void (*func)(struct apei_softc *, const void *, size_t, const char *); +} apei_cper_reports[] = { + { "memory", &CPER_MEMORY_ERROR_SECTION, + sizeof(struct cper_memory_error), + apei_cper_memory_error_report }, +}; + +/* + * apei_gede_report_header(sc, gede, ctx, &headerlen, &report) + * + * Report the header of the ith Generic Error Data Entry in the + * given context. + * + * Return the actual length of the header in headerlen, or 0 if + * not known because the revision isn't recognized. + * + * Return the report type in report, or NULL if not known because + * the section type isn't recognized. + */ +static void +apei_gede_report_header(struct apei_softc *sc, + const ACPI_HEST_GENERIC_DATA *gede, const char *ctx, + size_t *headerlenp, const struct apei_cper_report **reportp) +{ + const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede; + struct uuid sectype; + char guidstr[69]; + char buf[128]; + unsigned i; + + /* + * Print the section type as a C initializer. It would be + * prettier to use standard hyphenated UUID notation, but that + * notation is slightly ambiguous here (two octets could be + * written either way, depending on Microsoft convention -- + * which influenced ACPI and UEFI -- or internet convention), + * and the UEFI spec writes the C initializer notation, so this + * makes it easier to search for. + * + * Also print out a symbolic name, if we know it. 
+ */ + apei_cper_guid_dec(gede->SectionType, §ype); + apei_format_guid(§ype, guidstr); + for (i = 0; i < __arraycount(apei_cper_reports); i++) { + const struct apei_cper_report *const report = + &apei_cper_reports[i]; + + if (memcmp(§ype, report->type, sizeof(sectype)) != 0) + continue; + device_printf(sc->sc_dev, "%s: SectionType=%s (%s error)\n", + ctx, guidstr, report->name); + *reportp = report; + break; + } + if (i == __arraycount(apei_cper_reports)) { + device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx, + guidstr); + *reportp = NULL; + } + + /* + * Print the numeric severity and, if we have it, a symbolic + * name for it. + */ + device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n", ctx, + gede->ErrorSeverity, + (gede->ErrorSeverity < __arraycount(apei_gede_severity) + ? apei_gede_severity[gede->ErrorSeverity] + : "unknown")); + + /* + * The Revision may not often be useful, but this is only ever + * shown at the time of a hardware error report, not something + * you can glean at your convenience with acpidump. So print + * it anyway. + */ + device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx, + gede->Revision); + + /* + * Don't touch anything past the Revision until we've + * determined we understand it. Return the header length to + * the caller, or return zero -- and stop here -- if we don't + * know what the actual header length is. + */ + if (gede->Revision < 0x0300) { + *headerlenp = sizeof(*gede); + } else if (gede->Revision < 0x0400) { + *headerlenp = sizeof(*gede_v3); + } else { + *headerlenp = 0; + return; + } + + /* + * Print the validation bits at debug level. Only really + * helpful if there are bits we _don't_ know about. 
+ */ + /* XXX define this format somewhere */ + snprintb(buf, sizeof(buf), "\177\020" + "b\000" "FRU_ID\0" + "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */ + "b\002" "TIMESTAMP\0" + "\0", gede->ValidationBits); + aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, buf); + + /* + * Print the CPER section flags. + */ + snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT, gede->Flags); + device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf); + + /* + * The ErrorDataLength is unlikely to be useful for the log, so + * print it at debug level only. + */ + aprint_debug_dev(sc->sc_dev, "%s: ErrorDataLength=0x%"PRIu32"\n", + ctx, gede->ErrorDataLength); + + /* + * Print the FRU Id and text, if available. + */ + if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) { + struct uuid fruid; + + apei_cper_guid_dec(gede->FruId, &fruid); + apei_format_guid(&fruid, guidstr); + device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr); + } + if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) { + device_printf(sc->sc_dev, "%s: FruText=%.20s\n", + ctx, gede->FruText); + } + + /* + * Print the timestamp, if available by the revision number and + * the validation bits. + */ + if (gede->Revision >= 0x0300 && gede->Revision < 0x0400 && + gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) { + const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp; + const uint8_t s = t[0]; + const uint8_t m = t[1]; + const uint8_t h = t[2]; + const uint8_t f = t[3]; + const uint8_t D = t[4]; + const uint8_t M = t[5]; + const uint8_t Y = t[6]; + const uint8_t C = t[7]; + + device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64 + " (%02d%02d-%02d-%02dT%02d:%02d:%02d%s)\n", + ctx, gede_v3->TimeStamp, + C,Y, M, D, h,m,s, + f & __BIT(0) ? " (event time)" : " (collect time)"); + } +} + +/* + * apei_gesb_report(sc, gesb, size, ctx) + * + * Check a Generic Error Status Block, of at most the specified + * size in bytes, and report any errors in it. 
Return the 32-bit + * Block Status in case the caller needs it to acknowledge the + * report to firmware. + */ +uint32_t +apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb, + size_t size, const char *ctx, bool *fatalp) +{ + uint32_t status, unknownstatus, severity, nentries, i; + uint32_t datalen, rawdatalen; + const ACPI_HEST_GENERIC_DATA *gede0, *gede; + const unsigned char *rawdata; + char statusbuf[128]; + bool fatal = false; + + /* + * Verify the buffer is large enough for a Generic Error Status + * Block before we try to touch anything in it. + */ + if (size < sizeof(*gesb)) { + device_printf(sc->sc_dev, "%s: truncated GESB, %zu < %zu\n", + ctx, size, sizeof(*gesb)); + return 0; + } + size -= sizeof(*gesb); + + /* + * Load the status. Access ordering rules are unclear in the + * ACPI specification; I'm guessing that load-acquire of the + * block status is a good idea before any other access to the + * GESB. + */ + status = atomic_load_acquire(&gesb->BlockStatus); + + /* + * If there are no status bits set, the rest of the GESB is + * garbage, so stop here. + */ + if (status == 0) { + /* XXX dtrace */ + /* XXX DPRINTF */ + goto out; + } + + /* XXX define this format somewhere */ + snprintb(statusbuf, sizeof(statusbuf), "\177\020" + "b\000" "UE\0" + "b\001" "CE\0" + "b\002" "MULTI_UE\0" + "b\003" "MULTI_CE\0" + "f\004\010" "GEDE_COUNT\0" + "\0", status); + + /* + * Print a message to the console and dmesg about the severity + * of the error. 
+ */ + severity = gesb->ErrorSeverity; + nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT); + if (severity < __arraycount(apei_gesb_severity)) { + device_printf(sc->sc_dev, "%s reported hardware error:" + " severity=%s nentries=%u status=%s\n", + ctx, apei_gesb_severity[severity], nentries, statusbuf); + } else { + device_printf(sc->sc_dev, "%s reported error:" + " severity=%"PRIu32" nentries=%u status=%s\n", + ctx, severity, nentries, statusbuf); + } + + /* + * Make a determination about whether the error is fatal. + * + * XXX Currently we don't have any mechanism to recover from + * uncorrectable but recoverable errors, so we treat those -- + * and anything else we don't recognize -- as fatal. + */ + switch (severity) { + case ACPI_HEST_GEN_ERROR_CORRECTED: + case ACPI_HEST_GEN_ERROR_NONE: + fatal = false; + break; + case ACPI_HEST_GEN_ERROR_FATAL: + case ACPI_HEST_GEN_ERROR_RECOVERABLE: /* XXX */ + default: + fatal = true; + break; + } + + /* + * Clear the bits we know about to warn if there's anything + * left we don't understand. + */ + unknownstatus = status; + unknownstatus &= ~ACPI_HEST_UNCORRECTABLE; + unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE; + unknownstatus &= ~ACPI_HEST_CORRECTABLE; + unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE; + unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT; + if (unknownstatus != 0) { + /* XXX dtrace */ + /* XXX rate-limit? */ + device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:" + " 0x%"PRIx32"\n", ctx, unknownstatus); + } + + /* + * Advance past the Generic Error Status Block (GESB) header to + * the Generic Error Data Entries (GEDEs). + */ + gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1); + + /* + * Verify that the data length (GEDEs) fits within the size. + * If not, truncate the GEDEs. 
+ */ + datalen = gesb->DataLength; + if (size < datalen) { + device_printf(sc->sc_dev, "%s:" + " GESB DataLength exceeds bounds: %zu < %"PRIu32"\n", + ctx, size, datalen); + datalen = size; + } + size -= datalen; + + /* + * Report each of the Generic Error Data Entries. + */ + for (i = 0; i < nentries; i++) { + size_t headerlen; + const struct apei_cper_report *report; + char subctx[128]; + + /* + * Format a subcontext to show this numbered entry of + * the GESB. + */ + snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i); + + /* + * If the remaining GESB data length isn't enough for a + * GEDE header, stop here. + */ + if (datalen < sizeof(*gede)) { + device_printf(sc->sc_dev, "%s:" + " truncated GEDE: %"PRIu32" < %zu bytes\n", + subctx, datalen, sizeof(*gede)); + break; + } + + /* + * Print the GEDE header and get the full length (may + * vary from revision to revision of the GEDE) and the + * CPER report function if possible. + */ + apei_gede_report_header(sc, gede, subctx, + &headerlen, &report); + + /* + * If we don't know the header length because of an + * unfamiliar revision, stop here. + */ + if (headerlen == 0) { + device_printf(sc->sc_dev, "%s:" + " unknown revision: 0x%"PRIx16"\n", + subctx, gede->Revision); + break; + } + + /* + * Stop here if what we mapped is too small for the + * error data length. + */ + datalen -= headerlen; + if (datalen < gede->ErrorDataLength) { + device_printf(sc->sc_dev, "%s: truncated GEDE payload:" + " %"PRIu32" < %"PRIu32" bytes\n", + subctx, datalen, gede->ErrorDataLength); + break; + } + + /* + * Report the Common Platform Error Record appendix to + * this Generic Error Data Entry. + */ + if (report == NULL) { + device_printf(sc->sc_dev, "%s: [unknown type]\n", ctx); + } else { + (*report->func)(sc, (const char *)gede + headerlen, + gede->ErrorDataLength, subctx); + } + + /* + * Advance past the GEDE header and CPER data to the + * next GEDE. 
+ */ + gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede + + + headerlen + gede->ErrorDataLength); + } + + /* + * Advance past the Generic Error Data Entries (GEDEs) to the + * raw error data. + * + * XXX Provide Max Raw Data Length as a parameter, as found in + * various HEST entry types. + */ + rawdata = (const unsigned char *)gede0 + datalen; + + /* + * Verify that the raw data length fits within the size. If + * not, truncate the raw data. + */ + rawdatalen = gesb->RawDataLength; + if (size < rawdatalen) { + device_printf(sc->sc_dev, "%s:" + " GESB RawDataLength exceeds bounds: %zu < %"PRIu32"\n", + ctx, size, rawdatalen); + rawdatalen = size; + } + size -= rawdatalen; + + /* + * Hexdump the raw data, if any. + */ + if (rawdatalen > 0) { + char devctx[128]; + + snprintf(devctx, sizeof(devctx), "%s: %s: raw data", + device_xname(sc->sc_dev), ctx); + hexdump(printf, devctx, rawdata, rawdatalen); + } + + /* + * If there's anything left after the raw data, warn. + */ + if (size > 0) { + device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n", + ctx, size); + } + + /* + * Return the status so the caller can ack it, and tell the + * caller whether this error is fatal. 
+ */ +out: *fatalp = fatal; + return status; +} + +MODULE(MODULE_CLASS_DRIVER, apei, NULL); + +#ifdef _MODULE +#include "ioconf.c" +#endif + +static int +apei_modcmd(modcmd_t cmd, void *opaque) +{ + int error = 0; + + switch (cmd) { + case MODULE_CMD_INIT: +#ifdef _MODULE + error = config_init_component(cfdriver_ioconf_apei, + cfattach_ioconf_apei, cfdata_ioconf_apei); +#endif + return error; + case MODULE_CMD_FINI: +#ifdef _MODULE + error = config_fini_component(cfdriver_ioconf_apei, + cfattach_ioconf_apei, cfdata_ioconf_apei); +#endif + return error; + default: + return ENOTTY; + } +} diff --git a/sys/dev/acpi/apei_bert.c b/sys/dev/acpi/apei_bert.c new file mode 100644 index 000000000000..b19c58a4d906 --- /dev/null +++ b/sys/dev/acpi/apei_bert.c @@ -0,0 +1,138 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI BERT -- Boot Error Record Table + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#boot-error-source + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include + +#include +#include +#include + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("apei") + +/* + * apei_bert_attach(sc) + * + * Scan the Boot Error Record Table for hardware errors that + * happened early at boot or on the previous boot. + */ +void +apei_bert_attach(struct apei_softc *sc) +{ + const ACPI_TABLE_BERT *bert = sc->sc_tab.bert; + struct apei_bert_softc *bsc = &sc->sc_bert; + bool fatal = false; + + /* + * Verify the table is large enough. + */ + if (bert->Header.Length < sizeof(*bert)) { + aprint_error_dev(sc->sc_dev, "BERT: truncated table:" + " %"PRIu32" < %zu bytes\n", + bert->Header.Length, sizeof(*bert)); + return; + } + + /* + * In verbose boots, print the BERT physical address and + * length. The operator might find this handy for dd'ing it + * from /dev/mem, if allowed. + */ + aprint_verbose_dev(sc->sc_dev, "BERT: 0x%x bytes at 0x%"PRIx64"\n", + bert->RegionLength, bert->Address); + + /* + * Verify the length is enough for a Generic Error Status Block + * header, at least. 
+ */ + if (bert->RegionLength < sizeof(*bsc->bsc_gesb)) { + aprint_error_dev(sc->sc_dev, + "BERT: truncated boot error region, %"PRIu32" < %zu bytes", + bert->RegionLength, sizeof(*bsc->bsc_gesb)); + return; + } + + /* + * Map the GESB and process it, but don't acknowledge it -- + * this is a one-time polled source; it won't (or at least, + * shouldn't) change after boot. + */ + bsc->bsc_gesb = AcpiOsMapMemory(bert->Address, bert->RegionLength); + const uint32_t status = apei_gesb_report(sc, bsc->bsc_gesb, + bert->RegionLength, "boot error record", &fatal); + if (status == 0) { + /* + * If there were no boot errors, leave a note in dmesg + * to this effect without cluttering up the console + * unless you asked for it by `boot -v'. + */ + aprint_verbose_dev(sc->sc_dev, + "BERT: no boot errors recorded\n"); + } + + /* + * If the error was fatal, print a warning to the console. + * Probably not actually fatal now since it is usually related + * to early or previous boot. + */ + if (fatal) { + aprint_error_dev(sc->sc_dev, "BERT:" + " fatal pre-boot error recorded\n"); + } + + /* XXX expose content via sysctl? */ +} + +/* + * apei_bert_detach(sc) + * + * Free any software resources associated with the Boot Error + * Record Table. + */ +void +apei_bert_detach(struct apei_softc *sc) +{ + const ACPI_TABLE_BERT *bert = sc->sc_tab.bert; + struct apei_bert_softc *bsc = &sc->sc_bert; + + if (bsc->bsc_gesb) { + AcpiOsUnmapMemory(bsc->bsc_gesb, bert->RegionLength); + bsc->bsc_gesb = NULL; + } +} diff --git a/sys/dev/acpi/apei_bertvar.h b/sys/dev/acpi/apei_bertvar.h new file mode 100644 index 000000000000..9b91ef1d2a6a --- /dev/null +++ b/sys/dev/acpi/apei_bertvar.h @@ -0,0 +1,48 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_BERTVAR_H_ +#define _SYS_DEV_ACPI_APEI_BERTVAR_H_ + +#include + +struct apei_softc; + +/* + * struct apei_bert_softc + * + * Software state for access to the BERT, Boot Error Record Table. + */ +struct apei_bert_softc { + ACPI_HEST_GENERIC_STATUS *bsc_gesb; /* boot error region as mapped by apei_bert_attach; unmapped and reset to NULL by apei_bert_detach */ +}; + +void apei_bert_attach(struct apei_softc *); +void apei_bert_detach(struct apei_softc *); + +#endif /* _SYS_DEV_ACPI_APEI_BERTVAR_H_ */ diff --git a/sys/dev/acpi/apei_cper.h b/sys/dev/acpi/apei_cper.h new file mode 100644 index 000000000000..9cbfd8b1bd27 --- /dev/null +++ b/sys/dev/acpi/apei_cper.h @@ -0,0 +1,234 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * UEFI Common Platform Error Record + * + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html + */ + +#ifndef _SYS_DEV_ACPI_APEI_CPER_H_ +#define _SYS_DEV_ACPI_APEI_CPER_H_ + +#include + +#include + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#record-header + */ +struct cper_header { + char SignatureStart[4]; /* "CPER" */ + uint16_t Revision; + uint32_t SignatureEnd; /* 0xffffffff */ + uint16_t SectionCount; + uint32_t ErrorSeverity; + uint32_t ValidationBits; + uint32_t RecordLength; + uint64_t Timestamp; + uint8_t PlatformId[16]; + uint8_t PartitionId[16]; + uint8_t CreatorId[16]; + uint8_t NotificationType[16]; + uint64_t RecordId; + uint32_t Flags; + uint64_t PersistenceInfo; + uint8_t Reserved[12]; +} __packed; +__CTASSERT(sizeof(struct cper_header) == 128); + +enum { /* struct cper_header::ErrorSeverity */ + CPER_ERROR_SEVERITY_RECOVERABLE = 0, + CPER_ERROR_SEVERITY_FATAL = 1, + CPER_ERROR_SEVERITY_CORRECTED = 2, + CPER_ERROR_SEVERITY_INFORMATIONAL = 3, +}; + +enum { /* struct cper_header::ValidationBits */ + CPER_VALID_PLATFORM_ID = __BIT(0), + CPER_VALID_TIMESTAMP = __BIT(1), + CPER_VALID_PARTITION_ID = __BIT(2), +}; + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-record-header-flags + */ +enum { /* struct cper_header::Flags */ + CPER_HW_ERROR_FLAG_RECOVERED = __BIT(0), + CPER_HW_ERROR_FLAG_PREVERR = __BIT(1), + CPER_HW_ERROR_FLAG_SIMULATED = __BIT(2), +}; + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#section-descriptor-format + */ +enum { + CPER_SECTION_FLAG_PRIMARY = __BIT(0), + CPER_SECTION_FLAG_CONTAINMENT_WARNING = __BIT(1), + CPER_SECTION_FLAG_RESET = __BIT(2), + CPER_SECTION_FLAG_ERROR_THRESHOLD_EXCEEDED = __BIT(3), + CPER_SECTION_FLAG_RESOURCE_NOT_ACCESSIBLE = __BIT(4), + CPER_SECTION_FLAG_LATENT_ERROR = __BIT(5), + CPER_SECTION_FLAG_PROPAGATED = __BIT(6), + CPER_SECTION_FLAG_OVERFLOW = __BIT(7), +}; 
+ +#define CPER_SECTION_FLAGS_FMT "\177\020" \ + "b\000" "PRIMARY\0" \ + "b\001" "CONTAINMENT_WARNING\0" \ + "b\002" "RESET\0" \ + "b\003" "ERROR_THRESHOLD_EXCEEDED\0" \ + "b\004" "RESOURCE_NOT_ACCESSIBLE\0" \ + "b\005" "LATENT_ERROR\0" \ + "b\006" "PROPAGATED\0" \ + "b\007" "OVERFLOW\0" \ + "\0" + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section + * + * Type: {0xa5bc1114,0x6f64,0x4ede,{0xb8,0x63,0x3e,0x83,0xed,0x7c,0x83,0xb1}} + */ + +struct cper_memory_error { + uint64_t ValidationBits; + uint64_t ErrorStatus; + uint64_t PhysicalAddress; + uint64_t PhysicalAddressMask; + uint16_t Node; + uint16_t Card; + uint16_t Module; + uint16_t Bank; + uint16_t Device; + uint16_t Row; + uint16_t Column; + uint16_t BitPosition; + uint64_t RequestorId; + uint64_t ResponderId; + uint64_t TargetId; + uint8_t MemoryErrorType; +} __packed; +__CTASSERT(sizeof(struct cper_memory_error) == 73); + +struct cper_memory_error_ext { + struct cper_memory_error Base; + uint8_t Extended; + uint16_t RankNumber; + uint16_t CardHandle; + uint16_t ModuleHandle; +} __packed; +__CTASSERT(sizeof(struct cper_memory_error_ext) == 80); + +enum { /* struct cper_memory_error::validation_bits */ + CPER_MEMORY_ERROR_VALID_ERROR_STATUS = __BIT(0), + CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS = __BIT(1), + CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK = __BIT(2), + CPER_MEMORY_ERROR_VALID_NODE = __BIT(3), + CPER_MEMORY_ERROR_VALID_CARD = __BIT(4), + CPER_MEMORY_ERROR_VALID_MODULE = __BIT(5), + CPER_MEMORY_ERROR_VALID_BANK = __BIT(6), + CPER_MEMORY_ERROR_VALID_DEVICE = __BIT(7), + CPER_MEMORY_ERROR_VALID_ROW = __BIT(8), + CPER_MEMORY_ERROR_VALID_COLUMN = __BIT(9), + CPER_MEMORY_ERROR_VALID_BIT_POSITION = __BIT(10), + CPER_MEMORY_ERROR_VALID_REQUESTOR_ID = __BIT(11), + CPER_MEMORY_ERROR_VALID_RESPONDER_ID = __BIT(12), + CPER_MEMORY_ERROR_VALID_TARGET_ID = __BIT(13), + CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE = __BIT(14), + CPER_MEMORY_ERROR_VALID_RANK_NUMBER 
= __BIT(15), + CPER_MEMORY_ERROR_VALID_CARD_HANDLE = __BIT(16), + CPER_MEMORY_ERROR_VALID_MODULE_HANDLE = __BIT(17), + CPER_MEMORY_ERROR_VALID_EXTENDED_ROW = __BIT(18), + CPER_MEMORY_ERROR_VALID_BANK_GROUP = __BIT(19), + CPER_MEMORY_ERROR_VALID_BANK_ADDRESS = __BIT(20), + CPER_MEMORY_ERROR_VALID_CHIP_ID = __BIT(21), +}; + +#define CPER_MEMORY_ERROR_VALIDATION_BITS_FMT "\177\020" \ + "b\000" "ERROR_STATUS\0" \ + "b\001" "PHYSICAL_ADDRESS\0" \ + "b\002" "PHYSICAL_ADDRESS_MASK\0" \ + "b\003" "NODE\0" \ + "b\004" "CARD\0" \ + "b\005" "MODULE\0" \ + "b\006" "BANK\0" \ + "b\007" "DEVICE\0" \ + "b\010" "ROW\0" \ + "b\011" "COLUJMN\0" \ + "b\012" "BIT_POSITION\0" \ + "b\013" "REQUESTOR_ID\0" \ + "b\014" "RESPONDER_ID\0" \ + "b\015" "TARGET_ID\0" \ + "b\016" "MEMORY_ERROR_TYPE\0" \ + "b\017" "RANK_NUMBER\0" \ + "b\020" "CARD_HANDLE\0" \ + "b\021" "MODULE_HANDLE\0" \ + "b\022" "EXTENDED_ROW\0" \ + "b\023" "BANK_GROUP\0" \ + "b\024" "BANK_ADDRESS\0" \ + "b\025" "CHIP_ID\0" \ + "\0" + +enum { /* struct cper_memory_error::bank */ + CPER_MEMORY_ERROR_BANK_ADDRESS = __BITS(7,0), + CPER_MEMORY_ERROR_BANK_GROUP = __BITS(15,8), +}; + +#define CPER_MEMORY_ERROR_TYPES(F) \ + F(CPER_MEMORY_ERROR_UNKNOWN, UNKNOWN, 0) \ + F(CPER_MEMORY_ERROR_NO_ERROR, NO_ERROR, 1) \ + F(CPER_MEMORY_ERROR_SINGLEBIT_ECC, SINGLEBIT_ECC, 2) \ + F(CPER_MEMORY_ERROR_MULTIBIT_ECC, MULTIBIT_ECC, 3) \ + F(CPER_MEMORY_ERROR_SINGLESYM_CHIPKILL_ECC, SINGLESYM_CHIPKILL_ECC, 4)\ + F(CPER_MEMORY_ERROR_MULTISYM_CHIPKILL_ECC, MULTISYM_CHIPKILL_ECC, 5) \ + F(CPER_MEMORY_ERROR_MASTER_ABORT, MASTER_ABORT, 6) \ + F(CPER_MEMORY_ERROR_TARGET_ABORT, TARGET_ABORT, 7) \ + F(CPER_MEMORY_ERROR_PARITY_ERROR, PARITY_ERROR, 8) \ + F(CPER_MEMORY_ERROR_WATCHDOG_TIMEOUT, WATCHDOG_TIMEOUT, 9) \ + F(CPER_MEMORY_ERROR_INVALID_ADDRESS, INVALID_ADDRESS, 10) \ + F(CPER_MEMORY_ERROR_MIRROR_BROKEN, MIRROR_BROKEN, 11) \ + F(CPER_MEMORY_ERROR_MEMORY_SPARING, MEMORY_SPARING, 12) \ + F(CPER_MEMORY_ERROR_SCRUB_CORRECTED_ERROR, SCRUB_CORRECTED_ERROR, 
13) \ + F(CPER_MEMORY_ERROR_SCRUB_UNCORRECTED_ERROR, SCRUB_UNCORRECTED_ERROR, \ + 14) \ + F(CPER_MEMORY_ERROR_PHYSMEM_MAPOUT_EVENT, PHYSMEM_MAPOUT_EVENT, 15) \ + /* end of CPER_MEMORY_ERROR_TYPES */ + +enum cper_memory_error_type { /* struct cper_memory_error::memory_error_type */ +#define CPER_MEMORY_ERROR_TYPE_DEF(LN, SN, V) LN = V, + CPER_MEMORY_ERROR_TYPES(CPER_MEMORY_ERROR_TYPE_DEF) +#undef CPER_MEMORY_ERROR_TYPE_DEF +}; + +enum { /* struct cper_memory_error_ext::extended */ + CPER_MEMORY_ERROR_EXTENDED_ROWBIT16 = __BIT(0), + CPER_MEMORY_ERROR_EXTENDED_ROWBIT17 = __BIT(1), + CPER_MEMORY_ERROR_EXTENDED_CHIPID = __BITS(7,5), +}; + +#endif /* _SYS_DEV_ACPI_APEI_CPER_H_ */ diff --git a/sys/dev/acpi/apei_einj.c b/sys/dev/acpi/apei_einj.c new file mode 100644 index 000000000000..0e75dd0583e3 --- /dev/null +++ b/sys/dev/acpi/apei_einj.c @@ -0,0 +1,851 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI EINJ -- Error Injection Table + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-injection + * + * XXX Consider a /dev node with ioctls for error injection rather than + * the somewhat kooky sysctl interface. By representing an error + * injection request in a structure, we can serialize access to the + * platform's EINJ operational context. However, this also requires + * some nontrivial userland support; maybe relying on the user to tread + * carefully with error injection is fine -- after all, many types of + * error injection will cause a system halt/panic. 
+ */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ioconf.h" + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("apei") + +static void apei_einj_instfunc(ACPI_WHEA_HEADER *, void *, uint32_t *, + uint32_t); +static uint64_t apei_einj_act(struct apei_softc *, enum AcpiEinjActions, + uint64_t); +static uint64_t apei_einj_trigger(struct apei_softc *, uint64_t); +static int apei_einj_action_sysctl(SYSCTLFN_ARGS); +static int apei_einj_trigger_sysctl(SYSCTLFN_ARGS); +static int apei_einj_types_sysctl(SYSCTLFN_ARGS); + +/* + * apei_einj_action + * + * Symbolic names of the APEI EINJ (Error Injection) logical actions + * are taken (and downcased) from: + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-injection-actions + */ +static const char *const apei_einj_action[] = { + [ACPI_EINJ_BEGIN_OPERATION] = "begin_injection_operation", + [ACPI_EINJ_GET_TRIGGER_TABLE] = "get_trigger_error_action_table", + [ACPI_EINJ_SET_ERROR_TYPE] = "set_error_type", + [ACPI_EINJ_GET_ERROR_TYPE] = "get_error_type", + [ACPI_EINJ_END_OPERATION] = "end_operation", + [ACPI_EINJ_EXECUTE_OPERATION] = "execute_operation", + [ACPI_EINJ_CHECK_BUSY_STATUS] = "check_busy_status", + [ACPI_EINJ_GET_COMMAND_STATUS] = "get_command_status", + [ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS] = "set_error_type_with_address", + [ACPI_EINJ_GET_EXECUTE_TIMINGS] = "get_execute_operation_timings", +}; + +/* + * apei_einj_instruction + * + * Symbolic names of the APEI EINJ (Error Injection) instructions to + * implement logical actions are taken (and downcased) from: + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#injection-instructions-table + */ + +static const char *const apei_einj_instruction[] = { + [ACPI_EINJ_READ_REGISTER] = "read_register", + [ACPI_EINJ_READ_REGISTER_VALUE] = "read_register", + [ACPI_EINJ_WRITE_REGISTER] = "write_register", + 
[ACPI_EINJ_WRITE_REGISTER_VALUE] = "write_register_value", + [ACPI_EINJ_NOOP] = "noop", +}; + +/* + * apei_einj_attach(sc) + * + * Scan the Error Injection table to ascertain what error + * injection actions the firmware supports and how to perform + * them. Create sysctl nodes for triggering error injection. + */ +void +apei_einj_attach(struct apei_softc *sc) +{ + ACPI_TABLE_EINJ *einj = sc->sc_tab.einj; + struct apei_einj_softc *jsc = &sc->sc_einj; + ACPI_EINJ_ENTRY *entry; + const struct sysctlnode *sysctl_einj; + const struct sysctlnode *sysctl_einj_action; + uint32_t i, nentries, maxnentries; + unsigned action; + int error; + + /* + * Verify the table length, table header length, and + * instruction entry count are all sensible. If the header is + * truncated, stop here; if the entries are truncated, stop at + * the largest integral number of full entries that fits. + */ + if (einj->Header.Length < sizeof(*einj)) { + aprint_error_dev(sc->sc_dev, "EINJ: truncated table:" + " %"PRIu32" < %zu minimum bytes\n", + einj->Header.Length, sizeof(*einj)); + return; + } + if (einj->HeaderLength < + sizeof(*einj) - offsetof(ACPI_TABLE_EINJ, HeaderLength)) { + aprint_error_dev(sc->sc_dev, "EINJ: truncated header:" + " %"PRIu32" < %zu bytes\n", + einj->HeaderLength, + sizeof(*einj) - offsetof(ACPI_TABLE_EINJ, HeaderLength)); + return; + } + nentries = einj->Entries; + maxnentries = (einj->Header.Length - sizeof(*einj))/sizeof(*entry); + if (nentries > maxnentries) { + aprint_error_dev(sc->sc_dev, "EINJ: excessive entries:" + " %"PRIu32", truncating to %"PRIu32"\n", + nentries, maxnentries); + nentries = maxnentries; + } + if (nentries*sizeof(*entry) < einj->Header.Length - sizeof(*einj)) { + aprint_error_dev(sc->sc_dev, "EINJ:" + " %zu bytes of trailing garbage after last entry\n", + einj->Header.Length - nentries*sizeof(*entry)); + } + + /* + * Create sysctl hw.acpi.apei.einj for all EINJ-related knobs. 
+ */ + error = sysctl_createv(&sc->sc_sysctllog, 0, + &sc->sc_sysctlroot, &sysctl_einj, 0, + CTLTYPE_NODE, "einj", + SYSCTL_DESCR("Error injection"), + NULL, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " hw.acpi.apei.einj: %d\n", error); + sysctl_einj = NULL; + } + + /* + * Create an interpreter for EINJ actions. + */ + jsc->jsc_interp = apei_interp_create("EINJ", + apei_einj_action, __arraycount(apei_einj_action), + apei_einj_instruction, __arraycount(apei_einj_instruction), + /*instvalid*/NULL, apei_einj_instfunc); + + /* + * Compile the interpreter from the EINJ action instruction + * table. + */ + entry = (ACPI_EINJ_ENTRY *)(einj + 1); + for (i = 0; i < nentries; i++, entry++) + apei_interp_pass1_load(jsc->jsc_interp, i, &entry->WheaHeader); + entry = (ACPI_EINJ_ENTRY *)(einj + 1); + for (i = 0; i < nentries; i++, entry++) { + apei_interp_pass2_verify(jsc->jsc_interp, i, + &entry->WheaHeader); + } + apei_interp_pass3_alloc(jsc->jsc_interp); + entry = (ACPI_EINJ_ENTRY *)(einj + 1); + for (i = 0; i < nentries; i++, entry++) { + apei_interp_pass4_assemble(jsc->jsc_interp, i, + &entry->WheaHeader); + } + apei_interp_pass5_verify(jsc->jsc_interp); + + /* + * Create sysctl hw.acpi.apei.einj.action for individual actions. + */ + error = sysctl_einj == NULL ? ENOENT : + sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_einj, &sysctl_einj_action, 0, + CTLTYPE_NODE, "action", + SYSCTL_DESCR("EINJ actions"), + NULL, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " hw.acpi.apei.einj.action: %d\n", error); + sysctl_einj_action = NULL; + } + + /* + * Create sysctl nodes for each action we know about. + */ + for (action = 0; action < __arraycount(apei_einj_action); action++) { + if (apei_einj_action[action] == NULL) + continue; + + /* + * Check to see if there are any instructions for this + * action. + * + * XXX Maybe add this to the apei_interp.h abstraction. 
+ */ + entry = (ACPI_EINJ_ENTRY *)(einj + 1); + for (i = 0; i < nentries; i++, entry++) { + ACPI_WHEA_HEADER *const header = &entry->WheaHeader; + + if (action == header->Action) + break; + } + if (i == nentries) { + /* + * No instructions for this action, so assume + * it's not supported. + */ + continue; + } + + /* + * Create a sysctl knob to perform the action. + */ + error = sysctl_einj_action == NULL ? ENOENT : + sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_einj_action, NULL, CTLFLAG_READWRITE, + CTLTYPE_QUAD, apei_einj_action[action], + NULL, /* description */ + &apei_einj_action_sysctl, 0, NULL, 0, + action, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " sysctl hw.acpi.apei.einj.action.%s: %d\n", + apei_einj_action[action], error); + continue; + } + } + + /* + * Create a sysctl knob to trigger error. + */ + error = sysctl_einj == NULL ? ENOENT : + sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_einj, NULL, CTLFLAG_READWRITE, + CTLTYPE_QUAD, "trigger", + NULL, /* description */ + &apei_einj_trigger_sysctl, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " sysctl hw.acpi.apei.einj.trigger: %d\n", + error); + } + + /* + * Query the available types of error to inject and print it to + * dmesg. 
+ * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-types + */ + uint64_t types = apei_einj_act(sc, ACPI_EINJ_GET_ERROR_TYPE, 0); + char typesbuf[1024], *typesp; + /* XXX define this format somewhere */ + snprintb_m(typesbuf, sizeof(typesbuf), "\177\020" + "b\000" "PROC_CORRECTABLE\0" + "b\001" "PROC_UNCORRECTABLE\0" + "b\002" "PROC_FATAL\0" + "b\003" "MEM_CORRECTABLE\0" + "b\004" "MEM_UNCORRECTABLE\0" + "b\005" "MEM_FATAL\0" + "b\006" "PCIE_CORRECTABLE\0" + "b\007" "PCIE_UNCORRECTABLE\0" + "b\010" "PCIE_FATAL\0" + "b\011" "PLAT_CORRECTABLE\0" + "b\012" "PLAT_UNCORRECTABLE\0" + "b\013" "PLAT_FATAL\0" + "b\014" "CXLCACHE_CORRECTABLE\0" + "b\015" "CXLCACHE_UNCORRECTABLE\0" + "b\016" "CXLCACHE_FATAL\0" + "b\017" "CXLMEM_CORRECTABLE\0" + "b\020" "CXLMEM_UNCORRECTABLE\0" + "b\021" "CXLMEM_FATAL\0" +// "f\022\014" "reserved\0" + "b\036" "EINJv2\0" + "b\037" "VENDOR\0" + "\0", types, 36); + for (typesp = typesbuf; strlen(typesp); typesp += strlen(typesp) + 1) { + aprint_normal_dev(sc->sc_dev, "EINJ: can inject:" + " %s\n", typesp); + } + + /* + * Create a sysctl knob to query the available types of error + * to inject. In principle this could change dynamically, so + * we'll make it dynamic. + */ + error = sysctl_einj == NULL ? ENOENT : + sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_einj, NULL, 0, + CTLTYPE_QUAD, "types", + SYSCTL_DESCR("Types of errors that can be injected"), + &apei_einj_types_sysctl, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " sysctl hw.acpi.apei.einj.types: %d\n", + error); + } +} + +/* + * apei_einj_detach(sc) + * + * Free any software resources associated with the Error Injection + * table. 
+ */
+void
+apei_einj_detach(struct apei_softc *sc)
+{
+	struct apei_einj_softc *jsc = &sc->sc_einj;
+
+	/*
+	 * Clear the pointer after destroying the interpreter so that a
+	 * repeated detach finds nothing left to free.
+	 */
+	if (jsc->jsc_interp) {
+		apei_interp_destroy(jsc->jsc_interp);
+		jsc->jsc_interp = NULL;
+	}
+}
+
+/*
+ * struct apei_einj_machine
+ *
+ *	Machine state for executing EINJ instructions: the softc (used
+ *	for diagnostics), the caller-supplied input x, and the output
+ *	y left behind by the last instruction executed.
+ */
+struct apei_einj_machine {
+	struct apei_softc	*sc;
+	uint64_t		x;	/* in */
+	uint64_t		y;	/* out */
+};
+
+/*
+ * apei_einj_instfunc(header, cookie, &ip, maxip)
+ *
+ *	Run a single instruction in the service of performing an EINJ
+ *	action.  Updates the EINJ machine at cookie in place: y is
+ *	reset to zero on entry and set only by the READ_REGISTER and
+ *	READ_REGISTER_VALUE instructions.
+ *
+ *	An unrecognized instruction, or a failed register access, is
+ *	reported with aprint_debug only; nothing is returned to the
+ *	caller.
+ *
+ *	This doesn't read or write ip.  The TRIGGER_ERROR logic relies
+ *	on this; if you change the fact, you must update that logic
+ *	too.
+ */
+static void
+apei_einj_instfunc(ACPI_WHEA_HEADER *header, void *cookie, uint32_t *ipp,
+    uint32_t maxip)
+{
+	struct apei_einj_machine *M = cookie;
+	ACPI_STATUS rv = AE_OK;
+
+	/*
+	 * Abbreviate some of the intermediate quantities to make the
+	 * instruction logic conciser and more legible.
+	 */
+	const uint8_t BitOffset = header->RegisterRegion.BitOffset;
+	const uint64_t Mask = header->Mask;
+	const uint64_t Value = header->Value;
+	ACPI_GENERIC_ADDRESS *const reg = &header->RegisterRegion;
+	const bool preserve_register = header->Flags & ACPI_EINJ_PRESERVE;
+
+	aprint_debug_dev(M->sc->sc_dev, "%s: instr=0x%02"PRIx8
+	    " (%s)"
+	    " Address=0x%"PRIx64
+	    " BitOffset=%"PRIu8" Mask=0x%"PRIx64" Value=0x%"PRIx64
+	    " Flags=0x%"PRIx8"\n",
+	    __func__, header->Instruction,
+	    (header->Instruction < __arraycount(apei_einj_instruction)
+		? apei_einj_instruction[header->Instruction]
+		: "unknown"),
+	    reg->Address,
+	    BitOffset, Mask, Value,
+	    header->Flags);
+
+	/*
+	 * Zero-initialize the output by default.
+	 */
+	M->y = 0;
+
+	/*
+	 * Dispatch the instruction.
+	 */
+	switch (header->Instruction) {
+	case ACPI_EINJ_READ_REGISTER:
+		rv = apei_read_register(reg, Mask, &M->y);
+		break;
+	case ACPI_EINJ_READ_REGISTER_VALUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		M->y = (v == Value ? 1 : 0);
+		break;
+	}
+	case ACPI_EINJ_WRITE_REGISTER:
+		rv = apei_write_register(reg, Mask, preserve_register, M->x);
+		break;
+	case ACPI_EINJ_WRITE_REGISTER_VALUE:
+		rv = apei_write_register(reg, Mask, preserve_register, Value);
+		break;
+	case ACPI_EINJ_NOOP:
+		break;
+	default:
+		rv = AE_ERROR;
+		break;
+	}
+
+	/*
+	 * If any register I/O failed, print the failure message.  This
+	 * could be more specific about exactly what failed, but that
+	 * takes a little more effort to write.
+	 */
+	if (ACPI_FAILURE(rv)) {
+		aprint_debug_dev(M->sc->sc_dev, "%s: failed: %s\n", __func__,
+		    AcpiFormatException(rv));
+	}
+}
+
+/*
+ * apei_einj_act(sc, action, x)
+ *
+ *	Perform the named EINJ action with input x, by executing the
+ *	instruction defined for the action by the EINJ, and return the
+ *	output.
+ */
+static uint64_t
+apei_einj_act(struct apei_softc *sc, enum AcpiEinjActions action,
+    uint64_t x)
+{
+	struct apei_einj_softc *const jsc = &sc->sc_einj;
+	struct apei_einj_machine einj_machine, *const M = &einj_machine;
+
+	aprint_debug_dev(sc->sc_dev, "%s: action=%d (%s) input=0x%"PRIx64"\n",
+	    __func__,
+	    action,
+	    (action < __arraycount(apei_einj_action)
+		? apei_einj_action[action]
+		: "unknown"),
+	    x);
+
+	/*
+	 * Initialize the machine to execute the action's instructions.
+	 */
+	memset(M, 0, sizeof(*M));
+	M->sc = sc;
+	M->x = x;		/* input */
+	M->y = 0;		/* output */
+
+	/*
+	 * Run the interpreter.
+	 */
+	apei_interpret(jsc->jsc_interp, action, M);
+
+	/*
+	 * Return the result.
+	 */
+	aprint_debug_dev(sc->sc_dev, "%s: output=0x%"PRIx64"\n", __func__,
+	    M->y);
+	return M->y;
+}
+
+/*
+ * apei_einj_trigger(sc, x)
+ *
+ *	Obtain the TRIGGER_ERROR action table and, if there is anything
+ *	to be done with it, execute it with input x and return the
+ *	output.  If nothing is to be done -- no entries, a malformed or
+ *	unknown-revision table, or a failure to map it -- return 0.
+ */
+static uint64_t
+apei_einj_trigger(struct apei_softc *sc, uint64_t x)
+{
+	uint64_t teatab_pa;
+	ACPI_EINJ_TRIGGER *teatab = NULL;
+	size_t mapsize = 0, tabsize, entrybytes, maxnentries;
+	ACPI_EINJ_ENTRY *entry;
+	struct apei_einj_machine einj_machine, *const M = &einj_machine;
+	uint32_t i, nentries;
+
+	/*
+	 * Initialize the machine to execute the TRIGGER_ERROR action's
+	 * instructions.  This is done before anything can bail out so
+	 * that every exit path returns a well-defined output (zero)
+	 * rather than uninitialized stack contents.
+	 */
+	memset(M, 0, sizeof(*M));
+	M->sc = sc;
+	M->x = x;		/* input */
+	M->y = 0;		/* output */
+
+	/*
+	 * Get the TRIGGER_ERROR action table's physical address.
+	 */
+	teatab_pa = apei_einj_act(sc, ACPI_EINJ_GET_TRIGGER_TABLE, 0);
+
+	/*
+	 * Map just the header.  We don't know how large the table is
+	 * because we get that from the header.
+	 */
+	mapsize = sizeof(*teatab);
+	teatab = AcpiOsMapMemory(teatab_pa, mapsize);
+	if (teatab == NULL) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " failed to map header at 0x%"PRIx64"\n", teatab_pa);
+		mapsize = 0;
+		goto out;
+	}
+
+	/*
+	 * If there's no entries, stop here -- nothing to do separately
+	 * to trigger an error report.
+	 */
+	nentries = teatab->EntryCount;
+	if (nentries == 0)
+		goto out;
+
+	/*
+	 * If the header size or the table size is nonsense, bail.
+	 */
+	if (teatab->HeaderSize < sizeof(*teatab) ||
+	    teatab->TableSize < teatab->HeaderSize) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " invalid sizes:"
+		    " HeaderSize=%"PRIu32" TableSize=%"PRIu32"\n",
+		    teatab->HeaderSize, teatab->TableSize);
+		goto out;
+	}
+
+	/*
+	 * If the revision is nonzero, we don't know what to do.  I've
+	 * only seen revision zero so far, and the spec doesn't say
+	 * anything about revisions that I've found.
+	 */
+	if (teatab->Revision != 0) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " unknown revision: %"PRIx32"\n", teatab->Revision);
+		goto out;
+	}
+
+	/*
+	 * Truncate the table to the number of entries requested and
+	 * ignore trailing garbage if the table is long, or round the
+	 * number of entries down to what fits in the table if the
+	 * table is short.
+	 *
+	 * TableSize covers the header too, and the entries are read
+	 * starting right after the header (teatab + 1 below), so only
+	 * TableSize - sizeof(*teatab) bytes are available for entries.
+	 * The size check above guarantees this doesn't underflow.
+	 */
+	tabsize = teatab->TableSize;
+	entrybytes = tabsize - sizeof(*teatab);
+	maxnentries = entrybytes/sizeof(ACPI_EINJ_ENTRY);
+	if (nentries > maxnentries) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " truncated to %zu entries\n",
+		    maxnentries);
+		nentries = maxnentries;
+	} else if (nentries*sizeof(ACPI_EINJ_ENTRY) < entrybytes) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " %zu bytes of trailing garbage\n",
+		    entrybytes - nentries*sizeof(ACPI_EINJ_ENTRY));
+	}
+	tabsize = sizeof(*teatab) + nentries*sizeof(ACPI_EINJ_ENTRY);
+
+	/*
+	 * Unmap the header and map the whole table instead.
+	 */
+	AcpiOsUnmapMemory(teatab, mapsize);
+	mapsize = tabsize;
+	teatab = AcpiOsMapMemory(teatab_pa, mapsize);
+	if (teatab == NULL) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " failed to map %zu bytes at 0x%"PRIx64"\n",
+		    mapsize, teatab_pa);
+		mapsize = 0;
+		goto out;
+	}
+
+	/*
+	 * Now iterate over the EINJ-type entries and execute the
+	 * trigger error action instructions -- but skip if they're not
+	 * for the TRIGGER_ERROR action, and stop if they're truncated.
+	 *
+	 * Entries are fixed-size, so we can just index them.
+	 */
+	entry = (ACPI_EINJ_ENTRY *)(teatab + 1);
+	for (i = 0; i < nentries; i++) {
+		ACPI_WHEA_HEADER *const header = &entry[i].WheaHeader;
+
+		/*
+		 * Verify the action is TRIGGER_ERROR.  If not, skip.
+		 */
+		if (header->Action != ACPI_EINJ_TRIGGER_ERROR) {
+			device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+			    " other action: %"PRIu32" (%s)\n",
+			    header->Action,
+			    (header->Action < __arraycount(apei_einj_action)
+				? apei_einj_action[header->Action]
+				: "unknown"));
+			continue;
+		}
+
+		/*
+		 * Execute the instruction.  Since there's only one
+		 * action, we don't bother with the apei_interp
+		 * machinery to collate instruction tables for each
+		 * action.  EINJ instructions don't change ip.
+		 */
+		uint32_t ip = i + 1;
+		apei_einj_instfunc(header, M, &ip, nentries);
+		KASSERT(ip == i + 1);
+	}
+
+out:	if (teatab) {
+		AcpiOsUnmapMemory(teatab, mapsize);
+		teatab = NULL;
+		mapsize = 0;
+	}
+	return M->y;
+}
+
+/*
+ * apei_einj_action_sysctl:
+ *
+ *	Handle sysctl queries under hw.acpi.apei.einj.action.*.
+ *
+ *	The value written is passed as the input of the EINJ action
+ *	selected by the node number, and the action's output is handed
+ *	back as the node's old value.  Fails with ENOENT if no new
+ *	value is supplied or the node number is not a known action,
+ *	ENXIO if apei0 is not attached, and ENODEV if there is no EINJ
+ *	table.
+ */
+static int
+apei_einj_action_sysctl(SYSCTLFN_ARGS)
+{
+	device_t apei0 = NULL;
+	struct apei_softc *sc;
+	enum AcpiEinjActions action;
+	struct sysctlnode node = *rnode;
+	uint64_t v;
+	int error;
+
+	/*
+	 * As a defence against mistakes, require the user to specify a
+	 * write.
+	 */
+	if (newp == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	/*
+	 * Take a reference to the apei0 device so it doesn't go away
+	 * while we're working, and get the softc.
+	 */
+	if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+	sc = device_private(apei0);
+
+	/*
+	 * Fail if there's no EINJ.
+	 */
+	if (sc->sc_tab.einj == NULL) {
+		error = ENODEV;
+		goto out;
+	}
+
+	/*
+	 * Identify the requested action.  If we don't recognize it,
+	 * fail with ENOENT.
+	 */
+	switch (node.sysctl_num) {
+	case ACPI_EINJ_BEGIN_OPERATION:
+	case ACPI_EINJ_GET_TRIGGER_TABLE:
+	case ACPI_EINJ_SET_ERROR_TYPE:
+	case ACPI_EINJ_GET_ERROR_TYPE:
+	case ACPI_EINJ_END_OPERATION:
+	case ACPI_EINJ_EXECUTE_OPERATION:
+	case ACPI_EINJ_CHECK_BUSY_STATUS:
+	case ACPI_EINJ_GET_COMMAND_STATUS:
+	case ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS:
+	case ACPI_EINJ_GET_EXECUTE_TIMINGS:
+		action = node.sysctl_num;
+		break;
+	default:
+		error = ENOENT;
+		goto out;
+	}
+
+	/*
+	 * Kludge: Copy the `new value' for the sysctl in as an input
+	 * to the injection action.
+	 */
+	error = sysctl_copyin(curlwp, newp, &v, sizeof(v));
+	if (error)
+		goto out;
+
+	/*
+	 * Perform the EINJ action by following the table's
+	 * instructions.
+	 */
+	v = apei_einj_act(sc, action, v);
+
+	/*
+	 * Return the output of the operation as the `old value' of the
+	 * sysctl.  This also overwrites v with the value that was
+	 * written to the sysctl, but we don't care because we already
+	 * read that in and acted on it.
+	 */
+	node.sysctl_data = &v;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+
+out:	if (apei0) {
+		device_release(apei0);
+		apei0 = NULL;
+	}
+	return error;
+}
+
+/*
+ * apei_einj_trigger_sysctl
+ *
+ *	Handle sysctl hw.acpi.apei.einj.trigger.
+ *
+ *	The value written is passed as the input of the TRIGGER_ERROR
+ *	action table, and its output is handed back as the node's old
+ *	value.  Fails with ENOENT if no new value is supplied, ENXIO if
+ *	apei0 is not attached, and ENODEV if there is no EINJ table.
+ */
+static int
+apei_einj_trigger_sysctl(SYSCTLFN_ARGS)
+{
+	device_t apei0 = NULL;
+	struct apei_softc *sc;
+	struct sysctlnode node = *rnode;
+	uint64_t v;
+	int error;
+
+	/*
+	 * As a defence against mistakes, require the user to specify a
+	 * write.
+	 */
+	if (newp == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	/*
+	 * Take a reference to the apei0 device so it doesn't go away
+	 * while we're working, and get the softc.
+	 */
+	if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+	sc = device_private(apei0);
+
+	/*
+	 * Fail if there's no EINJ.
+	 */
+	if (sc->sc_tab.einj == NULL) {
+		error = ENODEV;
+		goto out;
+	}
+
+	/*
+	 * Kludge: Copy the `new value' for the sysctl in as an input
+	 * to the trigger action.
+	 */
+	error = sysctl_copyin(curlwp, newp, &v, sizeof(v));
+	if (error)
+		goto out;
+
+	/*
+	 * Perform the TRIGGER_ERROR action.
+	 */
+	v = apei_einj_trigger(sc, v);
+
+	/*
+	 * Return the output of the operation as the `old value' of the
+	 * sysctl.  This also overwrites v with the value that was
+	 * written to the sysctl, but we don't care because we already
+	 * read that in and acted on it.
+	 */
+	node.sysctl_data = &v;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+
+out:	if (apei0) {
+		device_release(apei0);
+		apei0 = NULL;
+	}
+	return error;
+}
+
+/*
+ * apei_einj_types_sysctl
+ *
+ *	Handle sysctl hw.acpi.apei.einj.types.
+ */
+static int
+apei_einj_types_sysctl(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct apei_softc *sc;
+	device_t apei0;
+	uint64_t types;
+	int error;
+
+	/*
+	 * Hold a reference to apei0 for the duration of the query so
+	 * the device can't disappear underneath us; without a device
+	 * there is nothing to release, so fail immediately.
+	 *
+	 * XXX Is this necessary?  Shouldn't sysctl_teardown take care
+	 * of preventing new sysctl calls and waiting until all pending
+	 * sysctl calls are done?
+	 */
+	apei0 = device_lookup_acquire(&apei_cd, 0);
+	if (apei0 == NULL)
+		return ENXIO;
+	sc = device_private(apei0);
+
+	if (sc->sc_tab.einj != NULL) {
+		/*
+		 * Ask the firmware which error types it can inject by
+		 * running the GET_ERROR_TYPE action, and let sysctl
+		 * copy the answer out.
+		 *
+		 * XXX Should this do it between
+		 * BEGIN_INJECTION_OPERATION and END_OPERATION?
+		 */
+		types = apei_einj_act(sc, ACPI_EINJ_GET_ERROR_TYPE, 0);
+		node.sysctl_data = &types;
+		error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	} else {
+		/*
+		 * No EINJ table, so no error injection to describe.
+		 */
+		error = ENODEV;
+	}
+
+	device_release(apei0);
+	return error;
+}
diff --git a/sys/dev/acpi/apei_einjvar.h b/sys/dev/acpi/apei_einjvar.h
new file mode 100644
index 000000000000..39c2633ff0cd
--- /dev/null
+++ b/sys/dev/acpi/apei_einjvar.h
@@ -0,0 +1,48 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC.
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_EINJVAR_H_ +#define _SYS_DEV_ACPI_APEI_EINJVAR_H_ + +struct apei_interp; +struct apei_softc; + +/* + * struct apei_einj_softc + * + * Software state for error injection actions described in the + * EINJ, Error Injection Table. + */ +struct apei_einj_softc { + struct apei_interp *jsc_interp; +}; + +void apei_einj_attach(struct apei_softc *); +void apei_einj_detach(struct apei_softc *); + +#endif /* _SYS_DEV_ACPI_APEI_EINJVAR_H_ */ diff --git a/sys/dev/acpi/apei_erst.c b/sys/dev/acpi/apei_erst.c new file mode 100644 index 000000000000..9c3d0ad0a3d2 --- /dev/null +++ b/sys/dev/acpi/apei_erst.c @@ -0,0 +1,577 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI ERST -- Error Record Serialization Table + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-serialization + * + * XXX Expose this through a /dev node with ioctls and/or through a + * file system. 
+ */
+
+#include
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#define	_COMPONENT	ACPI_RESOURCE_COMPONENT
+ACPI_MODULE_NAME	("apei")
+
+/*
+ * Forward declarations: apei_erst_attach hands the two interpreter
+ * callbacks to apei_interp_create and calls apei_erst_act, but all
+ * three are defined after it in this file.
+ */
+static bool apei_erst_instvalid(ACPI_WHEA_HEADER *, uint32_t, uint32_t);
+static void apei_erst_instfunc(ACPI_WHEA_HEADER *, void *, uint32_t *,
+    uint32_t);
+static uint64_t apei_erst_act(struct apei_softc *, enum AcpiErstActions,
+    uint64_t);
+
+/*
+ * apei_erst_action
+ *
+ *	Symbolic names of the APEI ERST (Error Record Serialization
+ *	Table) logical actions are taken (and downcased) from:
+ *
+ *	https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-record-serialization-actions-table
+ *
+ *	The table is indexed by the ACPI_ERST_* action number.  It is
+ *	used to name actions in diagnostic messages and is passed,
+ *	together with its length, to apei_interp_create.
+ */
+static const char *const apei_erst_action[] = {
+	[ACPI_ERST_BEGIN_WRITE] = "begin_write_operation",
+	[ACPI_ERST_BEGIN_READ] = "begin_read_operation",
+	[ACPI_ERST_BEGIN_CLEAR] = "begin_clear_operation",
+	[ACPI_ERST_END] = "end_operation",
+	[ACPI_ERST_SET_RECORD_OFFSET] = "set_record_offset",
+	[ACPI_ERST_EXECUTE_OPERATION] = "execute_operation",
+	[ACPI_ERST_CHECK_BUSY_STATUS] = "check_busy_status",
+	[ACPI_ERST_GET_COMMAND_STATUS] = "get_command_status",
+	[ACPI_ERST_GET_RECORD_ID] = "get_record_identifier",
+	[ACPI_ERST_SET_RECORD_ID] = "set_record_identifier",
+	[ACPI_ERST_GET_RECORD_COUNT] = "get_record_count",
+	/* NOTE(review): `WRIITE' presumably mirrors the ACPICA header -- confirm */
+	[ACPI_ERST_BEGIN_DUMMY_WRIITE] = "begin_dummy_write_operation",
+	[ACPI_ERST_NOT_USED] = "reserved",
+	[ACPI_ERST_GET_ERROR_RANGE] = "get_error_log_address_range",
+	[ACPI_ERST_GET_ERROR_LENGTH] = "get_error_log_address_range_length",
+	[ACPI_ERST_GET_ERROR_ATTRIBUTES] =
+	    "get_error_log_address_range_attributes",
+	[ACPI_ERST_EXECUTE_TIMINGS] = "get_execute_operations_timings",
+};
+
+/*
+ * apei_erst_instruction
+ *
+ *	Symbolic names of the APEI ERST (Error Record Serialization
+ *	Table) instructions to implement logical actions are taken (and
+ *	downcased) from:
+ *
+ *	
https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#serialization-instructions
+ */
+static const char *apei_erst_instruction[] = {
+	[ACPI_ERST_READ_REGISTER] = "read_register",
+	[ACPI_ERST_READ_REGISTER_VALUE] = "read_register_value",
+	[ACPI_ERST_WRITE_REGISTER] = "write_register",
+	[ACPI_ERST_WRITE_REGISTER_VALUE] = "write_register_value",
+	[ACPI_ERST_NOOP] = "noop",
+	[ACPI_ERST_LOAD_VAR1] = "load_var1",
+	[ACPI_ERST_LOAD_VAR2] = "load_var2",
+	[ACPI_ERST_STORE_VAR1] = "store_var1",
+	[ACPI_ERST_ADD] = "add",
+	[ACPI_ERST_SUBTRACT] = "subtract",
+	[ACPI_ERST_ADD_VALUE] = "add_value",
+	[ACPI_ERST_SUBTRACT_VALUE] = "subtract_value",
+	[ACPI_ERST_STALL] = "stall",
+	[ACPI_ERST_STALL_WHILE_TRUE] = "stall_while_true",
+	[ACPI_ERST_SKIP_NEXT_IF_TRUE] = "skip_next_instruction_if_true",
+	[ACPI_ERST_GOTO] = "goto",
+	[ACPI_ERST_SET_SRC_ADDRESS_BASE] = "set_src_address_base",
+	[ACPI_ERST_SET_DST_ADDRESS_BASE] = "set_dst_address_base",
+	[ACPI_ERST_MOVE_DATA] = "move_data",
+};
+
+/*
+ * apei_pmemmove(pdst, psrc, nbytes)
+ *
+ *	Copy nbytes bytes of physical memory from psrc to pdst,
+ *	tolerating overlap between the two ranges.  If the ranges
+ *	overlap, a single mapping covering both is used with memmove;
+ *	otherwise each range is mapped separately.  If a mapping can't
+ *	be established, nothing is copied.
+ *
+ * XXX dtrace and kernhist
+ */
+static void
+apei_pmemmove(uint64_t pdst, uint64_t psrc, uint64_t nbytes)
+{
+	char *vdst, *vsrc;
+
+	aprint_debug("ERST: move"
+	    " %"PRIu64" bytes from 0x%"PRIx64" to 0x%"PRIx64"\n",
+	    nbytes, psrc, pdst);
+
+	/*
+	 * Nothing to move -- don't bother creating empty mappings.
+	 */
+	if (nbytes == 0)
+		return;
+
+	/*
+	 * Carefully check for overlap.
+	 */
+	if (pdst < psrc && psrc < pdst + nbytes) {
+		/*
+		 * Destination starts first:
+		 *
+		 *	       psrc ______ psrc + nbytes
+		 *	           /      \
+		 *	<--------------------->
+		 *	       \______/
+		 *	   pdst        pdst + nbytes
+		 */
+		vdst = AcpiOsMapMemory(pdst, nbytes + (psrc - pdst));
+		if (vdst == NULL)
+			goto fail;
+		vsrc = vdst + (psrc - pdst);
+		memmove(vdst, vsrc, nbytes);
+		AcpiOsUnmapMemory(vdst, nbytes + (psrc - pdst));
+	} else if (psrc < pdst && pdst < psrc + nbytes) {
+		/*
+		 * Source starts first:
+		 *
+		 *	   psrc ______ psrc + nbytes
+		 *	       /      \
+		 *	<--------------------->
+		 *	           \______/
+		 *	       pdst        pdst + nbytes
+		 */
+		vsrc = AcpiOsMapMemory(psrc, nbytes + (pdst - psrc));
+		if (vsrc == NULL)
+			goto fail;
+		vdst = vsrc + (pdst - psrc);
+		memmove(vdst, vsrc, nbytes);
+		AcpiOsUnmapMemory(vsrc, nbytes + (pdst - psrc));
+	} else {
+		/*
+		 * No overlap.
+		 */
+		vdst = AcpiOsMapMemory(pdst, nbytes);
+		if (vdst == NULL)
+			goto fail;
+		vsrc = AcpiOsMapMemory(psrc, nbytes);
+		if (vsrc == NULL) {
+			AcpiOsUnmapMemory(vdst, nbytes);
+			goto fail;
+		}
+		memcpy(vdst, vsrc, nbytes);
+		AcpiOsUnmapMemory(vsrc, nbytes);
+		AcpiOsUnmapMemory(vdst, nbytes);
+	}
+	return;
+
+fail:	aprint_error("ERST: failed to map memory for move\n");
+}
+
+/*
+ * apei_erst_attach(sc)
+ *
+ *	Scan the Error Record Serialization Table to collate the
+ *	instructions for each ERST action, then query the firmware for
+ *	the error log's location, size, attributes, and record count
+ *	and print them.
+ */
+void
+apei_erst_attach(struct apei_softc *sc)
+{
+	ACPI_TABLE_ERST *erst = sc->sc_tab.erst;
+	struct apei_erst_softc *ssc = &sc->sc_erst;
+	ACPI_ERST_ENTRY *entry;
+	uint32_t i, nentries, maxnentries;
+
+	/*
+	 * Verify the table length, table header length, and
+	 * instruction entry count are all sensible.  If the header is
+	 * truncated, stop here; if the entries are truncated, stop at
+	 * the largest integral number of full entries that fits.
+	 */
+	if (erst->Header.Length < sizeof(*erst)) {
+		aprint_error_dev(sc->sc_dev, "ERST: truncated table:"
+		    " %"PRIu32" < %zu minimum bytes\n",
+		    erst->Header.Length, sizeof(*erst));
+		return;
+	}
+	if (erst->HeaderLength <
+	    sizeof(*erst) - offsetof(ACPI_TABLE_ERST, HeaderLength)) {
+		aprint_error_dev(sc->sc_dev, "ERST: truncated header:"
+		    " %"PRIu32" < %zu bytes\n",
+		    erst->HeaderLength,
+		    sizeof(*erst) - offsetof(ACPI_TABLE_ERST, HeaderLength));
+		return;
+	}
+	nentries = erst->Entries;
+	maxnentries = (erst->Header.Length - sizeof(*erst))/sizeof(*entry);
+	if (nentries > maxnentries) {
+		aprint_error_dev(sc->sc_dev, "ERST: excessive entries:"
+		    " %"PRIu32", truncating to %"PRIu32"\n",
+		    nentries, maxnentries);
+		nentries = maxnentries;
+	}
+	if (nentries*sizeof(*entry) < erst->Header.Length - sizeof(*erst)) {
+		/*
+		 * Only count what lies past the last entry -- the
+		 * table header is not garbage.
+		 */
+		aprint_error_dev(sc->sc_dev, "ERST:"
+		    " %zu bytes of trailing garbage after last entry\n",
+		    erst->Header.Length - sizeof(*erst) -
+			nentries*sizeof(*entry));
+	}
+
+	/*
+	 * Create an interpreter for ERST actions.
+	 */
+	ssc->ssc_interp = apei_interp_create("ERST",
+	    apei_erst_action, __arraycount(apei_erst_action),
+	    apei_erst_instruction, __arraycount(apei_erst_instruction),
+	    apei_erst_instvalid, apei_erst_instfunc);
+
+	/*
+	 * Compile the interpreter from the ERST action instruction
+	 * table.  Each pass walks the entries from the start again.
+	 */
+	entry = (ACPI_ERST_ENTRY *)(erst + 1);
+	for (i = 0; i < nentries; i++, entry++)
+		apei_interp_pass1_load(ssc->ssc_interp, i, &entry->WheaHeader);
+	entry = (ACPI_ERST_ENTRY *)(erst + 1);
+	for (i = 0; i < nentries; i++, entry++) {
+		apei_interp_pass2_verify(ssc->ssc_interp, i,
+		    &entry->WheaHeader);
+	}
+	apei_interp_pass3_alloc(ssc->ssc_interp);
+	entry = (ACPI_ERST_ENTRY *)(erst + 1);
+	for (i = 0; i < nentries; i++, entry++) {
+		apei_interp_pass4_assemble(ssc->ssc_interp, i,
+		    &entry->WheaHeader);
+	}
+	apei_interp_pass5_verify(ssc->ssc_interp);
+
+	/*
+	 * Print some basic information about the stored records.
+	 */
+	uint64_t logaddr = apei_erst_act(sc, ACPI_ERST_GET_ERROR_RANGE, 0);
+	uint64_t logbytes = apei_erst_act(sc, ACPI_ERST_GET_ERROR_LENGTH, 0);
+	uint64_t attr = apei_erst_act(sc, ACPI_ERST_GET_ERROR_ATTRIBUTES, 0);
+	uint64_t nrecords = apei_erst_act(sc, ACPI_ERST_GET_RECORD_COUNT, 0);
+	char attrbuf[128];
+
+	/*
+	 * In the new-style ("\177") snprintb format every field must
+	 * begin with a type character; `b' names a single bit by its
+	 * zero-origin position.
+	 *
+	 * XXX define this format somewhere
+	 */
+	snprintb(attrbuf, sizeof(attrbuf), "\177\020"
+	    "b\001"	"NVRAM\0"
+	    "b\002"	"SLOW\0"
+	    "\0", attr);
+
+	aprint_normal_dev(sc->sc_dev, "ERST: %"PRIu64" records in error log"
+	    " %"PRIu64" bytes @ 0x%"PRIx64" attr=%s\n",
+	    nrecords, logbytes, logaddr, attrbuf);
+
+	/*
+	 * XXX wire up to sysctl or a file system or something, and/or
+	 * dmesg or crash dumps
+	 */
+}
+
+/*
+ * apei_erst_detach(sc)
+ *
+ *	Free software resource allocated for ERST handling.
+ */
+void
+apei_erst_detach(struct apei_softc *sc)
+{
+	struct apei_erst_softc *ssc = &sc->sc_erst;
+
+	/*
+	 * Clear the pointer after destroying the interpreter so that a
+	 * repeated detach finds nothing left to free.
+	 */
+	if (ssc->ssc_interp) {
+		apei_interp_destroy(ssc->ssc_interp);
+		ssc->ssc_interp = NULL;
+	}
+}
+
+/*
+ * apei_erst_instvalid(header, ninst, i)
+ *
+ *	Routine to validate the ith entry, for an action with ninst
+ *	instructions.  Returns false, after printing a diagnostic, if
+ *	the entry is a GOTO whose target lies beyond the end of the
+ *	action's instructions; returns true otherwise.
+ */
+static bool
+apei_erst_instvalid(ACPI_WHEA_HEADER *header, uint32_t ninst, uint32_t i)
+{
+
+	switch (header->Instruction) {
+	case ACPI_ERST_GOTO:
+		if (header->Value > ninst) {
+			/*
+			 * The action number comes from the firmware
+			 * table, so don't trust it as an index into
+			 * the name table.
+			 */
+			aprint_error("ERST[%"PRIu32"]:"
+			    " GOTO(%"PRIu64") out of bounds,"
+			    " disabling action %"PRIu32" (%s)\n", i,
+			    header->Value,
+			    header->Action,
+			    (header->Action < __arraycount(apei_erst_action)
+				? apei_erst_action[header->Action]
+				: "unknown"));
+			return false;
+		}
+		break;
+	default:
+		break;
+	}
+	return true;
+}
+
+/*
+ * struct apei_erst_machine
+ *
+ *	Machine state for executing ERST instructions: the softc (used
+ *	for diagnostics), the caller-supplied input x, the output y,
+ *	the two scratch variables, and the source and destination base
+ *	addresses used by MOVE_DATA.
+ */
+struct apei_erst_machine {
+	struct apei_softc	*sc;
+	uint64_t		x;	/* in */
+	uint64_t		y;	/* out */
+	uint64_t		var1;
+	uint64_t		var2;
+	uint64_t		src_base;
+	uint64_t		dst_base;
+};
+
+/*
+ * apei_erst_instfunc(header, cookie, &ip, maxip)
+ *
+ *	Run a single instruction in the service of performing an ERST
+ *	action.  Updates the ERST machine at cookie, and the ip if
+ *	necessary, in place.
+ *
+ *	On entry, ip points to the next instruction after this one
+ *	sequentially; on exit, ip points to the next instruction to
+ *	execute.
+ *
+ *	An unrecognized instruction, or a failed register access, is
+ *	reported with aprint_debug only; nothing is returned to the
+ *	caller.
+ */
+static void
+apei_erst_instfunc(ACPI_WHEA_HEADER *header, void *cookie, uint32_t *ipp,
+    uint32_t maxip)
+{
+	struct apei_erst_machine *const M = cookie;
+	ACPI_STATUS rv = AE_OK;
+
+	/*
+	 * Abbreviate some of the intermediate quantities to make the
+	 * instruction logic conciser and more legible.
+	 */
+	const uint8_t BitOffset = header->RegisterRegion.BitOffset;
+	const uint64_t Mask = header->Mask;
+	const uint64_t Value = header->Value;
+	ACPI_GENERIC_ADDRESS *const reg = &header->RegisterRegion;
+	const bool preserve_register = header->Flags & ACPI_ERST_PRESERVE;
+
+	aprint_debug_dev(M->sc->sc_dev, "%s: instr=0x%02"PRIx8
+	    " (%s)"
+	    " Address=0x%"PRIx64
+	    " BitOffset=%"PRIu8" Mask=0x%"PRIx64" Value=0x%"PRIx64
+	    " Flags=0x%"PRIx8"\n",
+	    __func__, header->Instruction,
+	    (header->Instruction < __arraycount(apei_erst_instruction)
+		? apei_erst_instruction[header->Instruction]
+		: "unknown"),
+	    reg->Address,
+	    BitOffset, Mask, Value,
+	    header->Flags);
+
+	/*
+	 * Zero-initialize the output by default.
+	 */
+	M->y = 0;
+
+	/*
+	 * Dispatch the instruction.
+	 */
+	switch (header->Instruction) {
+	case ACPI_ERST_READ_REGISTER:
+		rv = apei_read_register(reg, Mask, &M->y);
+		break;
+	case ACPI_ERST_READ_REGISTER_VALUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		M->y = (v == Value ? 1 : 0);
+		break;
+	}
+	case ACPI_ERST_WRITE_REGISTER:
+		rv = apei_write_register(reg, Mask, preserve_register, M->x);
+		break;
+	case ACPI_ERST_WRITE_REGISTER_VALUE:
+		rv = apei_write_register(reg, Mask, preserve_register, Value);
+		break;
+	case ACPI_ERST_NOOP:
+		break;
+	case ACPI_ERST_LOAD_VAR1:
+		rv = apei_read_register(reg, Mask, &M->var1);
+		break;
+	case ACPI_ERST_LOAD_VAR2:
+		rv = apei_read_register(reg, Mask, &M->var2);
+		break;
+	case ACPI_ERST_STORE_VAR1:
+		rv = apei_write_register(reg, Mask, preserve_register,
+		    M->var1);
+		break;
+	case ACPI_ERST_ADD:
+		M->var1 += M->var2;
+		break;
+	case ACPI_ERST_SUBTRACT:
+		/*
+		 * The specification at
+		 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#serialization-instructions
+		 * says:
+		 *
+		 *	0x09	SUBTRACT	Subtracts VAR1 from VAR2
+		 *				and stores the result in
+		 *				VAR1.
+		 *
+		 * So, according to the spec, this is _not_ simply
+		 *
+		 *	M->var1 -= M->var2;
+		 */
+		M->var1 = M->var2 - M->var1;
+		break;
+	case ACPI_ERST_ADD_VALUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+
+		v += Value;
+
+		rv = apei_write_register(reg, Mask, preserve_register, v);
+		break;
+	}
+	case ACPI_ERST_SUBTRACT_VALUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+
+		v -= Value;
+
+		rv = apei_write_register(reg, Mask, preserve_register, v);
+		break;
+	}
+	case ACPI_ERST_STALL:
+		DELAY(Value);		/* XXX avoid excessive delays */
+		break;
+	case ACPI_ERST_STALL_WHILE_TRUE:
+		/*
+		 * Poll the register, pausing for VAR1 between reads,
+		 * for as long as it keeps reading back Value.
+		 */
+		for (;;) {
+			uint64_t v;
+
+			rv = apei_read_register(reg, Mask, &v);
+			if (ACPI_FAILURE(rv))
+				break;
+			if (v != Value)
+				break;
+			DELAY(M->var1);
+		}
+		break;
+	case ACPI_ERST_SKIP_NEXT_IF_TRUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+
+		/*
+		 * If reading the register yields Value, skip the next
+		 * instruction -- unless that would run past the end of
+		 * the instruction buffer.
+		 */
+		if (v == Value) {
+			if (*ipp < maxip)
+				(*ipp)++;
+		}
+		break;
+	}
+	case ACPI_ERST_GOTO:
+		if (Value >= maxip) /* paranoia */
+			*ipp = maxip;
+		else
+			*ipp = Value;
+		break;
+	case ACPI_ERST_SET_SRC_ADDRESS_BASE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		M->src_base = v;
+		break;
+	}
+	case ACPI_ERST_SET_DST_ADDRESS_BASE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		/*
+		 * This is the _destination_ base consumed by
+		 * MOVE_DATA; it must not clobber the source base.
+		 */
+		M->dst_base = v;
+		break;
+	}
+	case ACPI_ERST_MOVE_DATA: {
+		uint64_t v;
+
+		/*
+		 * The register yields the offset, applied to both
+		 * bases; VAR2 is the number of bytes to move.
+		 */
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		apei_pmemmove(M->dst_base + v, M->src_base + v, M->var2);
+		break;
+	}
+	default:
+		/*
+		 * Unknown instruction: report it below rather than
+		 * silently treating it as a no-op.
+		 */
+		rv = AE_ERROR;
+		break;
+	}
+
+	/*
+	 * If any register I/O failed, print the failure message.  This
+	 * could be more specific about exactly what failed, but that
+	 * takes a little more effort to write.
+	 */
+	if (ACPI_FAILURE(rv)) {
+		aprint_debug_dev(M->sc->sc_dev, "%s: failed: %s\n", __func__,
+		    AcpiFormatException(rv));
+	}
+}
+
+/*
+ * apei_erst_act(sc, action, x)
+ *
+ *	Perform the named ERST action with input x, by stepping through
+ *	all the instructions defined for the action by the ERST, and
+ *	return the output.
+ */
+static uint64_t
+apei_erst_act(struct apei_softc *sc, enum AcpiErstActions action, uint64_t x)
+{
+	struct apei_erst_softc *const ssc = &sc->sc_erst;
+	struct apei_erst_machine erst_machine, *const M = &erst_machine;
+
+	aprint_debug_dev(sc->sc_dev, "%s: action=%d (%s) input=0x%"PRIx64"\n",
+	    __func__,
+	    action,
+	    (action < __arraycount(apei_erst_action)
+		? apei_erst_action[action]
+		: "unknown"),
+	    x);
+
+	/*
+	 * Initialize the machine to execute the action's instructions.
+	 */
+	memset(M, 0, sizeof(*M));
+	M->sc = sc;
+	M->x = x;		/* input */
+	M->y = 0;		/* output */
+	M->var1 = 0;
+	M->var2 = 0;
+	M->src_base = 0;
+	M->dst_base = 0;
+
+	/*
+	 * Run the interpreter.
+	 */
+	apei_interpret(ssc->ssc_interp, action, M);
+
+	/*
+	 * Return the result.
+	 */
+	aprint_debug_dev(sc->sc_dev, "%s: output=0x%"PRIx64"\n", __func__,
+	    M->y);
+	return M->y;
+}
diff --git a/sys/dev/acpi/apei_erstvar.h b/sys/dev/acpi/apei_erstvar.h
new file mode 100644
index 000000000000..c2e7117b1066
--- /dev/null
+++ b/sys/dev/acpi/apei_erstvar.h
@@ -0,0 +1,49 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_ERSTVAR_H_ +#define _SYS_DEV_ACPI_APEI_ERSTVAR_H_ + +struct apei_interp; +struct apei_softc; + +/* + * struct apei_erst_softc + * + * Software state for error serialization actions described in the + * ERST, Error Record Serialization Table. + */ +struct apei_erst_softc { + struct apei_interp *ssc_interp; +}; + +void apei_erst_attach(struct apei_softc *); +void apei_erst_detach(struct apei_softc *); + +#endif /* _SYS_DEV_ACPI_APEI_ERSTVAR_H_ */ + diff --git a/sys/dev/acpi/apei_hed.h b/sys/dev/acpi/apei_hed.h new file mode 100644 index 000000000000..988be5292c0a --- /dev/null +++ b/sys/dev/acpi/apei_hed.h @@ -0,0 +1,34 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_HED_H_ +#define _SYS_DEV_ACPI_APEI_HED_H_ + +void apei_hed_notify(void); + +#endif /* _SYS_DEV_ACPI_APEI_HED_H_ */ diff --git a/sys/dev/acpi/apei_hest.c b/sys/dev/acpi/apei_hest.c new file mode 100644 index 000000000000..e5367eee5dfd --- /dev/null +++ b/sys/dev/acpi/apei_hest.c @@ -0,0 +1,1017 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI HEST -- Hardware Error Source Table + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#acpi-error-source + * + * XXX uncorrectable error NMI comes in on all CPUs at once, what to do? + * + * XXX AMD MCA + * + * XXX IA32 machine check stuff + * + * XXX switch-to-polling for GHES notifications + * + * XXX error threshold for GHES notifications + * + * XXX sort out interrupt notification types, e.g. do we ever need to + * do acpi_intr_establish? + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if defined(__i386__) || defined(__x86_64__) +#include +#endif + +#include "ioconf.h" + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("apei") + +/* + * apei_hest_ghes_handle(sc, src) + * + * Check for, report, and acknowledge any error from a Generic + * Hardware Error Source (GHES, not GHESv2). 
Return true if there
+ *	was any error to report, false if not.
+ */
+static bool
+apei_hest_ghes_handle(struct apei_softc *sc, struct apei_source *src)
+{
+	ACPI_HEST_GENERIC *ghes = container_of(src->as_header,
+	    ACPI_HEST_GENERIC, Header);
+	ACPI_HEST_GENERIC_STATUS *gesb = src->as_ghes.gesb;
+	char ctx[sizeof("error source 65535")];
+	uint32_t status;
+	bool fatal = false;
+
+	/*
+	 * Process and report any error.
+	 *
+	 * ctx labels the messages with the 16-bit source id; the
+	 * buffer is sized for the largest possible id, 65535.
+	 *
+	 * The value returned by apei_gesb_report is kept as the block
+	 * status to acknowledge below (presumably the BlockStatus it
+	 * observed -- confirm against apei_gesb_report), and fatal is
+	 * an out-parameter that starts out false.
+	 */
+	snprintf(ctx, sizeof(ctx), "error source %"PRIu16,
+	    ghes->Header.SourceId);
+	status = apei_gesb_report(sc, src->as_ghes.gesb,
+	    ghes->ErrorBlockLength, ctx, &fatal);
+
+	/*
+	 * Acknowledge the error by clearing the block status.  To
+	 * avoid races, we probably have to avoid further access to the
+	 * GESB until we get another notification.
+	 *
+	 * As a precaution, we zero this with atomic compare-and-swap
+	 * so at least we can see if the status changed while we were
+	 * working on it.  If it did change, the compare-and-swap
+	 * leaves the new value in place and we only log the
+	 * discrepancy.
+	 *
+	 * It is tempting to clear bits with atomic and-complement, but
+	 * the BlockStatus is not just a bit mask -- bits [13:4] are a
+	 * count of Generic Error Data Entries, and who knows what bits
+	 * [31:14] might be used for in the future.
+	 *
+	 * The membar_release is issued before the compare-and-swap so
+	 * that our accesses to the GESB while reporting are ordered
+	 * before the store that hands the block back.
+	 *
+	 * XXX The GHES(v1) protocol is unclear from the specification
+	 * here.  The GHESv2 protocol has a separate register write to
+	 * acknowledge, which is a bit clearer.
+	 */
+	membar_release();
+	const uint32_t status1 = atomic_cas_32(&gesb->BlockStatus, status, 0);
+	if (status1 != status) {
+		device_printf(sc->sc_dev, "%s: status changed from"
+		    " 0x%"PRIx32" to 0x%"PRIx32"\n",
+		    ctx, status, status1);
+	}
+
+	/*
+	 * If the error was fatal, panic now.  This is deliberately
+	 * done last, after the error has been reported and
+	 * acknowledged.
+	 */
+	if (fatal)
+		panic("fatal hardware error");
+
+	return status != 0;
+}
+
+/*
+ * apei_hest_ghes_v2_handle(sc, src)
+ *
+ *	Check for, report, and acknowledge any error from a Generic
+ *	Hardware Error Source v2.  Return true if there was any error
+ *	to report, false if not.
+ */
+static bool
+apei_hest_ghes_v2_handle(struct apei_softc *sc, struct apei_source *src)
+{
+	ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(src->as_header,
+	    ACPI_HEST_GENERIC_V2, Header);
+	ACPI_HEST_GENERIC_STATUS *gesb = src->as_ghes_v2.gesb;
+	char ctx[sizeof("error source 65535")];
+	uint64_t X;
+	uint32_t status;
+	bool fatal = false;
+
+	/*
+	 * Process and report any error.
+	 *
+	 * ctx labels the messages with the 16-bit source id; the
+	 * buffer is sized for the largest possible id, 65535.
+	 *
+	 * fatal starts out false, exactly as in the GHES(v1) handler,
+	 * so the panic decision below never depends on an
+	 * indeterminate value should apei_gesb_report return without
+	 * storing to it.
+	 */
+	snprintf(ctx, sizeof(ctx), "error source %"PRIu16,
+	    ghes_v2->Header.SourceId);
+	status = apei_gesb_report(sc, gesb,
+	    ghes_v2->ErrorBlockLength, ctx, &fatal);
+
+	/*
+	 * First clear the block status.  As a precaution, we zero this
+	 * with atomic compare-and-swap so at least we can see if the
+	 * status changed while we were working on it.
+	 *
+	 * The membar_release is issued before the compare-and-swap so
+	 * that our accesses to the GESB while reporting are ordered
+	 * before the store that clears the status.
+	 */
+	membar_release();
+	const uint32_t status1 = atomic_cas_32(&gesb->BlockStatus, status, 0);
+	if (status1 != status) {
+		device_printf(sc->sc_dev, "%s: status changed from"
+		    " 0x%"PRIx32" to 0x%"PRIx32"\n",
+		    ctx, status, status1);
+	}
+
+	/*
+	 * Next, do the Read Ack dance: read the Read Ack register,
+	 * keep only the bits selected by ReadAckPreserve, set the bits
+	 * given by ReadAckWrite, and write the result back.
+	 *
+	 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-hardware-error-source-version-2-ghesv2-type-10
+	 */
+	X = apei_mapreg_read(&ghes_v2->ReadAckRegister,
+	    src->as_ghes_v2.read_ack);
+	X &= ghes_v2->ReadAckPreserve;
+	X |= ghes_v2->ReadAckWrite;
+	apei_mapreg_write(&ghes_v2->ReadAckRegister,
+	    src->as_ghes_v2.read_ack, X);
+
+	/*
+	 * If the error was fatal, panic now.  This is deliberately
+	 * done last, after the error has been reported and
+	 * acknowledged.
+	 */
+	if (fatal)
+		panic("fatal hardware error");
+
+	return status != 0;
+}
+
+/*
+ * apei_hest_ghes_poll(cookie)
+ *
+ *	Callout handler for periodic polling of a Generic Hardware
+ *	Error Source (GHES, not GHESv2), using Notification Type `0 -
+ *	Polled'.
+ *
+ *	cookie is the struct apei_source pointer for a single source;
+ *	if there are multiple sources there will be multiple callouts.
+ */ +static void +apei_hest_ghes_poll(void *cookie) +{ + struct apei_source *src = cookie; + struct apei_softc *sc = src->as_sc; + ACPI_HEST_GENERIC *ghes = container_of(src->as_header, + ACPI_HEST_GENERIC, Header); + + /* + * Process and acknowledge any error. + */ + (void)apei_hest_ghes_handle(sc, src); + + /* + * Schedule polling again after the firmware-suggested + * interval. + */ + callout_schedule(&src->as_ch, + MAX(1, mstohz(ghes->Notify.PollInterval))); +} + +/* + * apei_hest_ghes_v2_poll(cookie) + * + * Callout handler for periodic polling of a Generic Hardware + * Error Source v2, using Notification Type `0 - Polled'. + * + * cookie is the struct apei_source pointer for a single source; + * if there are multiple sources there will be multiple callouts. + */ +static void +apei_hest_ghes_v2_poll(void *cookie) +{ + struct apei_source *src = cookie; + struct apei_softc *sc = src->as_sc; + ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(src->as_header, + ACPI_HEST_GENERIC_V2, Header); + + /* + * Process and acknowledge any error. + */ + (void)apei_hest_ghes_v2_handle(sc, src); + + /* + * Schedule polling again after the firmware-suggested + * interval. + */ + callout_schedule(&src->as_ch, + MAX(1, mstohz(ghes_v2->Notify.PollInterval))); +} + +#if defined(__i386__) || defined(__x86_64__) + +/* + * The NMI is (sometimes?) delivered to all CPUs at once. To reduce + * confusion, let's try to have only one CPU process error + * notifications at a time. + */ +static __cpu_simple_lock_t apei_hest_nmi_lock; + +/* + * apei_hest_ghes_nmi(tf, cookie) + * + * Nonmaskable interrupt handler for Generic Hardware Error + * Sources (GHES, not GHESv2) with Notification Type `4 - NMI'. 
+ */ +static int +apei_hest_ghes_nmi(const struct trapframe *tf, void *cookie) +{ + struct apei_source *src = cookie; + struct apei_softc *sc = src->as_sc; + + __cpu_simple_lock(&apei_hest_nmi_lock); + const bool mine = apei_hest_ghes_handle(sc, src); + __cpu_simple_unlock(&apei_hest_nmi_lock); + + /* + * Tell the NMI subsystem whether this interrupt could have + * been for us or not. + */ + return mine; +} + +/* + * apei_hest_ghes_v2_nmi(tf, cookie) + * + * Nonmaskable interrupt handler for Generic Hardware Error + * Sources v2 with Notification Type `4 - NMI'. + */ +static int +apei_hest_ghes_v2_nmi(const struct trapframe *tf, void *cookie) +{ + struct apei_source *src = cookie; + struct apei_softc *sc = src->as_sc; + + __cpu_simple_lock(&apei_hest_nmi_lock); + const bool mine = apei_hest_ghes_v2_handle(sc, src); + __cpu_simple_unlock(&apei_hest_nmi_lock); + + /* + * Tell the NMI subsystem whether this interrupt could have + * been for us or not. + */ + return mine; +} + +#endif /* defined(__i386__) || defined(__x86_64__) */ + +/* + * apei_hest_attach_ghes(sc, ghes, i) + * + * Attach a Generic Hardware Error Source (GHES, not GHESv2) as + * the ith source in the Hardware Error Source Table. + * + * After this point, the system will check for and handle errors + * when notified by this source. + */ +static void +apei_hest_attach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes, + uint32_t i) +{ + struct apei_hest_softc *hsc = &sc->sc_hest; + struct apei_source *src = &hsc->hsc_source[i]; + uint64_t addr; + ACPI_STATUS rv; + char ctx[sizeof("HEST[4294967295, Id=65535]")]; + + snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]", + i, ghes->Header.SourceId); + + /* + * Verify the source is enabled before proceeding. 
The Enabled + * field is 8 bits with 256 possibilities, but only two of the + * possibilities, 0 and 1, have semantics defined in the spec, + * so out of an abundance of caution let's tread carefully in + * case anything changes and noisily reject any values other + * than 1. + */ + switch (ghes->Enabled) { + case 1: + break; + case 0: + aprint_debug_dev(sc->sc_dev, "%s: disabled\n", ctx); + return; + default: + aprint_error_dev(sc->sc_dev, "%s: unknown GHES Enabled state:" + " 0x%"PRIx8"\n", ctx, ghes->Enabled); + return; + } + + /* + * Verify the Error Status Address bit width is at most 64 bits + * before proceeding with this source. When we get 128-bit + * addressing, this code will have to be updated. + */ + if (ghes->ErrorStatusAddress.BitWidth > 64) { + aprint_error_dev(sc->sc_dev, "%s: excessive address bits:" + " %"PRIu8"\n", ctx, ghes->ErrorStatusAddress.BitWidth); + return; + } + + /* + * Read the GHES Error Status Addresss. This is the physical + * address of a GESB, Generic Error Status Block. Why the + * physical address is exposed via this indirection, and not + * simply stored directly in the GHES, is unclear to me. + * Hoping it's not because the address can change dynamically, + * because the error handling path shouldn't involve mapping + * anything. + */ + rv = AcpiRead(&addr, &ghes->ErrorStatusAddress); + if (ACPI_FAILURE(rv)) { + aprint_error_dev(sc->sc_dev, "%s:" + " failed to read error status address: %s", ctx, + AcpiFormatException(rv)); + return; + } + aprint_debug_dev(sc->sc_dev, "%s: error status @ 0x%"PRIx64"\n", ctx, + addr); + + /* + * Initialize the source and map the GESB so we can get at it + * in the error handling path. + */ + src->as_sc = sc; + src->as_header = &ghes->Header; + src->as_ghes.gesb = AcpiOsMapMemory(addr, ghes->ErrorBlockLength); + + /* + * Arrange to receive notifications. 
+ */ + switch (ghes->Notify.Type) { + case ACPI_HEST_NOTIFY_POLLED: + callout_init(&src->as_ch, CALLOUT_MPSAFE); + callout_setfunc(&src->as_ch, &apei_hest_ghes_poll, src); + callout_schedule(&src->as_ch, 0); + break; + case ACPI_HEST_NOTIFY_SCI: + case ACPI_HEST_NOTIFY_GPIO: + /* + * SCI and GPIO notifications are delivered through + * Hardware Error Device (PNP0C33) events. + * + * XXX Where is this spelled out? The text at + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources + * is vague. + */ + SIMPLEQ_INSERT_TAIL(&hsc->hsc_hed_list, src, as_entry); + break; +#if defined(__i386__) || defined(__x86_64__) + case ACPI_HEST_NOTIFY_NMI: + src->as_nmi = nmi_establish(&apei_hest_ghes_nmi, src); + break; +#endif + } + + /* + * Now that we have notification set up, process and + * acknowledge the initial GESB report if any. + */ + apei_hest_ghes_handle(sc, src); +} + +/* + * apei_hest_detach_ghes(sc, ghes, i) + * + * Detach the ith source, which is a Generic Hardware Error Source + * (GHES, not GHESv2). + * + * After this point, the system will ignore notifications from + * this source. + */ +static void +apei_hest_detach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes, + uint32_t i) +{ + struct apei_hest_softc *hsc = &sc->sc_hest; + struct apei_source *src = &hsc->hsc_source[i]; + + /* + * Arrange to stop receiving notifications. + */ + switch (ghes->Notify.Type) { + case ACPI_HEST_NOTIFY_POLLED: + callout_halt(&src->as_ch, NULL); + callout_destroy(&src->as_ch); + break; + case ACPI_HEST_NOTIFY_SCI: + case ACPI_HEST_NOTIFY_GPIO: + /* + * No need to spend time removing the entry; no further + * calls via apei_hed_notify are possible at this + * point, now that detach has begun. + */ + break; +#if defined(__i386__) || defined(__x86_64__) + case ACPI_HEST_NOTIFY_NMI: + nmi_disestablish(src->as_nmi); + src->as_nmi = NULL; + break; +#endif + } + + /* + * No more notifications. 
Unmap the GESB and destroy the + * interrupt source now that it will no longer be used in + * error handling path. + */ + AcpiOsUnmapMemory(src->as_ghes.gesb, ghes->ErrorBlockLength); + src->as_ghes.gesb = NULL; + src->as_header = NULL; + src->as_sc = NULL; +} + + +/* + * apei_hest_attach_ghes_v2(sc, ghes_v2, i) + * + * Attach a Generic Hardware Error Source v2 as the ith source in + * the Hardware Error Source Table. + * + * After this point, the system will check for and handle errors + * when notified by this source. + */ +static void +apei_hest_attach_ghes_v2(struct apei_softc *sc, ACPI_HEST_GENERIC_V2 *ghes_v2, + uint32_t i) +{ + struct apei_hest_softc *hsc = &sc->sc_hest; + struct apei_source *src = &hsc->hsc_source[i]; + uint64_t addr; + struct apei_mapreg *read_ack; + ACPI_STATUS rv; + char ctx[sizeof("HEST[4294967295, Id=65535]")]; + + snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]", + i, ghes_v2->Header.SourceId); + + /* + * Verify the source is enabled before proceeding. The Enabled + * field is 8 bits with 256 possibilities, but only two of the + * possibilities, 0 and 1, have semantics defined in the spec, + * so out of an abundance of caution let's tread carefully in + * case anything changes and noisily reject any values other + * than 1. + */ + switch (ghes_v2->Enabled) { + case 1: + break; + case 0: + aprint_debug_dev(sc->sc_dev, "%s: disabled\n", ctx); + return; + default: + aprint_error_dev(sc->sc_dev, "%s:" + " unknown GHESv2 Enabled state: 0x%"PRIx8"\n", ctx, + ghes_v2->Enabled); + return; + } + + /* + * Verify the Error Status Address bit width is at most 64 bits + * before proceeding with this source. When we get 128-bit + * addressing, this code will have to be updated. + */ + if (ghes_v2->ErrorStatusAddress.BitWidth > 64) { + aprint_error_dev(sc->sc_dev, "%s: excessive address bits:" + " %"PRIu8"\n", ctx, ghes_v2->ErrorStatusAddress.BitWidth); + return; + } + + /* + * Read the GHESv2 Error Status Addresss. 
This is the physical + * address of a GESB, Generic Error Status Block. Why the + * physical address is exposed via this indirection, and not + * simply stored directly in the GHESv2, is unclear to me. + * Hoping it's not because the address can change dynamically, + * because the error handling path shouldn't involve mapping + * anything. + */ + rv = AcpiRead(&addr, &ghes_v2->ErrorStatusAddress); + if (ACPI_FAILURE(rv)) { + aprint_error_dev(sc->sc_dev, "%s:" + " failed to read error status address: %s", ctx, + AcpiFormatException(rv)); + return; + } + aprint_debug_dev(sc->sc_dev, "%s: error status @ 0x%"PRIx64"\n", ctx, + addr); + + /* + * Try to map the Read Ack register up front, so we don't have + * to allocate and free kva in AcpiRead/AcpiWrite at the time + * we're handling an error. Bail if we can't. + */ + read_ack = apei_mapreg_map(&ghes_v2->ReadAckRegister); + if (read_ack == NULL) { + aprint_error_dev(sc->sc_dev, "%s:" + " unable to map Read Ack register\n", ctx); + return; + } + + /* + * Initialize the source and map the GESB it in the error + * handling path. + */ + src->as_sc = sc; + src->as_header = &ghes_v2->Header; + src->as_ghes_v2.gesb = AcpiOsMapMemory(addr, + ghes_v2->ErrorBlockLength); + src->as_ghes_v2.read_ack = read_ack; + + /* + * Arrange to receive notifications. + */ + switch (ghes_v2->Notify.Type) { + case ACPI_HEST_NOTIFY_POLLED: + callout_init(&src->as_ch, CALLOUT_MPSAFE); + callout_setfunc(&src->as_ch, &apei_hest_ghes_v2_poll, src); + callout_schedule(&src->as_ch, 0); + break; + case ACPI_HEST_NOTIFY_SCI: + case ACPI_HEST_NOTIFY_GPIO: + /* + * SCI and GPIO notifications are delivered through + * Hardware Error Device (PNP0C33) events. + * + * XXX Where is this spelled out? The text at + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources + * is vague. 
+ */ + SIMPLEQ_INSERT_TAIL(&hsc->hsc_hed_list, src, as_entry); + break; +#if defined(__i386__) || defined(__x86_64__) + case ACPI_HEST_NOTIFY_NMI: + src->as_nmi = nmi_establish(&apei_hest_ghes_v2_nmi, src); + break; +#endif + } + + /* + * Now that we have notification set up, process and + * acknowledge the initial GESB report if any. + */ + apei_hest_ghes_handle(sc, src); +} + +/* + * apei_hest_detach_ghes_v2(sc, ghes_v2, i) + * + * Detach the ith source, which is a Generic Hardware Error Source + * v2. + * + * After this point, the system will ignore notifications from + * this source. + */ +static void +apei_hest_detach_ghes_v2(struct apei_softc *sc, ACPI_HEST_GENERIC_V2 *ghes_v2, + uint32_t i) +{ + struct apei_hest_softc *hsc = &sc->sc_hest; + struct apei_source *src = &hsc->hsc_source[i]; + + /* + * Arrange to stop receiving notifications. + */ + switch (ghes_v2->Notify.Type) { + case ACPI_HEST_NOTIFY_POLLED: + callout_halt(&src->as_ch, NULL); + callout_destroy(&src->as_ch); + break; + case ACPI_HEST_NOTIFY_SCI: + case ACPI_HEST_NOTIFY_GPIO: + /* + * No need to spend time removing the entry; no further + * calls via apei_hed_notify are possible at this + * point, now that detach has begun. + */ + break; +#if defined(__i386__) || defined(__x86_64__) + case ACPI_HEST_NOTIFY_NMI: + nmi_disestablish(src->as_nmi); + src->as_nmi = NULL; + break; +#endif + } + + /* + * No more notifications. Unmap the GESB and read ack register + * now that it will no longer be used in error handling path. 
+ */ + AcpiOsUnmapMemory(src->as_ghes_v2.gesb, ghes_v2->ErrorBlockLength); + src->as_ghes_v2.gesb = NULL; + apei_mapreg_unmap(&ghes_v2->ReadAckRegister, src->as_ghes_v2.read_ack); + src->as_ghes_v2.read_ack = NULL; + src->as_header = NULL; + src->as_sc = NULL; +} + +/* + * apei_hest_attach_source(sc, header, i, size_t maxlen) + * + * Attach the ith source in the Hardware Error Source Table given + * its header, and return a pointer to the header of the next + * source in the table, provided it is no more than maxlen bytes + * past header. Return NULL if the size of the source is unknown + * or would exceed maxlen bytes. + */ +static ACPI_HEST_HEADER * +apei_hest_attach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header, + uint32_t i, size_t maxlen) +{ + char ctx[sizeof("HEST[4294967295, Id=65535]")]; + + snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]", + i, header->SourceId); + + switch (header->Type) { + case ACPI_HEST_TYPE_IA32_CHECK: { + ACPI_HEST_IA_MACHINE_CHECK *const imc = container_of(header, + ACPI_HEST_IA_MACHINE_CHECK, Header); + + aprint_error_dev(sc->sc_dev, "%s:" + " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type); + + if (maxlen < sizeof(*imc)) + return NULL; + maxlen -= sizeof(*imc); + ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imc + 1); + if (maxlen < imc->NumHardwareBanks*sizeof(*bank)) + return NULL; + return (ACPI_HEST_HEADER *)(bank + imc->NumHardwareBanks); + } + case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: { + ACPI_HEST_IA_CORRECTED *const imcc = container_of(header, + ACPI_HEST_IA_CORRECTED, Header); + + aprint_error_dev(sc->sc_dev, "%s:" + " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type); + + if (maxlen < sizeof(*imcc)) + return NULL; + maxlen -= sizeof(*imcc); + ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imcc + 1); + if (maxlen < imcc->NumHardwareBanks*sizeof(*bank)) + return NULL; + return (ACPI_HEST_HEADER *)(bank + imcc->NumHardwareBanks); + } + case ACPI_HEST_TYPE_IA32_NMI: { + ACPI_HEST_IA_NMI 
*const ianmi = container_of(header, + ACPI_HEST_IA_NMI, Header); + + aprint_error_dev(sc->sc_dev, "%s:" + " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type); + + if (maxlen < sizeof(*ianmi)) + return NULL; + return (ACPI_HEST_HEADER *)(ianmi + 1); + } + case ACPI_HEST_TYPE_AER_ROOT_PORT: { + ACPI_HEST_AER_ROOT *const aerroot = container_of(header, + ACPI_HEST_AER_ROOT, Header); + + aprint_error_dev(sc->sc_dev, "%s:" + " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type); + + if (maxlen < sizeof(*aerroot)) + return NULL; + return (ACPI_HEST_HEADER *)(aerroot + 1); + } + case ACPI_HEST_TYPE_AER_ENDPOINT: { + ACPI_HEST_AER *const aer = container_of(header, + ACPI_HEST_AER, Header); + + aprint_error_dev(sc->sc_dev, "%s:" + " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type); + + if (maxlen < sizeof(*aer)) + return NULL; + return (ACPI_HEST_HEADER *)(aer + 1); + } + case ACPI_HEST_TYPE_AER_BRIDGE: { + ACPI_HEST_AER_BRIDGE *const aerbridge = container_of(header, + ACPI_HEST_AER_BRIDGE, Header); + + aprint_error_dev(sc->sc_dev, "%s:" + " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type); + + if (maxlen < sizeof(*aerbridge)) + return NULL; + return (ACPI_HEST_HEADER *)(aerbridge + 1); + } + case ACPI_HEST_TYPE_GENERIC_ERROR: { + ACPI_HEST_GENERIC *const ghes = container_of(header, + ACPI_HEST_GENERIC, Header); + + if (maxlen < sizeof(*ghes)) + return NULL; + apei_hest_attach_ghes(sc, ghes, i); + return (ACPI_HEST_HEADER *)(ghes + 1); + } + case ACPI_HEST_TYPE_GENERIC_ERROR_V2: { + ACPI_HEST_GENERIC_V2 *const ghes_v2 = container_of(header, + ACPI_HEST_GENERIC_V2, Header); + + if (maxlen < sizeof(*ghes_v2)) + return NULL; + apei_hest_attach_ghes_v2(sc, ghes_v2, i); + return (ACPI_HEST_HEADER *)(ghes_v2 + 1); + } + case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: { + ACPI_HEST_IA_DEFERRED_CHECK *const imdc = container_of(header, + ACPI_HEST_IA_DEFERRED_CHECK, Header); + + aprint_error_dev(sc->sc_dev, "%s:" + " unimplemented type: 0x%04"PRIx16"\n", ctx, 
header->Type); + + if (maxlen < sizeof(*imdc)) + return NULL; + maxlen -= sizeof(*imdc); + ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imdc + 1); + if (maxlen < imdc->NumHardwareBanks*sizeof(*bank)) + return NULL; + return (ACPI_HEST_HEADER *)(bank + imdc->NumHardwareBanks); + } + case ACPI_HEST_TYPE_NOT_USED3: + case ACPI_HEST_TYPE_NOT_USED4: + case ACPI_HEST_TYPE_NOT_USED5: + default: + aprint_error_dev(sc->sc_dev, "%s: unknown type:" + " 0x%04"PRIx16"\n", ctx, header->Type); + if (header->Type >= 12) { + /* + * `Beginning with error source type 12 and + * onward, each Error Source Structure must + * use the standard Error Source Structure + * Header as defined below.' + * + * Not yet in acpica, though, so we copy this + * down manually. + */ + struct { + UINT16 Type; + UINT16 Length; + } *const essh = (void *)header; + + if (maxlen < sizeof(*essh) || maxlen < essh->Length) + return NULL; + return (ACPI_HEST_HEADER *)((char *)header + + essh->Length); + } + return NULL; + } +} + +/* + * apei_hest_detach_source(sc, header, i) + * + * Detach the ith source in the Hardware Error Status Table. + * Caller is assumed to have stored where each source's header is, + * so no need to return the pointer to the header of the next + * source in the table. 
+ */ +static void +apei_hest_detach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header, + uint32_t i) +{ + + switch (header->Type) { + case ACPI_HEST_TYPE_GENERIC_ERROR: { + ACPI_HEST_GENERIC *ghes = container_of(header, + ACPI_HEST_GENERIC, Header); + + apei_hest_detach_ghes(sc, ghes, i); + break; + } + case ACPI_HEST_TYPE_GENERIC_ERROR_V2: { + ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(header, + ACPI_HEST_GENERIC_V2, Header); + + apei_hest_detach_ghes_v2(sc, ghes_v2, i); + break; + } + case ACPI_HEST_TYPE_IA32_CHECK: + case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: + case ACPI_HEST_TYPE_IA32_NMI: + case ACPI_HEST_TYPE_NOT_USED3: + case ACPI_HEST_TYPE_NOT_USED4: + case ACPI_HEST_TYPE_NOT_USED5: + case ACPI_HEST_TYPE_AER_ROOT_PORT: + case ACPI_HEST_TYPE_AER_ENDPOINT: + case ACPI_HEST_TYPE_AER_BRIDGE: + case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: + default: + /* XXX shouldn't happen */ + break; + } +} + +/* + * apei_hest_attach(sc) + * + * Scan the Hardware Error Source Table and attach sources + * enumerated in it so we can receive and process hardware errors + * during operation. + */ +void +apei_hest_attach(struct apei_softc *sc) +{ + ACPI_TABLE_HEST *hest = sc->sc_tab.hest; + struct apei_hest_softc *hsc = &sc->sc_hest; + ACPI_HEST_HEADER *header, *next; + uint32_t i, n; + size_t resid; + + /* + * Initialize the HED (Hardware Error Device, PNP0C33) + * notification list so apei_hed_notify becomes a noop with no + * extra effort even if we fail to attach anything. + */ + SIMPLEQ_INIT(&hsc->hsc_hed_list); + + /* + * Verify the table is large enough. + */ + if (hest->Header.Length < sizeof(*hest)) { + aprint_error_dev(sc->sc_dev, "HEST: truncated table:" + " %"PRIu32" < %zu minimum bytes\n", + hest->Header.Length, sizeof(*hest)); + return; + } + + n = hest->ErrorSourceCount; + aprint_normal_dev(sc->sc_dev, "HEST: %"PRIu32 + " hardware error source%s\n", n, n == 1 ? 
"" : "s"); + + /* + * This could be SIZE_MAX but let's put a smaller arbitrary + * limit on it; if you have gigabytes of HEST something is + * probably wrong. + */ + if (n > INT32_MAX/sizeof(hsc->hsc_source[0])) { + aprint_error_dev(sc->sc_dev, "HEST: too many error sources\n"); + return; + } + hsc->hsc_source = kmem_zalloc(n * sizeof(hsc->hsc_source[0]), + KM_SLEEP); + + header = (ACPI_HEST_HEADER *)(hest + 1); + resid = hest->Header.Length - sizeof(*hest); + for (i = 0; i < n && resid; i++, header = next) { + next = apei_hest_attach_source(sc, header, i, resid); + if (next == NULL) { + aprint_error_dev(sc->sc_dev, "truncated source:" + " %"PRIu32"\n", i); + break; + } + KASSERT((const char *)next - (const char *)header <= resid); + resid -= (const char *)next - (const char *)header; + } + if (resid) { + aprint_error_dev(sc->sc_dev, "HEST:" + " %zu bytes of trailing garbage after %"PRIu32" entries\n", + resid, n); + } +} + +/* + * apei_hest_detach(sc) + * + * Stop receiving and processing hardware error notifications and + * free resources set up from the Hardware Error Source Table. + */ +void +apei_hest_detach(struct apei_softc *sc) +{ + ACPI_TABLE_HEST *hest = sc->sc_tab.hest; + struct apei_hest_softc *hsc = &sc->sc_hest; + uint32_t i, n; + + if (hsc->hsc_source) { + n = hest->ErrorSourceCount; + for (i = 0; i < n; i++) { + struct apei_source *src = &hsc->hsc_source[i]; + ACPI_HEST_HEADER *header = src->as_header; + + if (src->as_header == NULL) + continue; + apei_hest_detach_source(sc, header, i); + } + kmem_free(hsc->hsc_source, n * sizeof(hsc->hsc_source[0])); + hsc->hsc_source = NULL; + } +} + +void +apei_hed_notify(void) +{ + device_t apei0; + struct apei_softc *sc; + struct apei_hest_softc *hsc; + struct apei_source *src; + + /* + * Take a reference to the apei0 device so it doesn't go away + * while we're working. 
+ */ + if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) + goto out; + sc = device_private(apei0); + + /* + * If there's no HEST, nothing to do. + */ + if (sc->sc_tab.hest == NULL) + goto out; + hsc = &sc->sc_hest; + + /* + * Walk through the HED-notified hardware error sources and + * check them. The list is stable until we release apei0. + */ + SIMPLEQ_FOREACH(src, &hsc->hsc_hed_list, as_entry) { + ACPI_HEST_HEADER *const header = src->as_header; + + switch (header->Type) { + case ACPI_HEST_TYPE_GENERIC_ERROR: + apei_hest_ghes_handle(sc, src); + break; + case ACPI_HEST_TYPE_GENERIC_ERROR_V2: + apei_hest_ghes_v2_handle(sc, src); + break; + case ACPI_HEST_TYPE_IA32_CHECK: + case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: + case ACPI_HEST_TYPE_IA32_NMI: + case ACPI_HEST_TYPE_NOT_USED3: + case ACPI_HEST_TYPE_NOT_USED4: + case ACPI_HEST_TYPE_NOT_USED5: + case ACPI_HEST_TYPE_AER_ROOT_PORT: + case ACPI_HEST_TYPE_AER_ENDPOINT: + case ACPI_HEST_TYPE_AER_BRIDGE: +// case ACPI_HEST_TYPE_GENERIC_ERROR: +// case ACPI_HEST_TYPE_GENERIC_ERROR_V2: + case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: + default: + /* XXX shouldn't happen */ + break; + } + } + +out: if (apei0) { + device_release(apei0); + apei0 = NULL; + } +} diff --git a/sys/dev/acpi/apei_hestvar.h b/sys/dev/acpi/apei_hestvar.h new file mode 100644 index 000000000000..a816d69dff55 --- /dev/null +++ b/sys/dev/acpi/apei_hestvar.h @@ -0,0 +1,85 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_DEV_ACPI_APEI_HESTVAR_H_
+#define _SYS_DEV_ACPI_APEI_HESTVAR_H_
+
+#include
+#include
+
+#include
+
+struct apei_mapreg;
+struct apei_softc;
+
+#if defined(__i386__) || defined(__x86_64__)
+struct nmi_handler;
+#endif
+
+/*
+ * struct apei_source
+ *
+ *	Software state for a hardware error source from the HEST,
+ *	Hardware Error Source Table, to process error notifications.
+ *
+ *	as_sc is a back pointer to the apei softc, used by the callout
+ *	and NMI handlers, which only receive the source as a cookie.
+ *
+ *	as_header points at the source's entry in the HEST.  It is
+ *	set when the source is attached and cleared when it is
+ *	detached; a null as_header marks a source that was never
+ *	attached.
+ *
+ *	The first union holds the type-specific mappings: the mapped
+ *	Generic Error Status Block (gesb) for GHES and GHESv2, plus
+ *	the mapped Read Ack register (read_ack) for GHESv2 only.
+ *
+ *	The second union holds the notification hookup, of which only
+ *	one is in use depending on the source's notification type:
+ *	as_ch for polled sources, as_nmi for NMI-notified sources
+ *	(x86 only), and as_entry for sources notified through the
+ *	Hardware Error Device, which are linked on hsc_hed_list.
+ */
+struct apei_source {
+	struct apei_softc	*as_sc;
+	ACPI_HEST_HEADER	*as_header;
+	union {
+		struct {
+			ACPI_HEST_GENERIC_STATUS	*gesb;
+		} as_ghes;
+		struct {
+			ACPI_HEST_GENERIC_STATUS	*gesb;
+			struct apei_mapreg		*read_ack;
+		} as_ghes_v2;
+	};
+	union {
+		struct callout		as_ch;
+#if defined(__i386__) || defined(__x86_64__)
+		struct nmi_handler	*as_nmi;
+#endif
+		SIMPLEQ_ENTRY(apei_source) as_entry;
+	};
+};
+
+/*
+ * struct apei_hest_softc
+ *
+ *	Software state for processing hardware error reports during
+ *	operation, from the HEST, Hardware Error Source table.
+ */ +struct apei_hest_softc { + struct apei_source *hsc_source; + SIMPLEQ_HEAD(, apei_source) hsc_hed_list; +}; + +void apei_hest_attach(struct apei_softc *); +void apei_hest_detach(struct apei_softc *); + +#endif /* _SYS_DEV_ACPI_APEI_HESTVAR_H_ */ diff --git a/sys/dev/acpi/apei_interp.c b/sys/dev/acpi/apei_interp.c new file mode 100644 index 000000000000..65828a6ae7b9 --- /dev/null +++ b/sys/dev/acpi/apei_interp.c @@ -0,0 +1,382 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI action interpreter. 
+ * + * APEI provides a generalized abstraction to implement the actions an + * OS must take to inject an error, or save state in a persistent error + * record for the next boot, since there are many different hardware + * register interfaces for, e.g., injecting errors. + * + * You might think that APEI, being part of ACPI, would use the usual + * ACPI interpreter to run ACPI methods for these actions. You would + * be wrong. Alas. + * + * Instead, there is an entirely different little language of actions + * that an OS must write programs in to inject errors, and an entirely + * different little language of instructions that the interpreter for + * the actions uses to interpret the OS's error injection program. Got + * it? + * + * The EINJ and ERST tables provide a series of entries that look like: + * + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=SKIP_NEXT_INSTRUCTION_IF_TRUE | + * | Value=0xdeadbeef | + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=WRITE_REGISTER_VALUE | + * | Register=0x7fabcd14 [memory] | + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=READ_REGISTER | + * | Register=0x7fabcd1c [memory] | + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=WRITE_REGISTER | + * | Register=0x7fabcd20 [memory] | + * +-----------------------------------------------+ + * | Action=EXECUTE_OPERATION | + * | Instruction=LOAD_VAR1 | + * | Register=0x7fabcf00 [memory] | + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=WRITE_REGISTER_VALUE | + * | Register=0x7fabcd24 [memory] | + * | Value=42 | + * +-----------------------------------------------+ + * | ...
| + * +-----------------------------------------------+ + * + * The entries tell the OS, for each action the OS might want to + * perform like BEGIN_INJECTION_OPERATION or SET_ERROR_TYPE or + * EXECUTE_OPERATION, what instructions must be executed and in what + * order. + * + * The instructions run in one of two little state machines -- there's + * a different instruction set for EINJ and ERST -- and vary from noops + * to reading and writing registers to arithmetic on registers to + * conditional and unconditional branches. + * + * Yes, that means this little language -- the ERST language, anyway, + * not the EINJ language -- is Turing-complete. + * + * This APEI interpreter first compiles the table into a contiguous + * sequence of instructions for each action, to make execution easier, + * since there's no requirement that the actions be in order, and the + * GOTO instruction relies on contiguous indexing of the instructions + * for an action. + * + * This interpreter also does a little validation so the firmware + * doesn't, e.g., GOTO somewhere in oblivion. The validation is mainly + * a convenience for catching mistakes in firmware, not a security + * measure, since the OS is absolutely vulnerable to malicious firmware + * anyway. + * + * XXX Map instruction registers in advance so ERST is safe in nasty + * contexts, e.g. to save dmesg? + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include +#include + +#include +#include + +/* + * struct apei_actinst + * + * Sequence of instructions to execute for an action. + */ +struct apei_actinst { + uint32_t ninst; + uint32_t ip; + struct acpi_whea_header **inst; +}; + +/* + * struct apei_interp + * + * Table of instructions to interpret APEI actions. 
+*/ +struct apei_interp { + const char *name; + const char *const *actname; + unsigned nact; + const char *const *instname; + unsigned ninst; + bool (*instvalid)(ACPI_WHEA_HEADER *, uint32_t, + uint32_t); + void (*instfunc)(ACPI_WHEA_HEADER *, void *, + uint32_t *, uint32_t); + struct apei_actinst actinst[]; +}; + +struct apei_interp * +apei_interp_create(const char *name, + const char *const *actname, unsigned nact, + const char *const *instname, unsigned ninst, + bool (*instvalid)(ACPI_WHEA_HEADER *, uint32_t, uint32_t), + void (*instfunc)(ACPI_WHEA_HEADER *, void *, uint32_t *, uint32_t)) +{ + struct apei_interp *I; + + I = kmem_zalloc(offsetof(struct apei_interp, actinst[nact]), KM_SLEEP); + I->name = name; + I->actname = actname; + I->nact = nact; + I->instname = instname; + I->ninst = ninst; + I->instvalid = instvalid; + I->instfunc = instfunc; + + return I; +} + +void +apei_interp_destroy(struct apei_interp *I) +{ + unsigned nact = I->nact; + + kmem_free(I, offsetof(struct apei_interp, actinst[nact])); +} + +/* + * apei_interp_pass1_load(I, i, E) + * + * Load the ith table entry E into the interpreter I. To be + * called for each entry in the table sequentially. + * + * This first pass counts the number of instructions for each + * action, so we can allocate an array of instructions for + * indexing each action. + */ +void +apei_interp_pass1_load(struct apei_interp *I, uint32_t i, + ACPI_WHEA_HEADER *E) +{ + + /* + * If we don't recognize this action, ignore it and move on. 
+ */ + if (E->Action >= I->nact || I->actname[E->Action] == NULL) { + aprint_error("%s[%"PRIu32"]: unknown action: 0x%"PRIx8"\n", + I->name, i, E->Action); + return; + } + struct apei_actinst *const A = &I->actinst[E->Action]; + + /* + * If we can't interpret this instruction for this action, or + * if we couldn't interpret a previous instruction for this + * action, ignore _all_ instructions for this action -- by + * marking the action as having UINT32_MAX instructions -- and + * move on. + */ + if (E->Instruction >= I->ninst || + I->instname[E->Instruction] == NULL) { + aprint_error("%s[%"PRIu32"]: unknown instruction: 0x%02"PRIx8 + "\n", I->name, i, E->Instruction); + A->ninst = UINT32_MAX; + return; + } + if (A->ninst == UINT32_MAX) + return; + + /* + * Count another instruction. We will make a pointer + * to it in a later pass. + */ + A->ninst++; + + /* + * If it overflows a reasonable size, bail on this instruction. + */ + if (A->ninst >= 256) { + aprint_error("%s[%"PRIu32"]:" + " too many instructions for action %"PRIu32" (%s)\n", + I->name, i, + E->Action, I->actname[E->Action]); + A->ninst = UINT32_MAX; + return; + } +} + +/* + * apei_interp_pass2_verify(I, i, E) + * + * Verify the ith entry's instruction, using the caller's + * instvalid function, now that all the instructions have been + * counted. To be called for each entry in the table + * sequentially. + * + * This second pass checks that GOTO instructions in particular + * don't jump out of bounds. + */ +void +apei_interp_pass2_verify(struct apei_interp *I, uint32_t i, + ACPI_WHEA_HEADER *E) +{ + + /* + * If there's no instruction validation function, skip this + * pass. + */ + if (I->instvalid == NULL) + return; + + /* + * If we skipped it in earlier passes, skip it now. + */ + if (E->Action > I->nact || I->actname[E->Action] == NULL) + return; + + /* + * If the instruction is invalid, disable the whole action. 
+ */ + struct apei_actinst *const A = &I->actinst[E->Action]; + if (!(*I->instvalid)(E, A->ninst, i)) + A->ninst = UINT32_MAX; +} + +/* + * apei_interp_pass3_alloc(I) + * + * Allocate an array of instructions for each action that we + * didn't decide to bail on, marked with UINT32_MAX. + */ +void +apei_interp_pass3_alloc(struct apei_interp *I) +{ + unsigned action; + + for (action = 0; action < I->nact; action++) { + struct apei_actinst *const A = &I->actinst[action]; + if (A->ninst == 0 || A->ninst == UINT32_MAX) + continue; + A->inst = kmem_zalloc(A->ninst * sizeof(A->inst[0]), KM_SLEEP); + } +} + +/* + * apei_interp_pass4_assemble(I, i, E) + * + * Put the instruction for the ith entry E into the instruction + * array for its action. To be called for each entry in the table + * sequentially. + */ +void +apei_interp_pass4_assemble(struct apei_interp *I, uint32_t i, + ACPI_WHEA_HEADER *E) +{ + + /* + * If we skipped it in earlier passes, skip it now. + */ + if (E->Action >= I->nact || I->actname[E->Action] == NULL) + return; + + struct apei_actinst *const A = &I->actinst[E->Action]; + if (A->ninst == UINT32_MAX) + return; + + KASSERT(A->ip < A->ninst); + A->inst[A->ip++] = E; +} + +/* + * apei_interp_pass5_verify(I) + * + * Paranoia: Verify we got all the instructions for each action, + * verify the actions point to their own instructions, and dump + * the instructions for each action, collated, with aprint_debug. + */ +void +apei_interp_pass5_verify(struct apei_interp *I) +{ + unsigned action; + + for (action = 0; action < I->nact; action++) { + struct apei_actinst *const A = &I->actinst[action]; + unsigned j; + + /* + * If the action is disabled, it's all set. + */ + if (A->ninst == UINT32_MAX) + continue; + KASSERTMSG(A->ip == A->ninst, + "action %s ip=%"PRIu32" ninstruction=%"PRIu32, + I->actname[action], A->ip, A->ninst); + + /* + * XXX Dump the complete instruction table. 
+ */ + for (j = 0; j < A->ninst; j++) { + ACPI_WHEA_HEADER *const E = A->inst[j]; + + KASSERT(E->Action == action); + aprint_debug("%s: %s[%"PRIu32"]: %s\n", + I->name, I->actname[action], j, + I->instname[E->Instruction]); + } + } +} + +/* + * apei_interpret(I, action, cookie) + * + * Run the instructions associated with the given action by + * calling the interpreter's instfunc for each one. + * + * Halt when the instruction pointer runs past the end of the + * array, or after 1000 cycles, whichever comes first. + */ +void +apei_interpret(struct apei_interp *I, unsigned action, void *cookie) +{ + unsigned juice = 1000; + uint32_t ip = 0; + + if (action > I->nact || I->actname[action] == NULL) + return; + struct apei_actinst *const A = &I->actinst[action]; + + while (ip < A->ninst && juice --> 0) { + ACPI_WHEA_HEADER *const E = A->inst[ip++]; + + (*I->instfunc)(E, cookie, &ip, A->ninst); + } +} diff --git a/sys/dev/acpi/apei_interp.h b/sys/dev/acpi/apei_interp.h new file mode 100644 index 000000000000..8b3570d83b89 --- /dev/null +++ b/sys/dev/acpi/apei_interp.h @@ -0,0 +1,55 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_INTERP_H_ +#define _SYS_DEV_ACPI_APEI_INTERP_H_ + +#include + +struct acpi_whea_header; +struct apei_interp; + +struct apei_interp *apei_interp_create(const char *, + const char *const *, unsigned, + const char *const *, unsigned, + bool (*)(struct acpi_whea_header *, uint32_t, uint32_t), + void (*)(struct acpi_whea_header *, void *, uint32_t *, uint32_t)); +void apei_interp_destroy(struct apei_interp *); + +void apei_interp_pass1_load(struct apei_interp *, uint32_t, + struct acpi_whea_header *); +void apei_interp_pass2_verify(struct apei_interp *, uint32_t, + struct acpi_whea_header *); +void apei_interp_pass3_alloc(struct apei_interp *); +void apei_interp_pass4_assemble(struct apei_interp *, uint32_t, + struct acpi_whea_header *); +void apei_interp_pass5_verify(struct apei_interp *); + +void apei_interpret(struct apei_interp *, unsigned, void *); + +#endif /* _SYS_DEV_ACPI_APEI_INTERP_H_ */ diff --git a/sys/dev/acpi/apei_mapreg.c b/sys/dev/acpi/apei_mapreg.c new file mode 100644 index 000000000000..3f2a77418eb1 --- /dev/null +++ b/sys/dev/acpi/apei_mapreg.c @@ -0,0 +1,201 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Pre-mapped ACPI register access + * + * XXX This isn't APEI-specific -- it should be moved into the general + * ACPI API, and unified with the AcpiRead/AcpiWrite implementation. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include + +#include +#include + +/* + * apei_mapreg_map(reg) + * + * Return a mapping for use with apei_mapreg_read, or NULL if it + * can't be mapped. + */ +struct apei_mapreg * +apei_mapreg_map(const ACPI_GENERIC_ADDRESS *reg) +{ + + /* + * Verify the result is reasonable. + */ + switch (reg->BitWidth) { + case 8: + case 16: + case 32: + case 64: + break; + default: + return NULL; + } + + /* + * Verify we know how to do the access width. 
+ */ + switch (reg->AccessWidth) { + case 1: /* 8-bit */ + case 2: /* 16-bit */ + case 3: /* 32-bit */ + case 4: /* 64-bit */ + break; + default: + return NULL; + } + + /* + * Verify we don't need to shift anything, because I can't + * figure out how the shifting is supposed to work in five + * minutes of looking at the spec. + */ + switch (reg->BitOffset) { + case 0: + break; + default: + return NULL; + } + + /* + * Verify the bit width is a multiple of the access width so + * we're not accessing more than we need. + */ + if (reg->BitWidth % (8*(1 << (reg->AccessWidth - 1)))) + return NULL; + + /* + * Dispatch on the space id. + * + * Currently this only handles memory space because I/O space + * is too painful to contemplate reimplementing here. + */ + switch (reg->SpaceId) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + return AcpiOsMapMemory(reg->Address, + 1 << (reg->AccessWidth - 1)); + default: + return NULL; + } +} + +/* + * apei_mapreg_unmap(reg, map) + * + * Unmap a mapping previously returned by apei_mapreg_map. + */ +void +apei_mapreg_unmap(const ACPI_GENERIC_ADDRESS *reg, + struct apei_mapreg *map) +{ + + AcpiOsUnmapMemory(map, 1 << (reg->AccessWidth - 1)); +} + +/* + * apei_mapreg_read(reg, map) + * + * Read from reg via map previously obtained by apei_mapreg_map. 
+ */ +uint64_t +apei_mapreg_read(const ACPI_GENERIC_ADDRESS *reg, + const struct apei_mapreg *map) +{ + unsigned chunkbits = NBBY*(1 << (reg->AccessWidth - 1)); + unsigned i, n = reg->BitWidth % chunkbits; + uint64_t v = 0; + + for (i = 0; i < n; i++) { + uint64_t chunk; + + switch (reg->AccessWidth) { + case 1: + chunk = *(volatile const uint8_t *)map; + break; + case 2: + chunk = *(volatile const uint16_t *)map; + break; + case 3: + chunk = *(volatile const uint32_t *)map; + break; + case 4: + chunk = *(volatile const uint64_t *)map; + break; + default: + __unreachable(); + } + v |= chunk << (i*chunkbits); + } + + membar_acquire(); /* XXX probably not right for MMIO */ + return v; +} + +/* + * apei_mapreg_write(reg, map, v) + * + * Write to reg via map previously obtained by apei_mapreg_map. + */ +void +apei_mapreg_write(const ACPI_GENERIC_ADDRESS *reg, struct apei_mapreg *map, + uint64_t v) +{ + unsigned chunkbits = NBBY*(1 << (reg->AccessWidth - 1)); + unsigned i, n = reg->BitWidth % chunkbits; + + membar_release(); /* XXX probably not right for MMIO */ + for (i = 0; i < n; i++) { + uint64_t chunk = v >> (i*chunkbits); + + switch (reg->AccessWidth) { + case 1: + *(volatile uint8_t *)map = chunk; + break; + case 2: + *(volatile uint16_t *)map = chunk; + break; + case 3: + *(volatile uint32_t *)map = chunk; + break; + case 4: + *(volatile uint64_t *)map = chunk; + break; + default: + __unreachable(); + } + } +} diff --git a/sys/dev/acpi/apei_mapreg.h b/sys/dev/acpi/apei_mapreg.h new file mode 100644 index 000000000000..36efaee4fe06 --- /dev/null +++ b/sys/dev/acpi/apei_mapreg.h @@ -0,0 +1,46 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_MAPREG_H_ +#define _SYS_DEV_ACPI_APEI_MAPREG_H_ + +#include + +struct acpi_generic_address; +struct apei_mapreg; + +struct apei_mapreg *apei_mapreg_map(const struct acpi_generic_address *); +void apei_mapreg_unmap(const struct acpi_generic_address *, + struct apei_mapreg *); + +uint64_t apei_mapreg_read(const struct acpi_generic_address *, + const struct apei_mapreg *); +void apei_mapreg_write(const struct acpi_generic_address *, + struct apei_mapreg *, uint64_t); + +#endif /* _SYS_DEV_ACPI_APEI_MAPREG_H_ */ diff --git a/sys/dev/acpi/apei_reg.c b/sys/dev/acpi/apei_reg.c new file mode 100644 index 000000000000..ffc2b7d12790 --- /dev/null +++ b/sys/dev/acpi/apei_reg.c @@ -0,0 +1,103 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI register access for ERST/EINJ action instructions + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include +#include + +/* + * apei_read_register(Register, Mask, &X) + * + * Read from Register, shifted out of position and then masked + * with Mask, and store the result in X. + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#read-register + * + * (I'm guessing this applies to both ERST and EINJ, even though + * that section is under the ERST part.) 
+ */ +ACPI_STATUS +apei_read_register(ACPI_GENERIC_ADDRESS *Register, uint64_t Mask, uint64_t *p) +{ + const uint8_t BitOffset = Register->BitOffset; + uint64_t X; + ACPI_STATUS rv; + + rv = AcpiRead(&X, Register); + if (ACPI_FAILURE(rv)) { + *p = 0; /* XXX */ + return rv; + } + X >>= BitOffset; + X &= Mask; + + *p = X; + return AE_OK; +} + +/* + * apei_write_register(Register, Mask, preserve_register, X) + * + * Write X, masked with Mask and shifted into position, to + * Register, preserving other bits if preserve_register is true. + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#write-register + * + * Note: The Preserve Register semantics is based on the clearer + * indentation at + * https://uefi.org/sites/default/files/resources/ACPI_5_1release.pdf#page=714 + * which has been lost in more recent versions of the spec. + */ +ACPI_STATUS +apei_write_register(ACPI_GENERIC_ADDRESS *Register, uint64_t Mask, + bool preserve_register, uint64_t X) +{ + const uint8_t BitOffset = Register->BitOffset; + ACPI_STATUS rv; + + X &= Mask; + X <<= BitOffset; + if (preserve_register) { + uint64_t Y; + + rv = AcpiRead(&Y, Register); + if (ACPI_FAILURE(rv)) + return rv; + Y &= ~(Mask << BitOffset); + X |= Y; + } + return AcpiWrite(X, Register); +} diff --git a/sys/dev/acpi/apei_reg.h b/sys/dev/acpi/apei_reg.h new file mode 100644 index 000000000000..7afe09b2c2d1 --- /dev/null +++ b/sys/dev/acpi/apei_reg.h @@ -0,0 +1,40 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_REG_H_ +#define _SYS_DEV_ACPI_APEI_REG_H_ + +#include + +#include + +ACPI_STATUS apei_read_register(ACPI_GENERIC_ADDRESS *, uint64_t, uint64_t *); +ACPI_STATUS apei_write_register(ACPI_GENERIC_ADDRESS *, uint64_t, bool, + uint64_t); + +#endif /* _SYS_DEV_ACPI_APEI_REG_H_ */ diff --git a/sys/dev/acpi/apeivar.h b/sys/dev/acpi/apeivar.h new file mode 100644 index 000000000000..4729c2a890af --- /dev/null +++ b/sys/dev/acpi/apeivar.h @@ -0,0 +1,81 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEIVAR_H_ +#define _SYS_DEV_ACPI_APEIVAR_H_ + +#include + +#include + +#include +#include +#include +#include +#include + +struct sysctllog; +struct sysctlnode; + +/* + * struct apei_tab + * + * Collection of pointers to APEI-related ACPI tables. Used + * inside struct apei_softc, and by apei_match without an + * apei_softc. + */ +struct apei_tab { + ACPI_TABLE_BERT *bert; /* Boot Error Record Table */ + ACPI_TABLE_EINJ *einj; /* Error Injection Table */ + ACPI_TABLE_ERST *erst; /* Error Record Serialization Table */ + ACPI_TABLE_HEST *hest; /* Hardware Error Source Table */ +}; + +/* + * struct apei_softc + * + * All software state for APEI. 
+ */ +struct apei_softc { + device_t sc_dev; + struct apei_tab sc_tab; + + struct sysctllog *sc_sysctllog; + const struct sysctlnode *sc_sysctlroot; + + struct apei_bert_softc sc_bert; + struct apei_einj_softc sc_einj; + struct apei_erst_softc sc_erst; + struct apei_hest_softc sc_hest; +}; + +uint32_t apei_gesb_report(struct apei_softc *, + const ACPI_HEST_GENERIC_STATUS *, size_t, const char *, + bool *); + +#endif /* _SYS_DEV_ACPI_APEIVAR_H_ */ diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi index 383b347f38d8..e657cfac4aff 100644 --- a/sys/dev/acpi/files.acpi +++ b/sys/dev/acpi/files.acpi @@ -310,4 +310,16 @@ file dev/acpi/igpio_acpi.c igpio_acpi attach dwcmmc at acpinodebus with dwcmmc_acpi file dev/acpi/dwcmmc_acpi.c dwcmmc_acpi +# ACPI Platform Error Interface +device apei +attach apei at apeibus +file dev/acpi/apei.c apei +file dev/acpi/apei_bert.c apei +file dev/acpi/apei_einj.c apei +file dev/acpi/apei_erst.c apei +file dev/acpi/apei_hest.c apei +file dev/acpi/apei_interp.c apei +file dev/acpi/apei_mapreg.c apei +file dev/acpi/apei_reg.c apei + include "dev/acpi/wmi/files.wmi" diff --git a/sys/modules/Makefile b/sys/modules/Makefile index d8b64a2134cf..bd8f36d6dfd8 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -292,6 +292,7 @@ SUBDIR+= acpivga SUBDIR+= acpiwdrt SUBDIR+= acpiwmi SUBDIR+= aibs +SUBDIR+= apei SUBDIR+= asus SUBDIR+= fujbp SUBDIR+= fujhk diff --git a/sys/modules/apei/Makefile b/sys/modules/apei/Makefile new file mode 100644 index 000000000000..909dab675de2 --- /dev/null +++ b/sys/modules/apei/Makefile @@ -0,0 +1,20 @@ +# $NetBSD$ +# + +.include "../Makefile.inc" + +.PATH: $S/dev/acpi + +KMOD= apei +IOCONF= apei.ioconf + +SRCS+= apei.c +SRCS+= apei_bert.c +SRCS+= apei_einj.c +SRCS+= apei_erst.c +SRCS+= apei_hest.c +SRCS+= apei_interp.c +SRCS+= apei_mapreg.c +SRCS+= apei_reg.c + +.include diff --git a/sys/modules/apei/apei.ioconf b/sys/modules/apei/apei.ioconf new file mode 100644 index 000000000000..d2099452d391 
--- /dev/null +++ b/sys/modules/apei/apei.ioconf @@ -0,0 +1,11 @@ +# $NetBSD$ +# + +ioconf apei + +include "conf/files" +include "dev/acpi/files.acpi" + +pseudo-root apeibus* + +apei* at apeibus? From 37542c871c2098497516a564750ddf7c71be6f39 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Tue, 19 Mar 2024 02:20:11 +0000 Subject: [PATCH 3/3] acpihed(4): New driver for PNP0C33 to notify apei(4). PNP0C33 denotes the ACPI Hardware Error Device, which exists only to be a vector for event notifications. PR kern/58046 --- distrib/sets/lists/debug/module.md.amd64 | 2 + distrib/sets/lists/debug/module.md.i386 | 2 + distrib/sets/lists/modules/md.amd64 | 2 + distrib/sets/lists/modules/md.i386 | 2 + share/man/man4/acpihed.4 | 69 +++++++++++ share/man/man4/apei.4 | 1 + sys/arch/amd64/conf/ALL | 1 + sys/arch/i386/conf/ALL | 1 + sys/dev/acpi/acpi_hed.c | 143 +++++++++++++++++++++++ sys/dev/acpi/files.acpi | 5 + sys/modules/Makefile | 1 + sys/modules/acpihed/Makefile | 11 ++ sys/modules/acpihed/acpihed.ioconf | 11 ++ 13 files changed, 251 insertions(+) create mode 100644 share/man/man4/acpihed.4 create mode 100644 sys/dev/acpi/acpi_hed.c create mode 100644 sys/modules/acpihed/Makefile create mode 100644 sys/modules/acpihed/acpihed.ioconf diff --git a/distrib/sets/lists/debug/module.md.amd64 b/distrib/sets/lists/debug/module.md.amd64 index ac5758bbd180..e03304f046fa 100644 --- a/distrib/sets/lists/debug/module.md.amd64 +++ b/distrib/sets/lists/debug/module.md.amd64 @@ -11,6 +11,8 @@ ./usr/libdata/debug/@MODULEDIR@/acpidalb/acpidalb.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpifan modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpifan/acpifan.kmod.debug modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/acpihed modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/acpihed/acpihed.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpilid modules-base-kernel kmod,debug 
./usr/libdata/debug/@MODULEDIR@/acpilid/acpilid.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpipmtr modules-base-kernel kmod,debug diff --git a/distrib/sets/lists/debug/module.md.i386 b/distrib/sets/lists/debug/module.md.i386 index dcba7e40fce7..c99d0ae0201d 100644 --- a/distrib/sets/lists/debug/module.md.i386 +++ b/distrib/sets/lists/debug/module.md.i386 @@ -11,6 +11,8 @@ ./usr/libdata/debug/@MODULEDIR@/acpidalb/acpidalb.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpifan modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpifan/acpifan.kmod.debug modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/acpihed modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/acpihed/acpihed.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpilid modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpilid/acpilid.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpipmtr modules-base-kernel kmod,debug diff --git a/distrib/sets/lists/modules/md.amd64 b/distrib/sets/lists/modules/md.amd64 index 8b7c99c65e56..b5f32d939921 100644 --- a/distrib/sets/lists/modules/md.amd64 +++ b/distrib/sets/lists/modules/md.amd64 @@ -12,6 +12,8 @@ ./@MODULEDIR@/acpidalb/acpidalb.kmod modules-base-kernel kmod ./@MODULEDIR@/acpifan modules-base-kernel kmod ./@MODULEDIR@/acpifan/acpifan.kmod modules-base-kernel kmod +./@MODULEDIR@/acpihed modules-base-kernel kmod +./@MODULEDIR@/acpihed/acpihed.kmod modules-base-kernel kmod ./@MODULEDIR@/acpilid modules-base-kernel kmod ./@MODULEDIR@/acpilid/acpilid.kmod modules-base-kernel kmod ./@MODULEDIR@/acpipmtr modules-base-kernel kmod diff --git a/distrib/sets/lists/modules/md.i386 b/distrib/sets/lists/modules/md.i386 index a65537e9e260..900d4fde3f7b 100644 --- a/distrib/sets/lists/modules/md.i386 +++ b/distrib/sets/lists/modules/md.i386 @@ -12,6 +12,8 @@ ./@MODULEDIR@/acpidalb/acpidalb.kmod modules-base-kernel 
kmod ./@MODULEDIR@/acpifan modules-base-kernel kmod ./@MODULEDIR@/acpifan/acpifan.kmod modules-base-kernel kmod +./@MODULEDIR@/acpihed modules-base-kernel kmod +./@MODULEDIR@/acpihed/acpihed.kmod modules-base-kernel kmod ./@MODULEDIR@/acpilid modules-base-kernel kmod ./@MODULEDIR@/acpilid/acpilid.kmod modules-base-kernel kmod ./@MODULEDIR@/acpipmtr modules-base-kernel kmod diff --git a/share/man/man4/acpihed.4 b/share/man/man4/acpihed.4 new file mode 100644 index 000000000000..d7fc5bd38a66 --- /dev/null +++ b/share/man/man4/acpihed.4 @@ -0,0 +1,69 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. 
+.\" +.Dd March 18, 2024 +.Dt ACPIHED 4 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm acpihed +.Nd ACPI Hardware Error Device +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.Cd "acpihed* at acpi?" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +Certain hardware error sources that can be queried by +.Xr apei 4 +notify an ACPI node with PNP ID +.Sq Li PNP0C33 +when an error occurs. +The +.Nm +driver listens for these notifications and passes them on to +.Xr apei 4 +so it can report the error. +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr acpi 4 , +.Xr apei 4 +.Rs +.%B ACPI Specification 6.5 +.%O Chapter 18: ACPI Platform Error Interfaces (APEI) +.%U https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +driver first appeared in +.Nx 11.0 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh AUTHORS +The +.Nm +driver was written by +.An Taylor R Campbell Aq Mt riastradh@NetBSD.org . diff --git a/share/man/man4/apei.4 b/share/man/man4/apei.4 index 7441361e04af..f38d402a6d70 100644 --- a/share/man/man4/apei.4 +++ b/share/man/man4/apei.4 @@ -85,6 +85,7 @@ Details of the hardware error sources can be dumped with .\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" .Sh SEE ALSO .Xr acpi 4 , +.Xr acpihed 4 , .Xr acpidump 8 .Rs .%B ACPI Specification 6.5 diff --git a/sys/arch/amd64/conf/ALL b/sys/arch/amd64/conf/ALL index 3304f50c4941..bd4e9871a8b5 100644 --- a/sys/arch/amd64/conf/ALL +++ b/sys/arch/amd64/conf/ALL @@ -368,6 +368,7 @@ acpiec* at acpi? # ACPI Embedded Controller (late) acpiecdt* at acpi? # ACPI Embedded Controller (early) options ACPIEC_DEBUG=-1 acpifan* at acpi? 
# ACPI Fan +acpihed* at acpi? # ACPI Hardware Error Device acpilid* at acpi? # ACPI Lid Switch acpipmtr* at acpi? # ACPI Power Meter (experimental) # XXX Do not enable this in a real kernel unless you also disable any diff --git a/sys/arch/i386/conf/ALL b/sys/arch/i386/conf/ALL index 60ea5675ecef..495938473688 100644 --- a/sys/arch/i386/conf/ALL +++ b/sys/arch/i386/conf/ALL @@ -355,6 +355,7 @@ acpiec* at acpi? # ACPI Embedded Controller (late) acpiecdt* at acpi? # ACPI Embedded Controller (early) options ACPIEC_DEBUG=-1 acpifan* at acpi? # ACPI Fan +acpihed* at acpi? # ACPI Hardware Error Device acpilid* at acpi? # ACPI Lid Switch acpipmtr* at acpi? # ACPI Power Meter (experimental) # XXX Do not enable this in a real kernel unless you also disable any diff --git a/sys/dev/acpi/acpi_hed.c b/sys/dev/acpi/acpi_hed.c new file mode 100644 index 000000000000..f5e77cf8c661 --- /dev/null +++ b/sys/dev/acpi/acpi_hed.c @@ -0,0 +1,143 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * HED: Hardware Error Device, PNP0C33. + * + * This device serves only to receive notifications about hardware + * errors, which we then dispatch to apei(4). + */ + +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD$"); + +#include <sys/device.h> +#include <sys/module.h> + +#include <dev/acpi/acpireg.h> +#include <dev/acpi/acpivar.h> +#include <dev/acpi/apei_hed.h> + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("acpi_hed") + +struct acpihed_softc { + device_t sc_dev; + struct acpi_devnode *sc_node; +}; + +static const struct device_compatible_entry compat_data[] = { + { .compat = "PNP0C33" }, + DEVICE_COMPAT_EOL +}; + +static int acpihed_match(device_t, cfdata_t, void *); +static void acpihed_attach(device_t, device_t, void *); +static int acpihed_detach(device_t, int); +static void acpihed_notify(ACPI_HANDLE, uint32_t, void *); + +CFATTACH_DECL_NEW(acpihed, sizeof(struct acpihed_softc), + acpihed_match, acpihed_attach, acpihed_detach, NULL); + +static int +acpihed_match(device_t parent, cfdata_t match, void *aux) +{ + struct acpi_attach_args *aa = aux; + + return acpi_compatible_match(aa, compat_data); +} + +static void +acpihed_attach(device_t parent, device_t self, void *aux) +{ + struct acpihed_softc *sc = device_private(self); + struct acpi_attach_args *aa = aux; + + aprint_naive("\n"); + aprint_normal(": ACPI Hardware Error Device\n"); + + pmf_device_register(self, NULL, NULL); + + sc->sc_dev = self; + sc->sc_node = aa->aa_node; + + acpi_register_notify(sc->sc_node, acpihed_notify); +} + +static int 
+acpihed_detach(device_t self, int flags) +{ + struct acpihed_softc *sc = device_private(self); + int error; + + error = config_detach_children(self, flags); + if (error) + return error; + + acpi_deregister_notify(sc->sc_node); + + pmf_device_deregister(self); + + return 0; +} + +static void +acpihed_notify(ACPI_HANDLE handle, uint32_t event, void *cookie) +{ + + apei_hed_notify(); +} + +MODULE(MODULE_CLASS_DRIVER, acpihed, "apei"); + +#ifdef _MODULE +#include "ioconf.c" +#endif + +static int +acpihed_modcmd(modcmd_t cmd, void *opaque) +{ + int error = 0; + + switch (cmd) { + case MODULE_CMD_INIT: +#ifdef _MODULE + error = config_init_component(cfdriver_ioconf_acpihed, + cfattach_ioconf_acpihed, cfdata_ioconf_acpihed); +#endif + return error; + case MODULE_CMD_FINI: +#ifdef _MODULE + error = config_fini_component(cfdriver_ioconf_acpihed, + cfattach_ioconf_acpihed, cfdata_ioconf_acpihed); +#endif + return error; + default: + return ENOTTY; + } +} diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi index e657cfac4aff..a30dfbc009f0 100644 --- a/sys/dev/acpi/files.acpi +++ b/sys/dev/acpi/files.acpi @@ -322,4 +322,9 @@ file dev/acpi/apei_interp.c apei file dev/acpi/apei_mapreg.c apei file dev/acpi/apei_reg.c apei +# ACPI Hardware Error Device +device acpihed: apei +attach acpihed at acpinodebus +file dev/acpi/acpi_hed.c acpihed + include "dev/acpi/wmi/files.wmi" diff --git a/sys/modules/Makefile b/sys/modules/Makefile index bd8f36d6dfd8..57313bc5324e 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -285,6 +285,7 @@ SUBDIR+= acpibut SUBDIR+= acpicpu SUBDIR+= acpidalb SUBDIR+= acpifan +SUBDIR+= acpihed SUBDIR+= acpilid SUBDIR+= acpipmtr SUBDIR+= acpitz diff --git a/sys/modules/acpihed/Makefile b/sys/modules/acpihed/Makefile new file mode 100644 index 000000000000..7f23c86050a6 --- /dev/null +++ b/sys/modules/acpihed/Makefile @@ -0,0 +1,11 @@ +# $NetBSD$ + +.include "../Makefile.inc" + +.PATH: $S/dev/acpi + +KMOD= acpihed +IOCONF= acpihed.ioconf 
+SRCS= acpi_hed.c + +.include <bsd.kmodule.mk> diff --git a/sys/modules/acpihed/acpihed.ioconf b/sys/modules/acpihed/acpihed.ioconf new file mode 100644 index 000000000000..8edb2783bc6b --- /dev/null +++ b/sys/modules/acpihed/acpihed.ioconf @@ -0,0 +1,11 @@ +# $NetBSD$ +# + +ioconf acpihed + +include "conf/files" +include "dev/acpi/files.acpi" + +pseudo-root acpi* + +acpihed* at acpi?