diff --git a/distrib/sets/lists/debug/module.md.amd64 b/distrib/sets/lists/debug/module.md.amd64 index fa740faa50b5..e03304f046fa 100644 --- a/distrib/sets/lists/debug/module.md.amd64 +++ b/distrib/sets/lists/debug/module.md.amd64 @@ -11,6 +11,8 @@ ./usr/libdata/debug/@MODULEDIR@/acpidalb/acpidalb.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpifan modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpifan/acpifan.kmod.debug modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/acpihed modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/acpihed/acpihed.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpilid modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpilid/acpilid.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpipmtr modules-base-kernel kmod,debug @@ -35,6 +37,8 @@ ./usr/libdata/debug/@MODULEDIR@/amdtemp/amdtemp.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/amdzentemp modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/amdzentemp/amdzentemp.kmod.debug modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/apei modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/apei/apei.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/aps modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/aps/aps.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/asus modules-base-kernel kmod,debug diff --git a/distrib/sets/lists/debug/module.md.i386 b/distrib/sets/lists/debug/module.md.i386 index 431139a0991f..c99d0ae0201d 100644 --- a/distrib/sets/lists/debug/module.md.i386 +++ b/distrib/sets/lists/debug/module.md.i386 @@ -11,6 +11,8 @@ ./usr/libdata/debug/@MODULEDIR@/acpidalb/acpidalb.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpifan modules-base-kernel kmod,debug 
./usr/libdata/debug/@MODULEDIR@/acpifan/acpifan.kmod.debug modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/acpihed modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/acpihed/acpihed.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpilid modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpilid/acpilid.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/acpipmtr modules-base-kernel kmod,debug @@ -35,6 +37,8 @@ ./usr/libdata/debug/@MODULEDIR@/amdtemp/amdtemp.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/amdzentemp modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/amdzentemp/amdzentemp.kmod.debug modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/apei modules-base-kernel kmod,debug +./usr/libdata/debug/@MODULEDIR@/apei/apei.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/aps modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/aps/aps.kmod.debug modules-base-kernel kmod,debug ./usr/libdata/debug/@MODULEDIR@/asus modules-base-kernel kmod,debug diff --git a/distrib/sets/lists/modules/md.amd64 b/distrib/sets/lists/modules/md.amd64 index 6a04608c36e7..b5f32d939921 100644 --- a/distrib/sets/lists/modules/md.amd64 +++ b/distrib/sets/lists/modules/md.amd64 @@ -12,6 +12,8 @@ ./@MODULEDIR@/acpidalb/acpidalb.kmod modules-base-kernel kmod ./@MODULEDIR@/acpifan modules-base-kernel kmod ./@MODULEDIR@/acpifan/acpifan.kmod modules-base-kernel kmod +./@MODULEDIR@/acpihed modules-base-kernel kmod +./@MODULEDIR@/acpihed/acpihed.kmod modules-base-kernel kmod ./@MODULEDIR@/acpilid modules-base-kernel kmod ./@MODULEDIR@/acpilid/acpilid.kmod modules-base-kernel kmod ./@MODULEDIR@/acpipmtr modules-base-kernel kmod @@ -36,6 +38,8 @@ ./@MODULEDIR@/amdtemp/amdtemp.kmod modules-base-kernel kmod ./@MODULEDIR@/amdzentemp modules-base-kernel kmod ./@MODULEDIR@/amdzentemp/amdzentemp.kmod modules-base-kernel kmod 
+./@MODULEDIR@/apei modules-base-kernel kmod +./@MODULEDIR@/apei/apei.kmod modules-base-kernel kmod ./@MODULEDIR@/aps modules-base-kernel kmod ./@MODULEDIR@/aps/aps.kmod modules-base-kernel kmod ./@MODULEDIR@/asus modules-base-kernel kmod diff --git a/distrib/sets/lists/modules/md.i386 b/distrib/sets/lists/modules/md.i386 index 6536f8ad8936..900d4fde3f7b 100644 --- a/distrib/sets/lists/modules/md.i386 +++ b/distrib/sets/lists/modules/md.i386 @@ -12,6 +12,8 @@ ./@MODULEDIR@/acpidalb/acpidalb.kmod modules-base-kernel kmod ./@MODULEDIR@/acpifan modules-base-kernel kmod ./@MODULEDIR@/acpifan/acpifan.kmod modules-base-kernel kmod +./@MODULEDIR@/acpihed modules-base-kernel kmod +./@MODULEDIR@/acpihed/acpihed.kmod modules-base-kernel kmod ./@MODULEDIR@/acpilid modules-base-kernel kmod ./@MODULEDIR@/acpilid/acpilid.kmod modules-base-kernel kmod ./@MODULEDIR@/acpipmtr modules-base-kernel kmod @@ -36,6 +38,8 @@ ./@MODULEDIR@/amdtemp/amdtemp.kmod modules-base-kernel kmod ./@MODULEDIR@/amdzentemp modules-base-kernel kmod ./@MODULEDIR@/amdzentemp/amdzentemp.kmod modules-base-kernel kmod +./@MODULEDIR@/apei modules-base-kernel kmod +./@MODULEDIR@/apei/apei.kmod modules-base-kernel kmod ./@MODULEDIR@/aps modules-base-kernel kmod ./@MODULEDIR@/aps/aps.kmod modules-base-kernel kmod ./@MODULEDIR@/asus modules-base-kernel kmod diff --git a/share/man/man4/acpihed.4 b/share/man/man4/acpihed.4 new file mode 100644 index 000000000000..d7fc5bd38a66 --- /dev/null +++ b/share/man/man4/acpihed.4 @@ -0,0 +1,69 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2024 +.Dt ACPIHED 4 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm acpihed +.Nd ACPI Hardware Error Device +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.Cd "acpihed* at acpi?" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +Certain hardware error sources that can be queried by +.Xr apei 4 +notify an ACPI node with PNP ID +.Sq Li PNP0C33 +when an error occurs. +The +.Nm +driver listens for these notifications and passes them on to +.Xr apei 4 +so it can report the error. 
+.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr acpi 4 , +.Xr apei 4 +.Rs +.%B ACPI Specification 6.5 +.%O Chapter 18: ACPI Platform Error Interfaces (APEI) +.%U https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +driver first appeared in +.Nx 11.0 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh AUTHORS +The +.Nm +driver was written by +.An Taylor R Campbell Aq Mt riastradh@NetBSD.org . diff --git a/share/man/man4/apei.4 b/share/man/man4/apei.4 new file mode 100644 index 000000000000..f38d402a6d70 --- /dev/null +++ b/share/man/man4/apei.4 @@ -0,0 +1,133 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2024 +.Dt APEI 4 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm apei +.Nd ACPI Platform Error Interfaces +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.Cd "apei* at apeibus?" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +.Nm +reports hardware errors discovered through +.Tn APEI , +the +.Tn ACPI +Platform Error Interfaces. +.Pp +.Nm +also supports injecting errors. +.\" .Nm +.\" also supports reading/writing/clearing error records in a persistent +.\" firmware store (XXX not yet: nothing uses the ERST). +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DIAGNOSTICS +When the hardware detects an error and reports it to +.Nm , +it will print information about the error to the console. 
+.Pp +Example of a correctable memory error, automatically corrected by the +system, with no further intervention needed: +.Bd -literal +apei0: error source 1 reported hardware error: severity=corrected nentries=1 status=0x12 +apei0: error source 1 entry 0: SectionType={0xa5bc1114,0x6f64,0x4ede,0xb8b8,{0x3e,0x83,0xed,0x7c,0x83,0xb1}} (memory error) +apei0: error source 1 entry 0: ErrorSeverity=2 (corrected) +apei0: error source 1 entry 0: Revision=0x201 +apei0: error source 1 entry 0: Flags=0x1 +apei0: error source 1 entry 0: FruText=CorrectedErr +apei0: error source 1 entry 0: MemoryErrorType=8 (PARITY_ERROR) +.Ed +.Pp +Example of a fatal uncorrectable memory error: +.Bd -literal +apei0: error source 0 reported hardware error: severity=fatal nentries=1 status=0x11 +apei0: error source 0 entry 0: SectionType={0xa5bc1114,0x6f64,0x4ede,0xb8b8,{0x3e,0x83,0xed,0x7c,0x83,0xb1}} (memory error) +apei0: error source 0 entry 0: ErrorSeverity=1 (fatal) +apei0: error source 0 entry 0: Revision=0x201 +apei0: error source 0 entry 0: Flags=0x1 +apei0: error source 0 entry 0: FruText=UncorrectedErr +apei0: error source 0 entry 0: ErrorStatus=0x400 +apei0: error source 0 entry 0: Node=0x0 +apei0: error source 0 entry 0: Module=0x0 +apei0: error source 0 entry 0: Device=0x0 +panic: fatal hardware error +.Ed +.Pp +Details of the hardware error sources can be dumped with +.Xr acpidump 8 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr acpi 4 , +.Xr acpihed 4 , +.Xr acpidump 8 +.Rs +.%B ACPI Specification 6.5 +.%O Chapter 18: ACPI Platform Error Interfaces (APEI) +.%U https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +driver first appeared in +.Nx 11.0 . 
+.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh AUTHORS +The +.Nm +driver was written by +.An Taylor R Campbell Aq Mt riastradh@NetBSD.org . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh BUGS +No sysctl interface to read BERT after boot. +.Pp +No simple sysctl interface to inject errors with EINJ, or any way to +inject errors at physical addresses in pages allocated for testing. +Perhaps there should be a separate kernel module for that. +.Pp +Nothing reads, writes, or clears ERST. +.Nx +could use it to store dmesg or other diagnostic information on panic. +.Pp +Many hardware error source types in the HEST are missing, such as +.Tn PCIe +errors. +.Pp +.Nm +is not wired to any machine-dependent machine check exception +notifications. +.Pp +No formal log format or sysctl/device interface that programs can +reliably act on. +.Pp +.Nx +makes no attempt to recover from uncorrectable but recoverable errors, +such as discarding a clean cached page where an uncorrectable memory +error has occurred. diff --git a/sys/arch/amd64/conf/ALL b/sys/arch/amd64/conf/ALL index 950f3a011cb4..bd4e9871a8b5 100644 --- a/sys/arch/amd64/conf/ALL +++ b/sys/arch/amd64/conf/ALL @@ -368,6 +368,7 @@ acpiec* at acpi? # ACPI Embedded Controller (late) acpiecdt* at acpi? # ACPI Embedded Controller (early) options ACPIEC_DEBUG=-1 acpifan* at acpi? # ACPI Fan +acpihed* at acpi? # ACPI Hardware Error Device acpilid* at acpi? # ACPI Lid Switch acpipmtr* at acpi? # ACPI Power Meter (experimental) # XXX Do not enable this in a real kernel unless you also disable any @@ -379,6 +380,7 @@ acpivga* at acpi? # ACPI Display Adapter acpiout* at acpivga? # ACPI Display Output Device acpiwdrt* at acpi? # ACPI Watchdog Resource Table acpiwmi* at acpi? # ACPI WMI Mapper +apei* at apeibus? # ACPI Platform Error Interfaces # Mainboard devices aibs* at acpi? 
# ASUSTeK AI Booster hardware monitor diff --git a/sys/arch/i386/conf/ALL b/sys/arch/i386/conf/ALL index 6b419fba1cce..495938473688 100644 --- a/sys/arch/i386/conf/ALL +++ b/sys/arch/i386/conf/ALL @@ -355,6 +355,7 @@ acpiec* at acpi? # ACPI Embedded Controller (late) acpiecdt* at acpi? # ACPI Embedded Controller (early) options ACPIEC_DEBUG=-1 acpifan* at acpi? # ACPI Fan +acpihed* at acpi? # ACPI Hardware Error Device acpilid* at acpi? # ACPI Lid Switch acpipmtr* at acpi? # ACPI Power Meter (experimental) # XXX Do not enable this in a real kernel unless you also disable any @@ -366,6 +367,7 @@ acpivga* at acpi? # ACPI Display Adapter acpiout* at acpivga? # ACPI Display Output Device acpiwdrt* at acpi? # ACPI Watchdog Resource Table acpiwmi* at acpi? # ACPI WMI Mapper +apei* at apeibus? # ACPI Platform Error Interfaces # Mainboard devices aibs* at acpi? # ASUSTeK AI Booster hardware monitor diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c index 4ade9c47efa3..24e023373e0b 100644 --- a/sys/dev/acpi/acpi.c +++ b/sys/dev/acpi/acpi.c @@ -636,6 +636,9 @@ acpi_childdet(device_t self, device_t child) if (sc->sc_wdrt == child) sc->sc_wdrt = NULL; + if (sc->sc_apei == child) + sc->sc_apei = NULL; + SIMPLEQ_FOREACH(ad, &sc->sc_head, ad_list) { if (ad->ad_device == child) @@ -923,6 +926,11 @@ acpi_rescan(device_t self, const char *ifattr, const int *locators) CFARGS(.iattr = "acpiwdrtbus")); } + if (ifattr_match(ifattr, "apeibus") && sc->sc_apei == NULL) { + sc->sc_apei = config_found(sc->sc_dev, NULL, NULL, + CFARGS(.iattr = "apeibus")); + } + return 0; } diff --git a/sys/dev/acpi/acpi_hed.c b/sys/dev/acpi/acpi_hed.c new file mode 100644 index 000000000000..f5e77cf8c661 --- /dev/null +++ b/sys/dev/acpi/acpi_hed.c @@ -0,0 +1,143 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * HED: Hardware Error Device, PNP0C33. + * + * This device serves only to receive notifications about hardware + * errors, which we then dispatch to apei(4). 
+ */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include + +#include +#include +#include + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("acpi_hed") + +struct acpihed_softc { + device_t sc_dev; + struct acpi_devnode *sc_node; +}; + +static const struct device_compatible_entry compat_data[] = { + { .compat = "PNP0C33" }, + DEVICE_COMPAT_EOL +}; + +static int acpihed_match(device_t, cfdata_t, void *); +static void acpihed_attach(device_t, device_t, void *); +static int acpihed_detach(device_t, int); +static void acpihed_notify(ACPI_HANDLE, uint32_t, void *); + +CFATTACH_DECL_NEW(acpihed, sizeof(struct acpihed_softc), + acpihed_match, acpihed_attach, acpihed_detach, NULL); + +static int +acpihed_match(device_t parent, cfdata_t match, void *aux) +{ + struct acpi_attach_args *aa = aux; + + return acpi_compatible_match(aa, compat_data); +} + +static void +acpihed_attach(device_t parent, device_t self, void *aux) +{ + struct acpihed_softc *sc = device_private(self); + struct acpi_attach_args *aa = aux; + + aprint_naive("\n"); + aprint_normal(": ACPI Hardware Error Device\n"); + + pmf_device_register(self, NULL, NULL); + + sc->sc_dev = self; + sc->sc_node = aa->aa_node; + + acpi_register_notify(sc->sc_node, acpihed_notify); +} + +static int +acpihed_detach(device_t self, int flags) +{ + struct acpihed_softc *sc = device_private(self); + int error; + + error = config_detach_children(self, flags); + if (error) + return error; + + acpi_deregister_notify(sc->sc_node); + + pmf_device_deregister(self); + + return 0; +} + +static void +acpihed_notify(ACPI_HANDLE handle, uint32_t event, void *cookie) +{ + + apei_hed_notify(); +} + +MODULE(MODULE_CLASS_DRIVER, acpihed, "apei"); + +#ifdef _MODULE +#include "ioconf.c" +#endif + +static int +acpihed_modcmd(modcmd_t cmd, void *opaque) +{ + int error = 0; + + switch (cmd) { + case MODULE_CMD_INIT: +#ifdef _MODULE + error = config_init_component(cfdriver_ioconf_acpihed, + cfattach_ioconf_acpihed, 
cfdata_ioconf_acpihed); +#endif + return error; + case MODULE_CMD_FINI: +#ifdef _MODULE + error = config_fini_component(cfdriver_ioconf_acpihed, + cfattach_ioconf_acpihed, cfdata_ioconf_acpihed); +#endif + return error; + default: + return ENOTTY; + } +} diff --git a/sys/dev/acpi/acpivar.h b/sys/dev/acpi/acpivar.h index 9dafef7f7ac5..ddce8cdd9c18 100644 --- a/sys/dev/acpi/acpivar.h +++ b/sys/dev/acpi/acpivar.h @@ -177,6 +177,13 @@ struct acpi_softc { struct sysmon_pswitch sc_smpsw_sleep; SIMPLEQ_HEAD(, acpi_devnode) sc_head; + + /* + * Move this section to the other pseudo-bus child pointers + * after pullup -- putting it here avoids potential ABI + * compatibility issues with kernel modules. + */ + device_t sc_apei; /* apei(4) pseudo-bus */ }; /* diff --git a/sys/dev/acpi/apei.c b/sys/dev/acpi/apei.c new file mode 100644 index 000000000000..fa6865db67d7 --- /dev/null +++ b/sys/dev/acpi/apei.c @@ -0,0 +1,929 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI: ACPI Platform Error Interface + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html + * + * XXX dtrace probes + * + * XXX call _OSC appropriately to announce to the platform that we, the + * OSPM, support APEI + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("apei") + +static int apei_match(device_t, cfdata_t, void *); +static void apei_attach(device_t, device_t, void *); +static int apei_detach(device_t, int); + +static void apei_get_tables(struct apei_tab *); +static void apei_put_tables(struct apei_tab *); + +static void apei_identify(struct apei_softc *, const char *, + const ACPI_TABLE_HEADER *); + +CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc), + apei_match, apei_attach, apei_detach, NULL); + +static int +apei_match(device_t parent, cfdata_t match, void *aux) +{ + struct apei_tab tab; + int prio = 0; + + /* + * If we have any of the APEI tables, match. 
+ */ + apei_get_tables(&tab); + if (tab.bert || tab.einj || tab.erst || tab.hest) + prio = 1; + apei_put_tables(&tab); + + return prio; +} + +static void +apei_attach(device_t parent, device_t self, void *aux) +{ + struct apei_softc *sc = device_private(self); + const struct sysctlnode *sysctl_hw_acpi; + int error; + + aprint_naive("\n"); + aprint_normal(": ACPI Platform Error Interface\n"); + + pmf_device_register(self, NULL, NULL); + + sc->sc_dev = self; + apei_get_tables(&sc->sc_tab); + + /* + * Get the sysctl hw.acpi node. This should already be created + * but I don't see an easy way to get at it. If this fails, + * something is seriously wrong, so let's stop here. + */ + error = sysctl_createv(&sc->sc_sysctllog, 0, + NULL, &sysctl_hw_acpi, 0, + CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0, + CTL_HW, CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, + "failed to create sysctl hw.acpi: %d\n", error); + return; + } + + /* + * Create sysctl hw.acpi.apei. + */ + error = sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_hw_acpi, &sc->sc_sysctlroot, 0, + CTLTYPE_NODE, "apei", + SYSCTL_DESCR("ACPI Platform Error Interface"), + NULL, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, + "failed to create sysctl hw.acpi.apei: %d\n", error); + return; + } + + /* + * Set up BERT, EINJ, ERST, and HEST. + */ + if (sc->sc_tab.bert) { + apei_identify(sc, "BERT", &sc->sc_tab.bert->Header); + apei_bert_attach(sc); + } + if (sc->sc_tab.einj) { + apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header); + apei_einj_attach(sc); + } + if (sc->sc_tab.erst) { + apei_identify(sc, "ERST", &sc->sc_tab.erst->Header); + apei_erst_attach(sc); + } + if (sc->sc_tab.hest) { + apei_identify(sc, "HEST", &sc->sc_tab.hest->Header); + apei_hest_attach(sc); + } +} + +static int +apei_detach(device_t self, int flags) +{ + struct apei_softc *sc = device_private(self); + int error; + + /* + * Detach children. 
We don't currently have any but this is + * harmless without children and mandatory if we ever sprouted + * them, so let's just leave it here for good measure. + * + * After this point, we are committed to detaching; failure is + * forbidden. + */ + error = config_detach_children(self, flags); + if (error) + return error; + + /* + * Tear down all the sysctl nodes first, before the software + * state backing them goes away. + */ + sysctl_teardown(&sc->sc_sysctllog); + sc->sc_sysctlroot = NULL; + + /* + * Detach the software state for the APEI tables. + */ + if (sc->sc_tab.hest) + apei_hest_detach(sc); + if (sc->sc_tab.erst) + apei_erst_detach(sc); + if (sc->sc_tab.einj) + apei_einj_detach(sc); + if (sc->sc_tab.bert) + apei_bert_detach(sc); + + /* + * Release the APEI tables and we're done. + */ + apei_put_tables(&sc->sc_tab); + pmf_device_deregister(self); + return 0; +} + +/* + * apei_get_tables(tab) + * + * Get references to whichever APEI-related tables -- BERT, EINJ, + * ERST, HEST -- are available in the system. + */ +static void +apei_get_tables(struct apei_tab *tab) +{ + ACPI_STATUS rv; + + /* + * Probe the BERT -- Boot Error Record Table. + */ + rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert); + if (ACPI_FAILURE(rv)) + tab->bert = NULL; + + /* + * Probe the EINJ -- Error Injection Table. + */ + rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj); + if (ACPI_FAILURE(rv)) + tab->einj = NULL; + + /* + * Probe the ERST -- Error Record Serialization Table. + */ + rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst); + if (ACPI_FAILURE(rv)) + tab->erst = NULL; + + /* + * Probe the HEST -- Hardware Error Source Table. + */ + rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest); + if (ACPI_FAILURE(rv)) + tab->hest = NULL; +} + +/* + * apei_put_tables(tab) + * + * Release the tables acquired by apei_get_tables. 
+ */ +static void +apei_put_tables(struct apei_tab *tab) +{ + + if (tab->bert != NULL) { + AcpiPutTable(&tab->bert->Header); + tab->bert = NULL; + } + if (tab->einj != NULL) { + AcpiPutTable(&tab->einj->Header); + tab->einj = NULL; + } + if (tab->erst != NULL) { + AcpiPutTable(&tab->erst->Header); + tab->erst = NULL; + } + if (tab->hest != NULL) { + AcpiPutTable(&tab->hest->Header); + tab->hest = NULL; + } +} + +/* + * apei_identify(sc, name, h) + * + * Identify the APEI-related table header for dmesg. + */ +static void +apei_identify(struct apei_softc *sc, const char *name, + const ACPI_TABLE_HEADER *h) +{ + + aprint_normal_dev(sc->sc_dev, "%s:" + " OemId <%6.6s,%8.8s,%08x>" + " AslId <%4.4s,%08x>\n", + name, + h->OemId, h->OemTableId, h->OemRevision, + h->AslCompilerId, h->AslCompilerRevision); +} + +/* + * apei_cper_guid_dec(buf, uuid) + * + * Decode a Common Platform Error Record UUID/GUID from an ACPI + * table at buf into a sys/uuid.h struct uuid. + */ +static void +apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid) +{ + + uuid_dec_le(buf, uuid); +} + +/* + * apei_format_guid(uuid, guidstr) + * + * Format a UUID as a string. This uses C initializer notation, + * not UUID notation, in order to match the text in the UEFI + * specification. 
+ */
+static void
+apei_format_guid(const struct uuid *uuid, char guidstr[static 69])
+{
+
+	/*
+	 * The two clock-sequence octets are printed back to back as a
+	 * single 0x%02x%02x group: the high (and reserved) octet first,
+	 * then the low octet.
+	 */
+	snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
+	    "0x%02x%02x,"
+	    "{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
+	    uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
+	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
+	    uuid->node[0], uuid->node[1], uuid->node[2],
+	    uuid->node[3], uuid->node[4], uuid->node[5]);
+}
+
+/*
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
+ *
+ * Symbolic names of the memory error types, indexed by the
+ * MemoryErrorType value.
+ */
+
+static const char *const cper_memory_error_type[] = {
+#define F(LN, SN, V) [LN] = #SN,
+	CPER_MEMORY_ERROR_TYPES(F)
+#undef F
+};
+
+/*
+ * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block
+ *
+ * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with this
+ * but are designated as being intended for Generic Error Data Entries
+ * rather than Generic Error Status Blocks.
+ */
+static const char *const apei_gesb_severity[] = {
+	[0] = "recoverable",
+	[1] = "fatal",
+	[2] = "corrected",
+	[3] = "none",
+};
+
+/*
+ * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry
+ */
+static const char *const apei_gede_severity[] = {
+	[ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable",
+	[ACPI_HEST_GEN_ERROR_FATAL] = "fatal",
+	[ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected",
+	[ACPI_HEST_GEN_ERROR_NONE] = "none",
+};
+
+/*
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
+ */
+static const struct uuid CPER_MEMORY_ERROR_SECTION =
+    {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}};
+
+/*
+ * apei_cper_memory_error_report(sc, buf, len, ctx)
+ *
+ *	Report a CPER memory error section at buf, prefixing every
+ *	message with ctx.  Only the fields flagged in ValidationBits
+ *	are printed; the validation bits themselves are printed at
+ *	debug level.
+ *
+ *	len is not examined here: the caller must make sure buf holds
+ *	at least sizeof(struct cper_memory_error) bytes.
+ */
+static void
+apei_cper_memory_error_report(struct apei_softc *sc, const void *buf,
+    size_t len, const char *ctx)
+{
+	const struct cper_memory_error *ME = buf;
+	char bitbuf[1024];
+
+	snprintb(bitbuf, sizeof(bitbuf),
+	    CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->ValidationBits);
+	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ERROR_STATUS) {
+		/*
+		 * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-status
+		 */
+		/* XXX define this format somewhere */
+		snprintb(bitbuf, sizeof(bitbuf), "\177\020"
+		    "f\010\010" "ErrorType\0"
+			"=\001" "ERR_INTERNAL\0"
+			"=\004" "ERR_MEM\0"
+			"=\005" "ERR_TLB\0"
+			"=\006" "ERR_CACHE\0"
+			"=\007" "ERR_FUNCTION\0"
+			"=\010" "ERR_SELFTEST\0"
+			"=\011" "ERR_FLOW\0"
+			"=\020" "ERR_BUS\0"
+			"=\021" "ERR_MAP\0"
+			"=\022" "ERR_IMPROPER\0"
+			"=\023" "ERR_UNIMPL\0"
+			"=\024" "ERR_LOL\0"
+			"=\025" "ERR_RESPONSE\0"
+			"=\026" "ERR_PARITY\0"
+			"=\027" "ERR_PROTOCOL\0"
+			"=\030" "ERR_ERROR\0"
+			"=\031" "ERR_TIMEOUT\0"
+			"=\032" "ERR_POISONED\0"
+		    "b\020" "AddressError\0"
+		    "b\021" "ControlError\0"
+		    "b\022" "DataError\0"
+		    "b\023" "ResponderDetected\0"
+		    "b\024" "RequesterDetected\0"
+		    "b\025" "FirstError\0"
+		    "b\026" "Overflow\0"
+		    "\0", ME->ErrorStatus);
+		device_printf(sc->sc_dev, "%s: ErrorStatus=%s\n", ctx, bitbuf);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS) {
+		device_printf(sc->sc_dev, "%s: PhysicalAddress=0x%"PRIx64"\n",
+		    ctx, ME->PhysicalAddress);
+	}
+	if (ME->ValidationBits &
+	    CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK) {
+		device_printf(sc->sc_dev, "%s: PhysicalAddressMask=0x%"PRIx64
+		    "\n", ctx, ME->PhysicalAddressMask);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_NODE) {
+		device_printf(sc->sc_dev, "%s: Node=0x%"PRIx16"\n", ctx,
+		    ME->Node);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_CARD) {
+		device_printf(sc->sc_dev, "%s: Card=0x%"PRIx16"\n", ctx,
+		    ME->Card);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MODULE) {
+		device_printf(sc->sc_dev, "%s: Module=0x%"PRIx16"\n", ctx,
+		    ME->Module);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BANK) {
+		device_printf(sc->sc_dev, "%s: Bank=0x%"PRIx16"\n", ctx,
+		    ME->Bank);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_DEVICE) {
+		device_printf(sc->sc_dev, "%s: Device=0x%"PRIx16"\n", ctx,
+		    ME->Device);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_ROW) {
+		device_printf(sc->sc_dev, "%s: Row=0x%"PRIx16"\n", ctx,
+		    ME->Row);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_COLUMN) {
+		device_printf(sc->sc_dev, "%s: Column=0x%"PRIx16"\n", ctx,
+		    ME->Column);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_BIT_POSITION) {
+		device_printf(sc->sc_dev, "%s: BitPosition=0x%"PRIx16"\n",
+		    ctx, ME->BitPosition);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_REQUESTOR_ID) {
+		device_printf(sc->sc_dev, "%s: RequestorId=0x%"PRIx64"\n",
+		    ctx, ME->RequestorId);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_RESPONDER_ID) {
+		device_printf(sc->sc_dev, "%s: ResponderId=0x%"PRIx64"\n",
+		    ctx, ME->ResponderId);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_TARGET_ID) {
+		device_printf(sc->sc_dev, "%s: TargetId=0x%"PRIx64"\n",
+		    ctx, ME->TargetId);
+	}
+	if (ME->ValidationBits & CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE) {
+		const uint8_t t = ME->MemoryErrorType;
+		const char *n = t < __arraycount(cper_memory_error_type)
+		    ? cper_memory_error_type[t] : NULL;
+
+		/* Print the symbolic name only if the table has one. */
+		if (n) {
+			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d"
+			    " (%s)\n", ctx, t, n);
+		} else {
+			device_printf(sc->sc_dev, "%s: MemoryErrorType=%d\n",
+			    ctx, t);
+		}
+	}
+}
+
+/*
+ * apei_cper_reports
+ *
+ *	Table of known Common Platform Error Record types, symbolic
+ *	names, minimum data lengths, and functions to report them.
+ *
+ *	The section types and corresponding section layouts are listed
+ *	at:
+ *
+ *	https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
+ */
+static const struct apei_cper_report {
+	const char *name;
+	const struct uuid *type;
+	size_t minlength;
+	void (*func)(struct apei_softc *, const void *, size_t, const char *);
+} apei_cper_reports[] = {
+	{ "memory", &CPER_MEMORY_ERROR_SECTION,
+	  sizeof(struct cper_memory_error),
+	  apei_cper_memory_error_report },
+};
+
+/*
+ * apei_gede_report_header(sc, gede, ctx, &headerlen, &report)
+ *
+ *	Report the header of the given Generic Error Data Entry in the
+ *	given context.
+ *
+ *	Return the actual length of the header in headerlen, or 0 if
+ *	not known because the revision isn't recognized.
+ *
+ *	Return the report type in report, or NULL if not known because
+ *	the section type isn't recognized.
+ *
+ *	The caller must have verified that at least sizeof(*gede) bytes
+ *	are accessible at gede.
+ */
+static void
+apei_gede_report_header(struct apei_softc *sc,
+    const ACPI_HEST_GENERIC_DATA *gede, const char *ctx,
+    size_t *headerlenp, const struct apei_cper_report **reportp)
+{
+	const ACPI_HEST_GENERIC_DATA_V300 *const gede_v3 = (const void *)gede;
+	struct uuid sectype;
+	char guidstr[69];
+	char buf[128];
+	unsigned i;
+
+	/*
+	 * Print the section type as a C initializer.  It would be
+	 * prettier to use standard hyphenated UUID notation, but that
+	 * notation is slightly ambiguous here (two octets could be
+	 * written either way, depending on Microsoft convention --
+	 * which influenced ACPI and UEFI -- or internet convention),
+	 * and the UEFI spec writes the C initializer notation, so this
+	 * makes it easier to search for.
+	 *
+	 * Also print out a symbolic name, if we know it.
+	 */
+	apei_cper_guid_dec(gede->SectionType, &sectype);
+	apei_format_guid(&sectype, guidstr);
+	for (i = 0; i < __arraycount(apei_cper_reports); i++) {
+		const struct apei_cper_report *const report =
+		    &apei_cper_reports[i];
+
+		if (memcmp(&sectype, report->type, sizeof(sectype)) != 0)
+			continue;
+		device_printf(sc->sc_dev, "%s: SectionType=%s (%s error)\n",
+		    ctx, guidstr, report->name);
+		*reportp = report;
+		break;
+	}
+	if (i == __arraycount(apei_cper_reports)) {
+		device_printf(sc->sc_dev, "%s: SectionType=%s\n", ctx,
+		    guidstr);
+		*reportp = NULL;
+	}
+
+	/*
+	 * Print the numeric severity and, if we have it, a symbolic
+	 * name for it.
+	 */
+	device_printf(sc->sc_dev, "%s: ErrorSeverity=%"PRIu32" (%s)\n", ctx,
+	    gede->ErrorSeverity,
+	    (gede->ErrorSeverity < __arraycount(apei_gede_severity)
+		? apei_gede_severity[gede->ErrorSeverity]
+		: "unknown"));
+
+	/*
+	 * The Revision may not often be useful, but this is only ever
+	 * shown at the time of a hardware error report, not something
+	 * you can glean at your convenience with acpidump.  So print
+	 * it anyway.
+	 */
+	device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", ctx,
+	    gede->Revision);
+
+	/*
+	 * Don't touch anything past the Revision until we've
+	 * determined we understand it.  Return the header length to
+	 * the caller, or return zero -- and stop here -- if we don't
+	 * know what the actual header length is.
+	 */
+	if (gede->Revision < 0x0300) {
+		*headerlenp = sizeof(*gede);
+	} else if (gede->Revision < 0x0400) {
+		*headerlenp = sizeof(*gede_v3);
+	} else {
+		*headerlenp = 0;
+		return;
+	}
+
+	/*
+	 * Print the validation bits at debug level.  Only really
+	 * helpful if there are bits we _don't_ know about.
+	 */
+	/* XXX define this format somewhere */
+	snprintb(buf, sizeof(buf), "\177\020"
+	    "b\000" "FRU_ID\0"
+	    "b\001" "FRU_TEXT\0" /* `FRU string', sometimes */
+	    "b\002" "TIMESTAMP\0"
+	    "\0", gede->ValidationBits);
+	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, buf);
+
+	/*
+	 * Print the CPER section flags.
+	 */
+	snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT, gede->Flags);
+	device_printf(sc->sc_dev, "%s: Flags=%s\n", ctx, buf);
+
+	/*
+	 * The ErrorDataLength is unlikely to be useful for the log, so
+	 * print it at debug level only.  It is printed in hex to match
+	 * the 0x prefix.
+	 */
+	aprint_debug_dev(sc->sc_dev, "%s: ErrorDataLength=0x%"PRIx32"\n",
+	    ctx, gede->ErrorDataLength);
+
+	/*
+	 * Print the FRU Id and text, if available.
+	 */
+	if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) {
+		struct uuid fruid;
+
+		apei_cper_guid_dec(gede->FruId, &fruid);
+		apei_format_guid(&fruid, guidstr);
+		device_printf(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr);
+	}
+	if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) {
+		device_printf(sc->sc_dev, "%s: FruText=%.20s\n",
+		    ctx, gede->FruText);
+	}
+
+	/*
+	 * Print the timestamp, if available by the revision number and
+	 * the validation bits.
+	 *
+	 * The timestamp octets are binary-coded decimal, so each one
+	 * is printed as two hex digits, which are its decimal digits.
+	 */
+	if (gede->Revision >= 0x0300 && gede->Revision < 0x0400 &&
+	    gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
+		const uint8_t *const t = (const uint8_t *)&gede_v3->TimeStamp;
+		const uint8_t s = t[0];
+		const uint8_t m = t[1];
+		const uint8_t h = t[2];
+		const uint8_t f = t[3];
+		const uint8_t D = t[4];
+		const uint8_t M = t[5];
+		const uint8_t Y = t[6];
+		const uint8_t C = t[7];
+
+		device_printf(sc->sc_dev, "%s: Timestamp=0x%"PRIx64
+		    " (%02x%02x-%02x-%02xT%02x:%02x:%02x%s)\n",
+		    ctx, gede_v3->TimeStamp,
+		    C,Y, M, D, h,m,s,
+		    f & __BIT(0) ? " (event time)" : " (collect time)");
+	}
+}
+
+/*
+ * apei_gesb_report(sc, gesb, size, ctx, &fatal)
+ *
+ *	Check a Generic Error Status Block, of at most the specified
+ *	size in bytes, and report any errors in it.
Return the 32-bit
+ *	Block Status in case the caller needs it to acknowledge the
+ *	report to firmware.  Store in *fatalp whether the error is to
+ *	be treated as fatal; this is false whenever there is nothing
+ *	to report.
+ */
+uint32_t
+apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb,
+    size_t size, const char *ctx, bool *fatalp)
+{
+	uint32_t status, unknownstatus, severity, nentries, i;
+	uint32_t datalen, gedeslen, rawdatalen;
+	const ACPI_HEST_GENERIC_DATA *gede0, *gede;
+	const unsigned char *rawdata;
+	char statusbuf[128];
+	bool fatal = false;
+
+	/*
+	 * Verify the buffer is large enough for a Generic Error Status
+	 * Block before we try to touch anything in it.  If it is not,
+	 * leave through the common exit so *fatalp is still stored.
+	 */
+	if (size < sizeof(*gesb)) {
+		device_printf(sc->sc_dev, "%s: truncated GESB, %zu < %zu\n",
+		    ctx, size, sizeof(*gesb));
+		status = 0;
+		goto out;
+	}
+	size -= sizeof(*gesb);
+
+	/*
+	 * Load the status.  Access ordering rules are unclear in the
+	 * ACPI specification; I'm guessing that load-acquire of the
+	 * block status is a good idea before any other access to the
+	 * GESB.
+	 */
+	status = atomic_load_acquire(&gesb->BlockStatus);
+
+	/*
+	 * If there are no status bits set, the rest of the GESB is
+	 * garbage, so stop here.
+	 */
+	if (status == 0) {
+		/* XXX dtrace */
+		/* XXX DPRINTF */
+		goto out;
+	}
+
+	/* XXX define this format somewhere */
+	snprintb(statusbuf, sizeof(statusbuf), "\177\020"
+	    "b\000" "UE\0"
+	    "b\001" "CE\0"
+	    "b\002" "MULTI_UE\0"
+	    "b\003" "MULTI_CE\0"
+	    "f\004\010" "GEDE_COUNT\0"
+	    "\0", status);
+
+	/*
+	 * Print a message to the console and dmesg about the severity
+	 * of the error.
+	 */
+	severity = gesb->ErrorSeverity;
+	nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT);
+	if (severity < __arraycount(apei_gesb_severity)) {
+		device_printf(sc->sc_dev, "%s reported hardware error:"
+		    " severity=%s nentries=%u status=%s\n",
+		    ctx, apei_gesb_severity[severity], nentries, statusbuf);
+	} else {
+		device_printf(sc->sc_dev, "%s reported error:"
+		    " severity=%"PRIu32" nentries=%u status=%s\n",
+		    ctx, severity, nentries, statusbuf);
+	}
+
+	/*
+	 * Make a determination about whether the error is fatal.
+	 *
+	 * XXX Currently we don't have any mechanism to recover from
+	 * uncorrectable but recoverable errors, so we treat those --
+	 * and anything else we don't recognize -- as fatal.
+	 */
+	switch (severity) {
+	case ACPI_HEST_GEN_ERROR_CORRECTED:
+	case ACPI_HEST_GEN_ERROR_NONE:
+		fatal = false;
+		break;
+	case ACPI_HEST_GEN_ERROR_FATAL:
+	case ACPI_HEST_GEN_ERROR_RECOVERABLE:	/* XXX */
+	default:
+		fatal = true;
+		break;
+	}
+
+	/*
+	 * Clear the bits we know about to warn if there's anything
+	 * left we don't understand.
+	 */
+	unknownstatus = status;
+	unknownstatus &= ~ACPI_HEST_UNCORRECTABLE;
+	unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE;
+	unknownstatus &= ~ACPI_HEST_CORRECTABLE;
+	unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE;
+	unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT;
+	if (unknownstatus != 0) {
+		/* XXX dtrace */
+		/* XXX rate-limit? */
+		device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:"
+		    " 0x%"PRIx32"\n", ctx, unknownstatus);
+	}
+
+	/*
+	 * Advance past the Generic Error Status Block (GESB) header to
+	 * the Generic Error Data Entries (GEDEs).
+	 */
+	gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1);
+
+	/*
+	 * Verify that the data length (GEDEs) fits within the size.
+	 * If not, truncate the GEDEs.
+	 *
+	 * Remember the total (possibly truncated) length of the GEDE
+	 * area in gedeslen: datalen is consumed as we walk the entries,
+	 * but the raw data begins after the whole area.
+	 */
+	datalen = gesb->DataLength;
+	if (size < datalen) {
+		device_printf(sc->sc_dev, "%s:"
+		    " GESB DataLength exceeds bounds: %zu < %"PRIu32"\n",
+		    ctx, size, datalen);
+		datalen = size;
+	}
+	size -= datalen;
+	gedeslen = datalen;
+
+	/*
+	 * Report each of the Generic Error Data Entries.
+	 */
+	for (i = 0; i < nentries; i++) {
+		size_t headerlen;
+		uint32_t errdatalen;
+		const struct apei_cper_report *report;
+		char subctx[128];
+
+		/*
+		 * Format a subcontext to show this numbered entry of
+		 * the GESB.
+		 */
+		snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i);
+
+		/*
+		 * If the remaining GESB data length isn't enough for a
+		 * GEDE header, stop here.
+		 */
+		if (datalen < sizeof(*gede)) {
+			device_printf(sc->sc_dev, "%s:"
+			    " truncated GEDE: %"PRIu32" < %zu bytes\n",
+			    subctx, datalen, sizeof(*gede));
+			break;
+		}
+
+		/*
+		 * Print the GEDE header and get the full length (may
+		 * vary from revision to revision of the GEDE) and the
+		 * CPER report function if possible.
+		 */
+		apei_gede_report_header(sc, gede, subctx,
+		    &headerlen, &report);
+
+		/*
+		 * If we don't know the header length because of an
+		 * unfamiliar revision, stop here.
+		 */
+		if (headerlen == 0) {
+			device_printf(sc->sc_dev, "%s:"
+			    " unknown revision: 0x%"PRIx16"\n",
+			    subctx, gede->Revision);
+			break;
+		}
+
+		/*
+		 * The header of this revision may be longer than the
+		 * minimum we checked for above.  Stop here rather than
+		 * let the subtraction below wrap around.
+		 *
+		 * XXX apei_gede_report_header is not told how much data
+		 * remains, so it may already have looked at the tail of
+		 * a longer header.
+		 */
+		if (datalen < headerlen) {
+			device_printf(sc->sc_dev, "%s:"
+			    " truncated GEDE: %"PRIu32" < %zu bytes\n",
+			    subctx, datalen, headerlen);
+			break;
+		}
+
+		/*
+		 * Stop here if what we mapped is too small for the
+		 * error data length.  Read the length once so the
+		 * check and the use agree.
+		 */
+		datalen -= headerlen;
+		errdatalen = gede->ErrorDataLength;
+		if (datalen < errdatalen) {
+			device_printf(sc->sc_dev, "%s: truncated GEDE payload:"
+			    " %"PRIu32" < %"PRIu32" bytes\n",
+			    subctx, datalen, errdatalen);
+			break;
+		}
+
+		/*
+		 * Report the Common Platform Error Record appendix to
+		 * this Generic Error Data Entry, provided it is at
+		 * least as long as the section type requires.
+		 */
+		if (report == NULL) {
+			device_printf(sc->sc_dev, "%s: [unknown type]\n",
+			    subctx);
+		} else if (errdatalen < report->minlength) {
+			device_printf(sc->sc_dev, "%s: truncated %s error"
+			    " data: %"PRIu32" < %zu bytes\n",
+			    subctx, report->name, errdatalen,
+			    report->minlength);
+		} else {
+			(*report->func)(sc, (const char *)gede + headerlen,
+			    errdatalen, subctx);
+		}
+
+		/*
+		 * Advance past the GEDE header and CPER data to the
+		 * next GEDE, and account for the data consumed.
+		 */
+		datalen -= errdatalen;
+		gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede +
+		    headerlen + errdatalen);
+	}
+
+	/*
+	 * Advance past the Generic Error Data Entries (GEDEs) to the
+	 * raw error data.
+	 *
+	 * XXX Provide Max Raw Data Length as a parameter, as found in
+	 * various HEST entry types.
+	 */
+	rawdata = (const unsigned char *)gede0 + gedeslen;
+
+	/*
+	 * Verify that the raw data length fits within the size.  If
+	 * not, truncate the raw data.
+	 */
+	rawdatalen = gesb->RawDataLength;
+	if (size < rawdatalen) {
+		device_printf(sc->sc_dev, "%s:"
+		    " GESB RawDataLength exceeds bounds: %zu < %"PRIu32"\n",
+		    ctx, size, rawdatalen);
+		rawdatalen = size;
+	}
+	size -= rawdatalen;
+
+	/*
+	 * Hexdump the raw data, if any.
+	 */
+	if (rawdatalen > 0) {
+		char devctx[128];
+
+		snprintf(devctx, sizeof(devctx), "%s: %s: raw data",
+		    device_xname(sc->sc_dev), ctx);
+		hexdump(printf, devctx, rawdata, rawdatalen);
+	}
+
+	/*
+	 * If there's anything left after the raw data, warn.
+	 */
+	if (size > 0) {
+		device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n",
+		    ctx, size);
+	}
+
+	/*
+	 * Return the status so the caller can ack it, and tell the
+	 * caller whether this error is fatal.
+	 */
+out:	*fatalp = fatal;
+	return status;
+}
+
+MODULE(MODULE_CLASS_DRIVER, apei, NULL);
+
+#ifdef _MODULE
+#include "ioconf.c"
+#endif
+
+/*
+ * apei_modcmd(cmd, opaque)
+ *
+ *	Module control.  When built as a module, init and fini
+ *	register and unregister the autoconf glue from ioconf.c;
+ *	otherwise they do nothing and return 0.  Any other command
+ *	returns ENOTTY.
+ */
+static int
+apei_modcmd(modcmd_t cmd, void *opaque)
+{
+	int error = 0;
+
+	switch (cmd) {
+	case MODULE_CMD_INIT:
+#ifdef _MODULE
+		error = config_init_component(cfdriver_ioconf_apei,
+		    cfattach_ioconf_apei, cfdata_ioconf_apei);
+#endif
+		return error;
+	case MODULE_CMD_FINI:
+#ifdef _MODULE
+		error = config_fini_component(cfdriver_ioconf_apei,
+		    cfattach_ioconf_apei, cfdata_ioconf_apei);
+#endif
+		return error;
+	default:
+		return ENOTTY;
+	}
+}
diff --git a/sys/dev/acpi/apei_bert.c b/sys/dev/acpi/apei_bert.c
new file mode 100644
index 000000000000..b19c58a4d906
--- /dev/null
+++ b/sys/dev/acpi/apei_bert.c
@@ -0,0 +1,138 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * APEI BERT -- Boot Error Record Table
+ *
+ * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#boot-error-source
+ */
+
+#include
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include
+
+#include
+
+#include
+#include
+#include
+
+#define _COMPONENT ACPI_RESOURCE_COMPONENT
+ACPI_MODULE_NAME ("apei")
+
+/*
+ * apei_bert_attach(sc)
+ *
+ *	Scan the Boot Error Record Table for hardware errors that
+ *	happened early at boot or on the previous boot.
+ *
+ *	On success the boot error region stays mapped in
+ *	sc->sc_bert.bsc_gesb until apei_bert_detach.
+ */
+void
+apei_bert_attach(struct apei_softc *sc)
+{
+	const ACPI_TABLE_BERT *bert = sc->sc_tab.bert;
+	struct apei_bert_softc *bsc = &sc->sc_bert;
+	bool fatal = false;
+
+	/*
+	 * Verify the table is large enough.
+	 */
+	if (bert->Header.Length < sizeof(*bert)) {
+		aprint_error_dev(sc->sc_dev, "BERT: truncated table:"
+		    " %"PRIu32" < %zu bytes\n",
+		    bert->Header.Length, sizeof(*bert));
+		return;
+	}
+
+	/*
+	 * In verbose boots, print the BERT physical address and
+	 * length.  The operator might find this handy for dd'ing it
+	 * from /dev/mem, if allowed.
+	 */
+	aprint_verbose_dev(sc->sc_dev, "BERT: 0x%x bytes at 0x%"PRIx64"\n",
+	    bert->RegionLength, bert->Address);
+
+	/*
+	 * Verify the length is enough for a Generic Error Status Block
+	 * header, at least.
+	 */
+	if (bert->RegionLength < sizeof(*bsc->bsc_gesb)) {
+		aprint_error_dev(sc->sc_dev,
+		    "BERT: truncated boot error region, %"PRIu32" < %zu bytes\n",
+		    bert->RegionLength, sizeof(*bsc->bsc_gesb));
+		return;
+	}
+
+	/*
+	 * Map the GESB and process it, but don't acknowledge it --
+	 * this is a one-time polled source; it won't (or at least,
+	 * shouldn't) change after boot.
+	 *
+	 * If the mapping fails there is nothing we can examine.
+	 */
+	bsc->bsc_gesb = AcpiOsMapMemory(bert->Address, bert->RegionLength);
+	if (bsc->bsc_gesb == NULL) {
+		aprint_error_dev(sc->sc_dev,
+		    "BERT: failed to map boot error region\n");
+		return;
+	}
+	const uint32_t status = apei_gesb_report(sc, bsc->bsc_gesb,
+	    bert->RegionLength, "boot error record", &fatal);
+	if (status == 0) {
+		/*
+		 * If there were no boot errors, leave a note in dmesg
+		 * to this effect without cluttering up the console
+		 * unless you asked for it by `boot -v'.
+		 */
+		aprint_verbose_dev(sc->sc_dev,
+		    "BERT: no boot errors recorded\n");
+	}
+
+	/*
+	 * If the error was fatal, print a warning to the console.
+	 * Probably not actually fatal now since it is usually related
+	 * to early or previous boot.
+	 */
+	if (fatal) {
+		aprint_error_dev(sc->sc_dev, "BERT:"
+		    " fatal pre-boot error recorded\n");
+	}
+
+	/* XXX expose content via sysctl? */
+}
+
+/*
+ * apei_bert_detach(sc)
+ *
+ *	Free any software resources associated with the Boot Error
+ *	Record Table.
+ */
+void
+apei_bert_detach(struct apei_softc *sc)
+{
+	const ACPI_TABLE_BERT *bert = sc->sc_tab.bert;
+	struct apei_bert_softc *bsc = &sc->sc_bert;
+
+	/* Nothing is mapped if attach bailed out early. */
+	if (bsc->bsc_gesb) {
+		AcpiOsUnmapMemory(bsc->bsc_gesb, bert->RegionLength);
+		bsc->bsc_gesb = NULL;
+	}
+}
diff --git a/sys/dev/acpi/apei_bertvar.h b/sys/dev/acpi/apei_bertvar.h
new file mode 100644
index 000000000000..9b91ef1d2a6a
--- /dev/null
+++ b/sys/dev/acpi/apei_bertvar.h
@@ -0,0 +1,48 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_DEV_ACPI_APEI_BERTVAR_H_
+#define _SYS_DEV_ACPI_APEI_BERTVAR_H_
+
+#include
+
+struct apei_softc;
+
+/*
+ * struct apei_bert_softc
+ *
+ *	Software state for access to the BERT, Boot Error Record Table.
+ */
+struct apei_bert_softc {
+	ACPI_HEST_GENERIC_STATUS *bsc_gesb;	/* mapped boot error region */
+};
+
+void apei_bert_attach(struct apei_softc *);
+void apei_bert_detach(struct apei_softc *);
+
+#endif /* _SYS_DEV_ACPI_APEI_BERTVAR_H_ */
diff --git a/sys/dev/acpi/apei_cper.h b/sys/dev/acpi/apei_cper.h
new file mode 100644
index 000000000000..9cbfd8b1bd27
--- /dev/null
+++ b/sys/dev/acpi/apei_cper.h
@@ -0,0 +1,234 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * UEFI Common Platform Error Record
+ *
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html
+ */
+
+#ifndef _SYS_DEV_ACPI_APEI_CPER_H_
+#define _SYS_DEV_ACPI_APEI_CPER_H_
+
+#include
+
+#include
+
+/*
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#record-header
+ */
+struct cper_header {
+	char SignatureStart[4];		/* "CPER" */
+	uint16_t Revision;
+	uint32_t SignatureEnd;		/* 0xffffffff */
+	uint16_t SectionCount;
+	uint32_t ErrorSeverity;
+	uint32_t ValidationBits;
+	uint32_t RecordLength;
+	uint64_t Timestamp;
+	uint8_t PlatformId[16];
+	uint8_t PartitionId[16];
+	uint8_t CreatorId[16];
+	uint8_t NotificationType[16];
+	uint64_t RecordId;
+	uint32_t Flags;
+	uint64_t PersistenceInfo;
+	uint8_t Reserved[12];
+} __packed;
+__CTASSERT(sizeof(struct cper_header) == 128);
+
+enum {				/* struct cper_header::ErrorSeverity */
+	CPER_ERROR_SEVERITY_RECOVERABLE = 0,
+	CPER_ERROR_SEVERITY_FATAL = 1,
+	CPER_ERROR_SEVERITY_CORRECTED = 2,
+	CPER_ERROR_SEVERITY_INFORMATIONAL = 3,
+};
+
+enum {				/* struct cper_header::ValidationBits */
+	CPER_VALID_PLATFORM_ID = __BIT(0),
+	CPER_VALID_TIMESTAMP = __BIT(1),
+	CPER_VALID_PARTITION_ID = __BIT(2),
+};
+
+/*
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-record-header-flags
+ */
+enum {				/* struct cper_header::Flags */
+	CPER_HW_ERROR_FLAG_RECOVERED = __BIT(0),
+	CPER_HW_ERROR_FLAG_PREVERR = __BIT(1),
+	CPER_HW_ERROR_FLAG_SIMULATED = __BIT(2),
+};
+
+/*
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#section-descriptor-format
+ */
+enum {
+	CPER_SECTION_FLAG_PRIMARY = __BIT(0),
+	CPER_SECTION_FLAG_CONTAINMENT_WARNING = __BIT(1),
+	CPER_SECTION_FLAG_RESET = __BIT(2),
+	CPER_SECTION_FLAG_ERROR_THRESHOLD_EXCEEDED = __BIT(3),
+	CPER_SECTION_FLAG_RESOURCE_NOT_ACCESSIBLE = __BIT(4),
+	CPER_SECTION_FLAG_LATENT_ERROR = __BIT(5),
+	CPER_SECTION_FLAG_PROPAGATED = __BIT(6),
+	CPER_SECTION_FLAG_OVERFLOW = __BIT(7),
+};
+
+/* snprintb(3) format naming the CPER_SECTION_FLAG_* bits. */
+#define CPER_SECTION_FLAGS_FMT "\177\020" \
+	"b\000" "PRIMARY\0" \
+	"b\001" "CONTAINMENT_WARNING\0" \
+	"b\002" "RESET\0" \
+	"b\003" "ERROR_THRESHOLD_EXCEEDED\0" \
+	"b\004" "RESOURCE_NOT_ACCESSIBLE\0" \
+	"b\005" "LATENT_ERROR\0" \
+	"b\006" "PROPAGATED\0" \
+	"b\007" "OVERFLOW\0" \
+	"\0"
+
+/*
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
+ *
+ * Type: {0xa5bc1114,0x6f64,0x4ede,{0xb8,0x63,0x3e,0x83,0xed,0x7c,0x83,0xb1}}
+ */
+
+struct cper_memory_error {
+	uint64_t ValidationBits;
+	uint64_t ErrorStatus;
+	uint64_t PhysicalAddress;
+	uint64_t PhysicalAddressMask;
+	uint16_t Node;
+	uint16_t Card;
+	uint16_t Module;
+	uint16_t Bank;
+	uint16_t Device;
+	uint16_t Row;
+	uint16_t Column;
+	uint16_t BitPosition;
+	uint64_t RequestorId;
+	uint64_t ResponderId;
+	uint64_t TargetId;
+	uint8_t MemoryErrorType;
+} __packed;
+__CTASSERT(sizeof(struct cper_memory_error) == 73);
+
+/* The base memory error section followed by the extended fields. */
+struct cper_memory_error_ext {
+	struct cper_memory_error Base;
+	uint8_t Extended;
+	uint16_t RankNumber;
+	uint16_t CardHandle;
+	uint16_t ModuleHandle;
+} __packed;
+__CTASSERT(sizeof(struct cper_memory_error_ext) == 80);
+
+enum {				/* struct cper_memory_error::ValidationBits */
+	CPER_MEMORY_ERROR_VALID_ERROR_STATUS = __BIT(0),
+	CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS = __BIT(1),
+	CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK = __BIT(2),
+	CPER_MEMORY_ERROR_VALID_NODE = __BIT(3),
+	CPER_MEMORY_ERROR_VALID_CARD = __BIT(4),
+	CPER_MEMORY_ERROR_VALID_MODULE = __BIT(5),
+	CPER_MEMORY_ERROR_VALID_BANK = __BIT(6),
+	CPER_MEMORY_ERROR_VALID_DEVICE = __BIT(7),
+	CPER_MEMORY_ERROR_VALID_ROW = __BIT(8),
+	CPER_MEMORY_ERROR_VALID_COLUMN = __BIT(9),
+	CPER_MEMORY_ERROR_VALID_BIT_POSITION = __BIT(10),
+	CPER_MEMORY_ERROR_VALID_REQUESTOR_ID = __BIT(11),
+	CPER_MEMORY_ERROR_VALID_RESPONDER_ID = __BIT(12),
+	CPER_MEMORY_ERROR_VALID_TARGET_ID = __BIT(13),
+	CPER_MEMORY_ERROR_VALID_MEMORY_ERROR_TYPE = __BIT(14),
+	CPER_MEMORY_ERROR_VALID_RANK_NUMBER = __BIT(15),
+	CPER_MEMORY_ERROR_VALID_CARD_HANDLE = __BIT(16),
+	CPER_MEMORY_ERROR_VALID_MODULE_HANDLE = __BIT(17),
+	CPER_MEMORY_ERROR_VALID_EXTENDED_ROW = __BIT(18),
+	CPER_MEMORY_ERROR_VALID_BANK_GROUP = __BIT(19),
+	CPER_MEMORY_ERROR_VALID_BANK_ADDRESS = __BIT(20),
+	CPER_MEMORY_ERROR_VALID_CHIP_ID = __BIT(21),
+};
+
+/*
+ * snprintb(3) format naming the CPER_MEMORY_ERROR_VALID_* bits; the
+ * names follow the enum above with the prefix dropped.
+ */
+#define CPER_MEMORY_ERROR_VALIDATION_BITS_FMT "\177\020" \
+	"b\000" "ERROR_STATUS\0" \
+	"b\001" "PHYSICAL_ADDRESS\0" \
+	"b\002" "PHYSICAL_ADDRESS_MASK\0" \
+	"b\003" "NODE\0" \
+	"b\004" "CARD\0" \
+	"b\005" "MODULE\0" \
+	"b\006" "BANK\0" \
+	"b\007" "DEVICE\0" \
+	"b\010" "ROW\0" \
+	"b\011" "COLUMN\0" \
+	"b\012" "BIT_POSITION\0" \
+	"b\013" "REQUESTOR_ID\0" \
+	"b\014" "RESPONDER_ID\0" \
+	"b\015" "TARGET_ID\0" \
+	"b\016" "MEMORY_ERROR_TYPE\0" \
+	"b\017" "RANK_NUMBER\0" \
+	"b\020" "CARD_HANDLE\0" \
+	"b\021" "MODULE_HANDLE\0" \
+	"b\022" "EXTENDED_ROW\0" \
+	"b\023" "BANK_GROUP\0" \
+	"b\024" "BANK_ADDRESS\0" \
+	"b\025" "CHIP_ID\0" \
+	"\0"
+
+enum {				/* struct cper_memory_error::Bank */
+	CPER_MEMORY_ERROR_BANK_ADDRESS = __BITS(7,0),
+	CPER_MEMORY_ERROR_BANK_GROUP = __BITS(15,8),
+};
+
+/*
+ * Memory error types, as F(long name, short name, value) so that the
+ * same list can generate both the enum below and tables of names.
+ */
+#define CPER_MEMORY_ERROR_TYPES(F) \
+	F(CPER_MEMORY_ERROR_UNKNOWN, UNKNOWN, 0) \
+	F(CPER_MEMORY_ERROR_NO_ERROR, NO_ERROR, 1) \
+	F(CPER_MEMORY_ERROR_SINGLEBIT_ECC, SINGLEBIT_ECC, 2) \
+	F(CPER_MEMORY_ERROR_MULTIBIT_ECC, MULTIBIT_ECC, 3) \
+	F(CPER_MEMORY_ERROR_SINGLESYM_CHIPKILL_ECC, SINGLESYM_CHIPKILL_ECC, 4)\
+	F(CPER_MEMORY_ERROR_MULTISYM_CHIPKILL_ECC, MULTISYM_CHIPKILL_ECC, 5) \
+	F(CPER_MEMORY_ERROR_MASTER_ABORT, MASTER_ABORT, 6) \
+	F(CPER_MEMORY_ERROR_TARGET_ABORT, TARGET_ABORT, 7) \
+	F(CPER_MEMORY_ERROR_PARITY_ERROR, PARITY_ERROR, 8) \
+	F(CPER_MEMORY_ERROR_WATCHDOG_TIMEOUT, WATCHDOG_TIMEOUT, 9) \
+	F(CPER_MEMORY_ERROR_INVALID_ADDRESS, INVALID_ADDRESS, 10) \
+	F(CPER_MEMORY_ERROR_MIRROR_BROKEN, MIRROR_BROKEN, 11) \
+	F(CPER_MEMORY_ERROR_MEMORY_SPARING, MEMORY_SPARING, 12) \
+	F(CPER_MEMORY_ERROR_SCRUB_CORRECTED_ERROR, SCRUB_CORRECTED_ERROR, 13) \
+	F(CPER_MEMORY_ERROR_SCRUB_UNCORRECTED_ERROR, SCRUB_UNCORRECTED_ERROR, \
+	    14) \
+	F(CPER_MEMORY_ERROR_PHYSMEM_MAPOUT_EVENT, PHYSMEM_MAPOUT_EVENT, 15) \
+	/* end of CPER_MEMORY_ERROR_TYPES */
+
+enum cper_memory_error_type { /* struct cper_memory_error::MemoryErrorType */
+#define CPER_MEMORY_ERROR_TYPE_DEF(LN, SN, V) LN = V,
+	CPER_MEMORY_ERROR_TYPES(CPER_MEMORY_ERROR_TYPE_DEF)
+#undef CPER_MEMORY_ERROR_TYPE_DEF
+};
+
+enum {				/* struct cper_memory_error_ext::Extended */
+	CPER_MEMORY_ERROR_EXTENDED_ROWBIT16 = __BIT(0),
+	CPER_MEMORY_ERROR_EXTENDED_ROWBIT17 = __BIT(1),
+	CPER_MEMORY_ERROR_EXTENDED_CHIPID = __BITS(7,5),
+};
+
+#endif /* _SYS_DEV_ACPI_APEI_CPER_H_ */
diff --git a/sys/dev/acpi/apei_einj.c b/sys/dev/acpi/apei_einj.c
new file mode 100644
index 000000000000..0e75dd0583e3
--- /dev/null
+++ b/sys/dev/acpi/apei_einj.c
@@ -0,0 +1,851 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI EINJ -- Error Injection Table + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-injection + * + * XXX Consider a /dev node with ioctls for error injection rather than + * the somewhat kooky sysctl interface. By representing an error + * injection request in a structure, we can serialize access to the + * platform's EINJ operational context. However, this also requires + * some nontrivial userland support; maybe relying on the user to tread + * carefully with error injection is fine -- after all, many types of + * error injection will cause a system halt/panic. 
+ */
+
+#include
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "ioconf.h"
+
+#define _COMPONENT ACPI_RESOURCE_COMPONENT
+ACPI_MODULE_NAME ("apei")
+
+static void apei_einj_instfunc(ACPI_WHEA_HEADER *, void *, uint32_t *,
+    uint32_t);
+static uint64_t apei_einj_act(struct apei_softc *, enum AcpiEinjActions,
+    uint64_t);
+static uint64_t apei_einj_trigger(struct apei_softc *, uint64_t);
+static int apei_einj_action_sysctl(SYSCTLFN_ARGS);
+static int apei_einj_trigger_sysctl(SYSCTLFN_ARGS);
+static int apei_einj_types_sysctl(SYSCTLFN_ARGS);
+
+/*
+ * apei_einj_action
+ *
+ *	Symbolic names of the APEI EINJ (Error Injection) logical actions
+ *	are taken (and downcased) from:
+ *
+ *	https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-injection-actions
+ */
+static const char *const apei_einj_action[] = {
+	[ACPI_EINJ_BEGIN_OPERATION] = "begin_injection_operation",
+	[ACPI_EINJ_GET_TRIGGER_TABLE] = "get_trigger_error_action_table",
+	[ACPI_EINJ_SET_ERROR_TYPE] = "set_error_type",
+	[ACPI_EINJ_GET_ERROR_TYPE] = "get_error_type",
+	[ACPI_EINJ_END_OPERATION] = "end_operation",
+	[ACPI_EINJ_EXECUTE_OPERATION] = "execute_operation",
+	[ACPI_EINJ_CHECK_BUSY_STATUS] = "check_busy_status",
+	[ACPI_EINJ_GET_COMMAND_STATUS] = "get_command_status",
+	[ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS] = "set_error_type_with_address",
+	[ACPI_EINJ_GET_EXECUTE_TIMINGS] = "get_execute_operation_timings",
+};
+
+/*
+ * apei_einj_instruction
+ *
+ *	Symbolic names of the APEI EINJ (Error Injection) instructions to
+ *	implement logical actions are taken (and downcased) from:
+ *
+ *	https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#injection-instructions-table
+ *
+ *	Each instruction gets its own name, so that the plain and
+ *	_value variants can be told apart.
+ */
+
+static const char *const apei_einj_instruction[] = {
+	[ACPI_EINJ_READ_REGISTER] = "read_register",
+	[ACPI_EINJ_READ_REGISTER_VALUE] = "read_register_value",
+	[ACPI_EINJ_WRITE_REGISTER] = "write_register",
+	[ACPI_EINJ_WRITE_REGISTER_VALUE] = "write_register_value",
+	[ACPI_EINJ_NOOP] = "noop",
+};
+
+/*
+ * apei_einj_attach(sc)
+ *
+ *	Scan the Error Injection table to ascertain what error
+ *	injection actions the firmware supports and how to perform
+ *	them.  Create sysctl nodes for triggering error injection.
+ */
+void
+apei_einj_attach(struct apei_softc *sc)
+{
+	ACPI_TABLE_EINJ *einj = sc->sc_tab.einj;
+	struct apei_einj_softc *jsc = &sc->sc_einj;
+	ACPI_EINJ_ENTRY *entry;
+	const struct sysctlnode *sysctl_einj;
+	const struct sysctlnode *sysctl_einj_action;
+	uint32_t i, nentries, maxnentries;
+	unsigned action;
+	int error;
+
+	/*
+	 * Verify the table length, table header length, and
+	 * instruction entry count are all sensible.  If the header is
+	 * truncated, stop here; if the entries are truncated, stop at
+	 * the largest integral number of full entries that fits.
+	 */
+	if (einj->Header.Length < sizeof(*einj)) {
+		aprint_error_dev(sc->sc_dev, "EINJ: truncated table:"
+		    " %"PRIu32" < %zu minimum bytes\n",
+		    einj->Header.Length, sizeof(*einj));
+		return;
+	}
+	if (einj->HeaderLength <
+	    sizeof(*einj) - offsetof(ACPI_TABLE_EINJ, HeaderLength)) {
+		aprint_error_dev(sc->sc_dev, "EINJ: truncated header:"
+		    " %"PRIu32" < %zu bytes\n",
+		    einj->HeaderLength,
+		    sizeof(*einj) - offsetof(ACPI_TABLE_EINJ, HeaderLength));
+		return;
+	}
+	nentries = einj->Entries;
+	maxnentries = (einj->Header.Length - sizeof(*einj))/sizeof(*entry);
+	if (nentries > maxnentries) {
+		aprint_error_dev(sc->sc_dev, "EINJ: excessive entries:"
+		    " %"PRIu32", truncating to %"PRIu32"\n",
+		    nentries, maxnentries);
+		nentries = maxnentries;
+	}
+	if (nentries*sizeof(*entry) < einj->Header.Length - sizeof(*einj)) {
+		aprint_error_dev(sc->sc_dev, "EINJ:"
+		    " %zu bytes of trailing garbage after last entry\n",
+		    einj->Header.Length - nentries*sizeof(*entry));
+	}
+
+	/*
+	 * Create sysctl hw.acpi.apei.einj for all EINJ-related knobs.
+ */ + error = sysctl_createv(&sc->sc_sysctllog, 0, + &sc->sc_sysctlroot, &sysctl_einj, 0, + CTLTYPE_NODE, "einj", + SYSCTL_DESCR("Error injection"), + NULL, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " hw.acpi.apei.einj: %d\n", error); + sysctl_einj = NULL; + } + + /* + * Create an interpreter for EINJ actions. + */ + jsc->jsc_interp = apei_interp_create("EINJ", + apei_einj_action, __arraycount(apei_einj_action), + apei_einj_instruction, __arraycount(apei_einj_instruction), + /*instvalid*/NULL, apei_einj_instfunc); + + /* + * Compile the interpreter from the EINJ action instruction + * table. + */ + entry = (ACPI_EINJ_ENTRY *)(einj + 1); + for (i = 0; i < nentries; i++, entry++) + apei_interp_pass1_load(jsc->jsc_interp, i, &entry->WheaHeader); + entry = (ACPI_EINJ_ENTRY *)(einj + 1); + for (i = 0; i < nentries; i++, entry++) { + apei_interp_pass2_verify(jsc->jsc_interp, i, + &entry->WheaHeader); + } + apei_interp_pass3_alloc(jsc->jsc_interp); + entry = (ACPI_EINJ_ENTRY *)(einj + 1); + for (i = 0; i < nentries; i++, entry++) { + apei_interp_pass4_assemble(jsc->jsc_interp, i, + &entry->WheaHeader); + } + apei_interp_pass5_verify(jsc->jsc_interp); + + /* + * Create sysctl hw.acpi.apei.einj.action for individual actions. + */ + error = sysctl_einj == NULL ? ENOENT : + sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_einj, &sysctl_einj_action, 0, + CTLTYPE_NODE, "action", + SYSCTL_DESCR("EINJ actions"), + NULL, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " hw.acpi.apei.einj.action: %d\n", error); + sysctl_einj_action = NULL; + } + + /* + * Create sysctl nodes for each action we know about. + */ + for (action = 0; action < __arraycount(apei_einj_action); action++) { + if (apei_einj_action[action] == NULL) + continue; + + /* + * Check to see if there are any instructions for this + * action. + * + * XXX Maybe add this to the apei_interp.h abstraction. 
+ */ + entry = (ACPI_EINJ_ENTRY *)(einj + 1); + for (i = 0; i < nentries; i++, entry++) { + ACPI_WHEA_HEADER *const header = &entry->WheaHeader; + + if (action == header->Action) + break; + } + if (i == nentries) { + /* + * No instructions for this action, so assume + * it's not supported. + */ + continue; + } + + /* + * Create a sysctl knob to perform the action. + */ + error = sysctl_einj_action == NULL ? ENOENT : + sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_einj_action, NULL, CTLFLAG_READWRITE, + CTLTYPE_QUAD, apei_einj_action[action], + NULL, /* description */ + &apei_einj_action_sysctl, 0, NULL, 0, + action, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " sysctl hw.acpi.apei.einj.action.%s: %d\n", + apei_einj_action[action], error); + continue; + } + } + + /* + * Create a sysctl knob to trigger error. + */ + error = sysctl_einj == NULL ? ENOENT : + sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_einj, NULL, CTLFLAG_READWRITE, + CTLTYPE_QUAD, "trigger", + NULL, /* description */ + &apei_einj_trigger_sysctl, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " sysctl hw.acpi.apei.einj.trigger: %d\n", + error); + } + + /* + * Query the available types of error to inject and print it to + * dmesg. 
+ * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-types + */ + uint64_t types = apei_einj_act(sc, ACPI_EINJ_GET_ERROR_TYPE, 0); + char typesbuf[1024], *typesp; + /* XXX define this format somewhere */ + snprintb_m(typesbuf, sizeof(typesbuf), "\177\020" + "b\000" "PROC_CORRECTABLE\0" + "b\001" "PROC_UNCORRECTABLE\0" + "b\002" "PROC_FATAL\0" + "b\003" "MEM_CORRECTABLE\0" + "b\004" "MEM_UNCORRECTABLE\0" + "b\005" "MEM_FATAL\0" + "b\006" "PCIE_CORRECTABLE\0" + "b\007" "PCIE_UNCORRECTABLE\0" + "b\010" "PCIE_FATAL\0" + "b\011" "PLAT_CORRECTABLE\0" + "b\012" "PLAT_UNCORRECTABLE\0" + "b\013" "PLAT_FATAL\0" + "b\014" "CXLCACHE_CORRECTABLE\0" + "b\015" "CXLCACHE_UNCORRECTABLE\0" + "b\016" "CXLCACHE_FATAL\0" + "b\017" "CXLMEM_CORRECTABLE\0" + "b\020" "CXLMEM_UNCORRECTABLE\0" + "b\021" "CXLMEM_FATAL\0" +// "f\022\014" "reserved\0" + "b\036" "EINJv2\0" + "b\037" "VENDOR\0" + "\0", types, 36); + for (typesp = typesbuf; strlen(typesp); typesp += strlen(typesp) + 1) { + aprint_normal_dev(sc->sc_dev, "EINJ: can inject:" + " %s\n", typesp); + } + + /* + * Create a sysctl knob to query the available types of error + * to inject. In principle this could change dynamically, so + * we'll make it dynamic. + */ + error = sysctl_einj == NULL ? ENOENT : + sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_einj, NULL, 0, + CTLTYPE_QUAD, "types", + SYSCTL_DESCR("Types of errors that can be injected"), + &apei_einj_types_sysctl, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, "failed to create" + " sysctl hw.acpi.apei.einj.types: %d\n", + error); + } +} + +/* + * apei_einj_detach(sc) + * + * Free any software resources associated with the Error Injection + * table. 
+ */ +void +apei_einj_detach(struct apei_softc *sc) +{ + struct apei_einj_softc *jsc = &sc->sc_einj; + + if (jsc->jsc_interp) { + apei_interp_destroy(jsc->jsc_interp); + jsc->jsc_interp = NULL; + } +} + +/* + * struct apei_einj_machine + * + * Machine state for executing EINJ instructions. + */ +struct apei_einj_machine { + struct apei_softc *sc; + uint64_t x; /* in */ + uint64_t y; /* out */ +}; + +/* + * apei_einj_instfunc(header, cookie, &ip, maxip) + * + * Run a single instruction in the service of performing an EINJ + * action. Updates the EINJ machine at cookie in place. + * + * This doesn't read or write ip. The TRIGGER_ERROR logic relies + * on this; if you change the fact, you must update that logic + * too. + */ +static void +apei_einj_instfunc(ACPI_WHEA_HEADER *header, void *cookie, uint32_t *ipp, + uint32_t maxip) +{ + struct apei_einj_machine *M = cookie; + ACPI_STATUS rv = AE_OK; + + /* + * Abbreviate some of the intermediate quantities to make the + * instruction logic conciser and more legible. + */ + const uint8_t BitOffset = header->RegisterRegion.BitOffset; + const uint64_t Mask = header->Mask; + const uint64_t Value = header->Value; + ACPI_GENERIC_ADDRESS *const reg = &header->RegisterRegion; + const bool preserve_register = header->Flags & ACPI_EINJ_PRESERVE; + + aprint_debug_dev(M->sc->sc_dev, "%s: instr=0x%02"PRIx8 + " (%s)" + " Address=0x%"PRIx64 + " BitOffset=%"PRIu8" Mask=0x%"PRIx64" Value=0x%"PRIx64 + " Flags=0x%"PRIx8"\n", + __func__, header->Instruction, + (header->Instruction < __arraycount(apei_einj_instruction) + ? apei_einj_instruction[header->Instruction] + : "unknown"), + reg->Address, + BitOffset, Mask, Value, + header->Flags); + + /* + * Zero-initialize the output by default. + */ + M->y = 0; + + /* + * Dispatch the instruction. 
+ */ + switch (header->Instruction) { + case ACPI_EINJ_READ_REGISTER: + rv = apei_read_register(reg, Mask, &M->y); + if (ACPI_FAILURE(rv)) + break; + break; + case ACPI_EINJ_READ_REGISTER_VALUE: { + uint64_t v; + + rv = apei_read_register(reg, Mask, &v); + if (ACPI_FAILURE(rv)) + break; + M->y = (v == Value ? 1 : 0); + break; + } + case ACPI_EINJ_WRITE_REGISTER: + rv = apei_write_register(reg, Mask, preserve_register, M->x); + break; + case ACPI_EINJ_WRITE_REGISTER_VALUE: + rv = apei_write_register(reg, Mask, preserve_register, Value); + break; + case ACPI_EINJ_NOOP: + break; + default: + rv = AE_ERROR; + break; + } + + /* + * If any register I/O failed, print the failure message. This + * could be more specific about exactly what failed, but that + * takes a little more effort to write. + */ + if (ACPI_FAILURE(rv)) { + aprint_debug_dev(M->sc->sc_dev, "%s: failed: %s\n", __func__, + AcpiFormatException(rv)); + } +} + +/* + * apei_einj_act(sc, action, x) + * + * Perform the named EINJ action with input x, by executing the + * instruction defined for the action by the EINJ, and return the + * output. + */ +static uint64_t +apei_einj_act(struct apei_softc *sc, enum AcpiEinjActions action, + uint64_t x) +{ + struct apei_einj_softc *const jsc = &sc->sc_einj; + struct apei_einj_machine einj_machine, *const M = &einj_machine; + + aprint_debug_dev(sc->sc_dev, "%s: action=%d (%s) input=0x%"PRIx64"\n", + __func__, + action, + (action < __arraycount(apei_einj_action) + ? apei_einj_action[action] + : "unknown"), + x); + + /* + * Initialize the machine to execute the action's instructions. + */ + memset(M, 0, sizeof(*M)); + M->sc = sc; + M->x = x; /* input */ + M->y = 0; /* output */ + + /* + * Run the interpreter. + */ + apei_interpret(jsc->jsc_interp, action, M); + + /* + * Return the result. 
+ */ + aprint_debug_dev(sc->sc_dev, "%s: output=0x%"PRIx64"\n", __func__, + M->y); + return M->y; +} + +/* + * apei_einj_trigger(sc, x) + * + * Obtain the TRIGGER_ERROR action table and, if there is anything + * to be done with it, execute it with input x and return the + * output. If nothing is to be done, return 0. + */ +static uint64_t +apei_einj_trigger(struct apei_softc *sc, uint64_t x) +{ + uint64_t teatab_pa; + ACPI_EINJ_TRIGGER *teatab = NULL; + size_t mapsize = 0, tabsize; + ACPI_EINJ_ENTRY *entry; + struct apei_einj_machine einj_machine, *const M = &einj_machine; + uint32_t i, nentries; + + /* + * Get the TRIGGER_ERROR action table's physical address. + */ + teatab_pa = apei_einj_act(sc, ACPI_EINJ_GET_TRIGGER_TABLE, 0); + + /* + * Map just the header. We don't know how large the table is + * because we get that from the header. + */ + mapsize = sizeof(*teatab); + teatab = AcpiOsMapMemory(teatab_pa, mapsize); + + /* + * If there's no entries, stop here -- nothing to do separately + * to trigger an error report. + */ + nentries = teatab->EntryCount; + if (nentries == 0) + goto out; + + /* + * If the header size or the table size is nonsense, bail. + */ + if (teatab->HeaderSize < sizeof(*teatab) || + teatab->TableSize < teatab->HeaderSize) { + device_printf(sc->sc_dev, "TRIGGER_ERROR action table:" + " invalid sizes:" + " HeaderSize=%"PRIu32" TableSize=%"PRIu32"\n", + teatab->HeaderSize, teatab->TableSize); + } + + /* + * If the revision is nonzero, we don't know what to do. I've + * only seen revision zero so far, and the spec doesn't say + * anything about revisions that I've found. + */ + if (teatab->Revision != 0) { + device_printf(sc->sc_dev, "TRIGGER_ERROR action table:" + " unknown revision: %"PRIx32"\n", teatab->Revision); + goto out; + } + + /* + * Truncate the table to the number of entries requested and + * ignore trailing garbage if the table is long, or round the + * number of entries down to what fits in the table if the + * table is short. 
+ */ + tabsize = teatab->TableSize; + if (nentries < howmany(tabsize, sizeof(ACPI_EINJ_ENTRY))) { + device_printf(sc->sc_dev, "TRIGGER_ERROR action table:" + " %zu bytes of trailing garbage\n", + tabsize - nentries*sizeof(ACPI_EINJ_ENTRY)); + tabsize = nentries*sizeof(ACPI_EINJ_ENTRY); + } else if (nentries > howmany(tabsize, sizeof(ACPI_EINJ_ENTRY))) { + device_printf(sc->sc_dev, "TRIGGER_ERROR action table:" + " truncated to %zu entries\n", + nentries*sizeof(ACPI_EINJ_ENTRY)); + nentries = howmany(tabsize, sizeof(ACPI_EINJ_ENTRY)); + } + + /* + * Unmap the header and map the whole table instead. + */ + AcpiOsUnmapMemory(teatab, mapsize); + mapsize = tabsize; + teatab = AcpiOsMapMemory(teatab_pa, mapsize); + + /* + * Initialize the machine to execute the TRIGGER_ERROR action's + * instructions. + */ + memset(M, 0, sizeof(*M)); + M->sc = sc; + M->x = x; /* input */ + M->y = 0; /* output */ + + /* + * Now iterate over the EINJ-type entries and execute the + * trigger error action instructions -- but skip if they're not + * for the TRIGGER_ERROR action, and stop if they're truncated. + * + * Entries are fixed-size, so we can just index them. + */ + entry = (ACPI_EINJ_ENTRY *)(teatab + 1); + for (i = 0; i < nentries; i++) { + ACPI_WHEA_HEADER *const header = &entry[i].WheaHeader; + + /* + * Verify the action is TRIGGER_ERROR. If not, skip. + */ + if (header->Action != ACPI_EINJ_TRIGGER_ERROR) { + device_printf(sc->sc_dev, "TRIGGER_ERROR action table:" + " other action: %"PRIu32" (%s)\n", + header->Action, + (header->Action < __arraycount(apei_einj_action) + ? apei_einj_action[header->Action] + : "unknown")); + continue; + } + + /* + * Execute the instruction. Since there's only one + * action, we don't bother with the apei_interp + * machinery to collate instruction tables for each + * action. EINJ instructions don't change ip. 
+ */ + uint32_t ip = i + 1; + apei_einj_instfunc(header, M, &ip, nentries); + KASSERT(ip == i + 1); + } + +out: if (teatab) { + AcpiOsUnmapMemory(teatab, mapsize); + teatab = NULL; + mapsize = 0; + } + return M->y; +} + +/* + * apei_einj_action_sysctl: + * + * Handle sysctl queries under hw.acpi.apei.einj.action.*. + */ +static int +apei_einj_action_sysctl(SYSCTLFN_ARGS) +{ + device_t apei0 = NULL; + struct apei_softc *sc; + enum AcpiEinjActions action; + struct sysctlnode node = *rnode; + uint64_t v; + int error; + + /* + * As a defence against mistakes, require the user to specify a + * write. + */ + if (newp == NULL) { + error = ENOENT; + goto out; + } + + /* + * Take a reference to the apei0 device so it doesn't go away + * while we're working, and get the softc. + */ + if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) { + error = ENXIO; + goto out; + } + sc = device_private(apei0); + + /* + * Fail if there's no EINJ. + */ + if (sc->sc_tab.einj == NULL) { + error = ENODEV; + goto out; + } + + /* + * Identify the requested action. If we don't recognize it, + * fail with EINVAL. + */ + switch (node.sysctl_num) { + case ACPI_EINJ_BEGIN_OPERATION: + case ACPI_EINJ_GET_TRIGGER_TABLE: + case ACPI_EINJ_SET_ERROR_TYPE: + case ACPI_EINJ_GET_ERROR_TYPE: + case ACPI_EINJ_END_OPERATION: + case ACPI_EINJ_EXECUTE_OPERATION: + case ACPI_EINJ_CHECK_BUSY_STATUS: + case ACPI_EINJ_GET_COMMAND_STATUS: + case ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS: + case ACPI_EINJ_GET_EXECUTE_TIMINGS: + action = node.sysctl_num; + break; + default: + error = ENOENT; + goto out; + } + + /* + * Kludge: Copy the `new value' for the sysctl in as an input + * to the injection action. + */ + error = sysctl_copyin(curlwp, newp, &v, sizeof(v)); + if (error) + goto out; + + /* + * Perform the EINJ action by following the table's + * instructions. + */ + v = apei_einj_act(sc, action, v); + + /* + * Return the output of the operation as the `old value' of the + * sysctl. 
This also updates v with what was written to the + * sysctl was written, but we don't care because we already + * read that in and acted on it. + */ + node.sysctl_data = &v; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + +out: if (apei0) { + device_release(apei0); + apei0 = NULL; + } + return error; +} + +/* + * apei_einj_trigger_sysctl + * + * Handle sysctl hw.acpi.apei.einj.trigger. + */ +static int +apei_einj_trigger_sysctl(SYSCTLFN_ARGS) +{ + device_t apei0 = NULL; + struct apei_softc *sc; + struct sysctlnode node = *rnode; + uint64_t v; + int error; + + /* + * As a defence against mistakes, require the user to specify a + * write. + */ + if (newp == NULL) { + error = ENOENT; + goto out; + } + + /* + * Take a reference to the apei0 device so it doesn't go away + * while we're working, and get the softc. + */ + if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) { + error = ENXIO; + goto out; + } + sc = device_private(apei0); + + /* + * Fail if there's no EINJ. + */ + if (sc->sc_tab.einj == NULL) { + error = ENODEV; + goto out; + } + + /* + * Kludge: Copy the `new value' for the sysctl in as an input + * to the trigger action. + */ + error = sysctl_copyin(curlwp, newp, &v, sizeof(v)); + if (error) + goto out; + + /* + * Perform the TRIGGER_ERROR action. + */ + v = apei_einj_trigger(sc, v); + + /* + * Return the output of the operation as the `old value' of the + * sysctl. This also updates v with what was written to the + * sysctl was written, but we don't care because we already + * read that in and acted on it. + */ + node.sysctl_data = &v; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + +out: if (apei0) { + device_release(apei0); + apei0 = NULL; + } + return error; +} + +/* + * apei_einj_types_sysctl + * + * Handle sysctl hw.acpi.apei.einj.types. 
+ */ +static int +apei_einj_types_sysctl(SYSCTLFN_ARGS) +{ + device_t apei0 = NULL; + struct apei_softc *sc; + struct sysctlnode node = *rnode; + uint64_t types; + int error; + + /* + * Take a reference to the apei0 device so it doesn't go away + * while we're working, and get the softc. + * + * XXX Is this necessary? Shouldn't sysctl_teardown take care + * of preventing new sysctl calls and waiting until all pending + * sysctl calls are done? + */ + if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) { + error = ENXIO; + goto out; + } + sc = device_private(apei0); + + /* + * Fail if there's no EINJ. + */ + if (sc->sc_tab.einj == NULL) { + error = ENODEV; + goto out; + } + + /* + * Perform the GET_ERROR_TYPE action and return the value to + * sysctl. + * + * XXX Should this do it between BEGIN_INJECTION_OPERATION and + * END_OPERATION? + */ + types = apei_einj_act(sc, ACPI_EINJ_GET_ERROR_TYPE, 0); + node.sysctl_data = &types; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + +out: if (apei0) { + device_release(apei0); + apei0 = NULL; + } + return error; +} diff --git a/sys/dev/acpi/apei_einjvar.h b/sys/dev/acpi/apei_einjvar.h new file mode 100644 index 000000000000..39c2633ff0cd --- /dev/null +++ b/sys/dev/acpi/apei_einjvar.h @@ -0,0 +1,48 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_EINJVAR_H_ +#define _SYS_DEV_ACPI_APEI_EINJVAR_H_ + +struct apei_interp; +struct apei_softc; + +/* + * struct apei_einj_softc + * + * Software state for error injection actions described in the + * EINJ, Error Injection Table. + */ +struct apei_einj_softc { + struct apei_interp *jsc_interp; +}; + +void apei_einj_attach(struct apei_softc *); +void apei_einj_detach(struct apei_softc *); + +#endif /* _SYS_DEV_ACPI_APEI_EINJVAR_H_ */ diff --git a/sys/dev/acpi/apei_erst.c b/sys/dev/acpi/apei_erst.c new file mode 100644 index 000000000000..9c3d0ad0a3d2 --- /dev/null +++ b/sys/dev/acpi/apei_erst.c @@ -0,0 +1,577 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI ERST -- Error Record Serialization Table + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-serialization + * + * XXX Expose this through a /dev node with ioctls and/or through a + * file system. 
+ */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include + +#include + +#include +#include +#include +#include +#include + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("apei") + +static bool apei_erst_instvalid(ACPI_WHEA_HEADER *, uint32_t, uint32_t); +static void apei_erst_instfunc(ACPI_WHEA_HEADER *, void *, uint32_t *, + uint32_t); +static uint64_t apei_erst_act(struct apei_softc *, enum AcpiErstActions, + uint64_t); + +/* + * apei_erst_action + * + * Symbolic names of the APEI ERST (Error Record Serialization + * Table) logical actions are taken (and downcased) from: + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-record-serialization-actions-table + */ +static const char *const apei_erst_action[] = { + [ACPI_ERST_BEGIN_WRITE] = "begin_write_operation", + [ACPI_ERST_BEGIN_READ] = "begin_read_operation", + [ACPI_ERST_BEGIN_CLEAR] = "begin_clear_operation", + [ACPI_ERST_END] = "end_operation", + [ACPI_ERST_SET_RECORD_OFFSET] = "set_record_offset", + [ACPI_ERST_EXECUTE_OPERATION] = "execute_operation", + [ACPI_ERST_CHECK_BUSY_STATUS] = "check_busy_status", + [ACPI_ERST_GET_COMMAND_STATUS] = "get_command_status", + [ACPI_ERST_GET_RECORD_ID] = "get_record_identifier", + [ACPI_ERST_SET_RECORD_ID] = "set_record_identifier", + [ACPI_ERST_GET_RECORD_COUNT] = "get_record_count", + [ACPI_ERST_BEGIN_DUMMY_WRIITE] = "begin_dummy_write_operation", + [ACPI_ERST_NOT_USED] = "reserved", + [ACPI_ERST_GET_ERROR_RANGE] = "get_error_log_address_range", + [ACPI_ERST_GET_ERROR_LENGTH] = "get_error_log_address_range_length", + [ACPI_ERST_GET_ERROR_ATTRIBUTES] = + "get_error_log_address_range_attributes", + [ACPI_ERST_EXECUTE_TIMINGS] = "get_execute_operations_timings", +}; + +/* + * apei_erst_instruction + * + * Symbolic names of the APEI ERST (Error Record Serialization + * Table) instructions to implement logical actions are taken (and + * downcased) from: + * + * 
https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#serialization-instructions + */ +static const char *apei_erst_instruction[] = { + [ACPI_ERST_READ_REGISTER] = "read_register", + [ACPI_ERST_READ_REGISTER_VALUE] = "read_register_value", + [ACPI_ERST_WRITE_REGISTER] = "write_register", + [ACPI_ERST_WRITE_REGISTER_VALUE] = "write_register_value", + [ACPI_ERST_NOOP] = "noop", + [ACPI_ERST_LOAD_VAR1] = "load_var1", + [ACPI_ERST_LOAD_VAR2] = "load_var2", + [ACPI_ERST_STORE_VAR1] = "store_var1", + [ACPI_ERST_ADD] = "add", + [ACPI_ERST_SUBTRACT] = "subtract", + [ACPI_ERST_ADD_VALUE] = "add_value", + [ACPI_ERST_SUBTRACT_VALUE] = "subtract_value", + [ACPI_ERST_STALL] = "stall", + [ACPI_ERST_STALL_WHILE_TRUE] = "stall_while_true", + [ACPI_ERST_SKIP_NEXT_IF_TRUE] = "skip_next_instruction_if_true", + [ACPI_ERST_GOTO] = "goto", + [ACPI_ERST_SET_SRC_ADDRESS_BASE] = "set_src_address_base", + [ACPI_ERST_SET_DST_ADDRESS_BASE] = "set_dst_address_base", + [ACPI_ERST_MOVE_DATA] = "move_data", +}; + +/* + * XXX dtrace and kernhist + */ +static void +apei_pmemmove(uint64_t pdst, uint64_t psrc, uint64_t nbytes) +{ + char *vdst, *vsrc; + + aprint_debug("ERST: move" + " %"PRIu64" bytes from 0x%"PRIx64" to 0x%"PRIx64"\n", + nbytes, psrc, pdst); + + /* + * Carefully check for overlap. + */ + if (pdst < psrc && psrc < pdst + nbytes) { + /* + * psrc ______ psrc + nbytes + * / \ + * <---------------------> + * \______/ + * pdst pdst + nbytes + */ + vdst = AcpiOsMapMemory(pdst, nbytes + (psrc - pdst)); + vsrc = vdst + (psrc - pdst); + memmove(vdst, vsrc, nbytes); + AcpiOsUnmapMemory(vdst, nbytes + (psrc - pdst)); + } else if (psrc < pdst && pdst < psrc + nbytes) { + /* + * psrc ______ psrc + nbytes + * / \ + * <---------------------> + * \______/ + * pdst pdst + nbytes + */ + vsrc = AcpiOsMapMemory(psrc, nbytes + (pdst - psrc)); + vdst = vsrc + (pdst - psrc); + memmove(vdst, vsrc, nbytes); + AcpiOsUnmapMemory(vsrc, nbytes + (pdst - psrc)); + } else { + /* + * No overlap. 
+ */ + vdst = AcpiOsMapMemory(pdst, nbytes); + vsrc = AcpiOsMapMemory(psrc, nbytes); + memcpy(vdst, vsrc, nbytes); + AcpiOsUnmapMemory(vsrc, nbytes); + AcpiOsUnmapMemory(vdst, nbytes); + } +} + +/* + * apei_erst_attach(sc) + * + * Scan the Error Record Serialization Table to collate the + * instructions for each ERST action. + */ +void +apei_erst_attach(struct apei_softc *sc) +{ + ACPI_TABLE_ERST *erst = sc->sc_tab.erst; + struct apei_erst_softc *ssc = &sc->sc_erst; + ACPI_ERST_ENTRY *entry; + uint32_t i, nentries, maxnentries; + + /* + * Verify the table length, table header length, and + * instruction entry count are all sensible. If the header is + * truncated, stop here; if the entries are truncated, stop at + * the largest integral number of full entries that fits. + */ + if (erst->Header.Length < sizeof(*erst)) { + aprint_error_dev(sc->sc_dev, "ERST: truncated table:" + " %"PRIu32" < %zu minimum bytes\n", + erst->Header.Length, sizeof(*erst)); + return; + } + if (erst->HeaderLength < + sizeof(*erst) - offsetof(ACPI_TABLE_ERST, HeaderLength)) { + aprint_error_dev(sc->sc_dev, "ERST: truncated header:" + " %"PRIu32" < %zu bytes\n", + erst->HeaderLength, + sizeof(*erst) - offsetof(ACPI_TABLE_ERST, HeaderLength)); + return; + } + nentries = erst->Entries; + maxnentries = (erst->Header.Length - sizeof(*erst))/sizeof(*entry); + if (nentries > maxnentries) { + aprint_error_dev(sc->sc_dev, "ERST: excessive entries:" + " %"PRIu32", truncating to %"PRIu32"\n", + nentries, maxnentries); + nentries = maxnentries; + } + if (nentries*sizeof(*entry) < erst->Header.Length - sizeof(*erst)) { + aprint_error_dev(sc->sc_dev, "ERST:" + " %zu bytes of trailing garbage after last entry\n", + erst->Header.Length - nentries*sizeof(*entry)); + } + + /* + * Create an interpreter for ERST actions. 
+ */ + ssc->ssc_interp = apei_interp_create("ERST", + apei_erst_action, __arraycount(apei_erst_action), + apei_erst_instruction, __arraycount(apei_erst_instruction), + apei_erst_instvalid, apei_erst_instfunc); + + /* + * Compile the interpreter from the ERST action instruction + * table. + */ + entry = (ACPI_ERST_ENTRY *)(erst + 1); + for (i = 0; i < nentries; i++, entry++) + apei_interp_pass1_load(ssc->ssc_interp, i, &entry->WheaHeader); + entry = (ACPI_ERST_ENTRY *)(erst + 1); + for (i = 0; i < nentries; i++, entry++) { + apei_interp_pass2_verify(ssc->ssc_interp, i, + &entry->WheaHeader); + } + apei_interp_pass3_alloc(ssc->ssc_interp); + entry = (ACPI_ERST_ENTRY *)(erst + 1); + for (i = 0; i < nentries; i++, entry++) { + apei_interp_pass4_assemble(ssc->ssc_interp, i, + &entry->WheaHeader); + } + apei_interp_pass5_verify(ssc->ssc_interp); + + /* + * Print some basic information about the stored records. + */ + uint64_t logaddr = apei_erst_act(sc, ACPI_ERST_GET_ERROR_RANGE, 0); + uint64_t logbytes = apei_erst_act(sc, ACPI_ERST_GET_ERROR_LENGTH, 0); + uint64_t attr = apei_erst_act(sc, ACPI_ERST_GET_ERROR_ATTRIBUTES, 0); + uint64_t nrecords = apei_erst_act(sc, ACPI_ERST_GET_RECORD_COUNT, 0); + char attrbuf[128]; + + /* XXX define this format somewhere */ + snprintb(attrbuf, sizeof(attrbuf), "\177\020" + "\001" "NVRAM\0" + "\002" "SLOW\0" + "\0", attr); + + aprint_normal_dev(sc->sc_dev, "ERST: %"PRIu64" records in error log" + " %"PRIu64" bytes @ 0x%"PRIx64" attr=%s\n", + nrecords, logbytes, logaddr, attrbuf); + + /* + * XXX wire up to sysctl or a file system or something, and/or + * dmesg or crash dumps + */ +} + +/* + * apei_erst_detach(sc) + * + * Free software resource allocated for ERST handling. 
+ */
+void
+apei_erst_detach(struct apei_softc *sc)
+{
+	struct apei_erst_softc *ssc = &sc->sc_erst;
+
+	if (ssc->ssc_interp) {
+		apei_interp_destroy(ssc->ssc_interp);
+		ssc->ssc_interp = NULL;
+	}
+}
+
+/*
+ * apei_erst_instvalid(header, ninst, i)
+ *
+ *	Routine to validate the ith entry, for an action with ninst
+ *	instructions.
+ */
+static bool
+apei_erst_instvalid(ACPI_WHEA_HEADER *header, uint32_t ninst, uint32_t i)
+{
+
+	switch (header->Instruction) {
+	case ACPI_ERST_GOTO:
+		if (header->Value > ninst) {
+			aprint_error("ERST[%"PRIu32"]:"
+			    " GOTO(%"PRIu64") out of bounds,"
+			    " disabling action %"PRIu32" (%s)\n", i,
+			    header->Value,
+			    header->Action,
+			    apei_erst_action[header->Action]);
+			return false;
+		}
+	}
+	return true;
+}
+
+/*
+ * struct apei_erst_machine
+ *
+ *	Machine state for executing ERST instructions.
+ */
+struct apei_erst_machine {
+	struct apei_softc	*sc;
+	uint64_t		x;	/* in */
+	uint64_t		y;	/* out */
+	uint64_t		var1;	/* LOAD_VAR1/STORE_VAR1/ADD/SUBTRACT */
+	uint64_t		var2;	/* LOAD_VAR2; MOVE_DATA length */
+	uint64_t		src_base;	/* SET_SRC_ADDRESS_BASE */
+	uint64_t		dst_base;	/* SET_DST_ADDRESS_BASE */
+};
+
+/*
+ * apei_erst_instfunc(header, cookie, &ip, maxip)
+ *
+ *	Run a single instruction in the service of performing an ERST
+ *	action.  Updates the ERST machine at cookie, and the ip if
+ *	necessary, in place.
+ *
+ *	On entry, ip points to the next instruction after this one
+ *	sequentially; on exit, ip points to the next instruction to
+ *	execute.
+ */
+static void
+apei_erst_instfunc(ACPI_WHEA_HEADER *header, void *cookie, uint32_t *ipp,
+    uint32_t maxip)
+{
+	struct apei_erst_machine *const M = cookie;
+	ACPI_STATUS rv = AE_OK;
+
+	/*
+	 * Abbreviate some of the intermediate quantities to make the
+	 * instruction logic conciser and more legible.
+	 */
+	const uint8_t BitOffset = header->RegisterRegion.BitOffset;
+	const uint64_t Mask = header->Mask;
+	const uint64_t Value = header->Value;
+	ACPI_GENERIC_ADDRESS *const reg = &header->RegisterRegion;
+	const bool preserve_register = header->Flags & ACPI_ERST_PRESERVE;
+
+	aprint_debug_dev(M->sc->sc_dev, "%s: instr=0x%02"PRIx8
+	    " (%s)"
+	    " Address=0x%"PRIx64
+	    " BitOffset=%"PRIu8" Mask=0x%"PRIx64" Value=0x%"PRIx64
+	    " Flags=0x%"PRIx8"\n",
+	    __func__, header->Instruction,
+	    (header->Instruction < __arraycount(apei_erst_instruction)
+		? apei_erst_instruction[header->Instruction]
+		: "unknown"),
+	    reg->Address,
+	    BitOffset, Mask, Value,
+	    header->Flags);
+
+	/*
+	 * Zero-initialize the output by default.
+	 */
+	M->y = 0;
+
+	/*
+	 * Dispatch the instruction.
+	 */
+	switch (header->Instruction) {
+	case ACPI_ERST_READ_REGISTER:
+		rv = apei_read_register(reg, Mask, &M->y);
+		break;
+	case ACPI_ERST_READ_REGISTER_VALUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		M->y = (v == Value ? 1 : 0);
+		break;
+	}
+	case ACPI_ERST_WRITE_REGISTER:
+		rv = apei_write_register(reg, Mask, preserve_register, M->x);
+		break;
+	case ACPI_ERST_WRITE_REGISTER_VALUE:
+		rv = apei_write_register(reg, Mask, preserve_register, Value);
+		break;
+	case ACPI_ERST_NOOP:
+		break;
+	case ACPI_ERST_LOAD_VAR1:
+		rv = apei_read_register(reg, Mask, &M->var1);
+		break;
+	case ACPI_ERST_LOAD_VAR2:
+		rv = apei_read_register(reg, Mask, &M->var2);
+		break;
+	case ACPI_ERST_STORE_VAR1:
+		rv = apei_write_register(reg, Mask, preserve_register,
+		    M->var1);
+		break;
+	case ACPI_ERST_ADD:
+		M->var1 += M->var2;
+		break;
+	case ACPI_ERST_SUBTRACT:
+		/*
+		 * The specification at
+		 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#serialization-instructions
+		 * says:
+		 *
+		 *	0x09	SUBTRACT	Subtracts VAR1 from VAR2
+		 *				and stores the result in
+		 *				VAR1.
+		 *
+		 * So, according to the spec, this is _not_ simply
+		 *
+		 *	M->var1 -= M->var2;
+		 */
+		M->var1 = M->var2 - M->var1;
+		break;
+	case ACPI_ERST_ADD_VALUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+
+		v += Value;
+
+		rv = apei_write_register(reg, Mask, preserve_register, v);
+		break;
+	}
+	case ACPI_ERST_SUBTRACT_VALUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+
+		v -= Value;
+
+		rv = apei_write_register(reg, Mask, preserve_register, v);
+		break;
+	}
+	case ACPI_ERST_STALL:
+		DELAY(Value);		/* XXX avoid excessive delays */
+		break;
+	case ACPI_ERST_STALL_WHILE_TRUE:
+		for (;;) {
+			uint64_t v;
+
+			rv = apei_read_register(reg, Mask, &v);
+			if (ACPI_FAILURE(rv))
+				break;
+			if (v != Value)
+				break;
+			DELAY(M->var1);
+		}
+		break;
+	case ACPI_ERST_SKIP_NEXT_IF_TRUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+
+		/*
+		 * If reading the register yields Value, skip the next
+		 * instruction -- unless that would run past the end of
+		 * the instruction buffer.
+		 */
+		if (v == Value) {
+			if (*ipp < maxip)
+				(*ipp)++;
+		}
+		break;
+	}
+	case ACPI_ERST_GOTO:
+		if (Value >= maxip) /* paranoia */
+			*ipp = maxip;
+		else
+			*ipp = Value;
+		break;
+	case ACPI_ERST_SET_SRC_ADDRESS_BASE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		M->src_base = v;
+		break;
+	}
+	case ACPI_ERST_SET_DST_ADDRESS_BASE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		M->dst_base = v;	/* destination, not src_base */
+		break;
+	}
+	case ACPI_ERST_MOVE_DATA: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		apei_pmemmove(M->dst_base + v, M->src_base + v, M->var2);
+		break;
+	}
+	default:
+		break;
+	}
+
+	/*
+	 * If any register I/O failed, print the failure message.  This
+	 * could be more specific about exactly what failed, but that
+	 * takes a little more effort to write.
+	 */
+	if (ACPI_FAILURE(rv)) {
+		aprint_debug_dev(M->sc->sc_dev, "%s: failed: %s\n", __func__,
+		    AcpiFormatException(rv));
+	}
+}
+
+/*
+ * apei_erst_act(sc, action, x)
+ *
+ *	Perform the named ERST action with input x, by stepping through
+ *	all the instructions defined for the action by the ERST, and
+ *	return the output.
+ */
+static uint64_t
+apei_erst_act(struct apei_softc *sc, enum AcpiErstActions action, uint64_t x)
+{
+	struct apei_erst_softc *const ssc = &sc->sc_erst;
+	struct apei_erst_machine erst_machine, *const M = &erst_machine;
+
+	aprint_debug_dev(sc->sc_dev, "%s: action=%d (%s) input=0x%"PRIx64"\n",
+	    __func__,
+	    action,
+	    (action < __arraycount(apei_erst_action)
+		? apei_erst_action[action]
+		: "unknown"),
+	    x);
+
+	/*
+	 * Initialize the machine to execute the action's instructions.
+	 */
+	memset(M, 0, sizeof(*M));
+	M->sc = sc;
+	M->x = x;		/* input */
+	M->y = 0;		/* output */
+	M->var1 = 0;
+	M->var2 = 0;
+	M->src_base = 0;
+	M->dst_base = 0;
+
+	/*
+	 * Run the interpreter.
+	 */
+	apei_interpret(ssc->ssc_interp, action, M);
+
+	/*
+	 * Return the result.
+	 */
+	aprint_debug_dev(sc->sc_dev, "%s: output=0x%"PRIx64"\n", __func__,
+	    M->y);
+	return M->y;
+}
diff --git a/sys/dev/acpi/apei_erstvar.h b/sys/dev/acpi/apei_erstvar.h
new file mode 100644
index 000000000000..c2e7117b1066
--- /dev/null
+++ b/sys/dev/acpi/apei_erstvar.h
@@ -0,0 +1,49 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_ERSTVAR_H_ +#define _SYS_DEV_ACPI_APEI_ERSTVAR_H_ + +struct apei_interp; +struct apei_softc; + +/* + * struct apei_erst_softc + * + * Software state for error serialization actions described in the + * ERST, Error Record Serialization Table. + */ +struct apei_erst_softc { + struct apei_interp *ssc_interp; +}; + +void apei_erst_attach(struct apei_softc *); +void apei_erst_detach(struct apei_softc *); + +#endif /* _SYS_DEV_ACPI_APEI_ERSTVAR_H_ */ + diff --git a/sys/dev/acpi/apei_hed.h b/sys/dev/acpi/apei_hed.h new file mode 100644 index 000000000000..988be5292c0a --- /dev/null +++ b/sys/dev/acpi/apei_hed.h @@ -0,0 +1,34 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_HED_H_ +#define _SYS_DEV_ACPI_APEI_HED_H_ + +void apei_hed_notify(void); + +#endif /* _SYS_DEV_ACPI_APEI_HED_H_ */ diff --git a/sys/dev/acpi/apei_hest.c b/sys/dev/acpi/apei_hest.c new file mode 100644 index 000000000000..e5367eee5dfd --- /dev/null +++ b/sys/dev/acpi/apei_hest.c @@ -0,0 +1,1017 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI HEST -- Hardware Error Source Table + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#acpi-error-source + * + * XXX uncorrectable error NMI comes in on all CPUs at once, what to do? + * + * XXX AMD MCA + * + * XXX IA32 machine check stuff + * + * XXX switch-to-polling for GHES notifications + * + * XXX error threshold for GHES notifications + * + * XXX sort out interrupt notification types, e.g. do we ever need to + * do acpi_intr_establish? + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if defined(__i386__) || defined(__x86_64__) +#include +#endif + +#include "ioconf.h" + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("apei") + +/* + * apei_hest_ghes_handle(sc, src) + * + * Check for, report, and acknowledge any error from a Generic + * Hardware Error Source (GHES, not GHESv2). 
Return true if there
+ *	was any error to report, false if not.
+ */
+static bool
+apei_hest_ghes_handle(struct apei_softc *sc, struct apei_source *src)
+{
+	ACPI_HEST_GENERIC *const ghes = container_of(src->as_header,
+	    ACPI_HEST_GENERIC, Header);
+	ACPI_HEST_GENERIC_STATUS *const gesb = src->as_ghes.gesb;
+	char ctx[sizeof("error source 65535")];
+	uint32_t status;
+	bool fatal = false;
+
+	/*
+	 * Report whatever the status block currently describes.
+	 */
+	snprintf(ctx, sizeof(ctx), "error source %"PRIu16,
+	    ghes->Header.SourceId);
+	status = apei_gesb_report(sc, gesb,
+	    ghes->ErrorBlockLength, ctx, &fatal);
+
+	/*
+	 * Acknowledge the error by clearing the block status.  To
+	 * avoid races, we probably have to avoid further access to the
+	 * GESB until we get another notification.
+	 *
+	 * As a precaution, we zero this with atomic compare-and-swap
+	 * so at least we can see if the status changed while we were
+	 * working on it.
+	 *
+	 * It is tempting to clear bits with atomic and-complement, but
+	 * the BlockStatus is not just a bit mask -- bits [13:4] are a
+	 * count of Generic Error Data Entries, and who knows what bits
+	 * [31:14] might be used for in the future.
+	 *
+	 * XXX The GHES(v1) protocol is unclear from the specification
+	 * here.  The GHESv2 protocol has a separate register write to
+	 * acknowledge, which is a bit clearer.
+	 */
+	membar_release();
+	const uint32_t observed = atomic_cas_32(&gesb->BlockStatus, status, 0);
+	if (observed != status) {
+		device_printf(sc->sc_dev, "%s: status changed from"
+		    " 0x%"PRIx32" to 0x%"PRIx32"\n",
+		    ctx, status, observed);
+	}
+
+	/*
+	 * A fatal error is reported first and only then panics.
+	 */
+	if (fatal)
+		panic("fatal hardware error");
+
+	return status != 0;
+}
+
+/*
+ * apei_hest_ghes_v2_handle(sc, src)
+ *
+ *	Check for, report, and acknowledge any error from a Generic
+ *	Hardware Error Source v2.  Return true if there was any error
+ *	to report, false if not.
+ */
+static bool
+apei_hest_ghes_v2_handle(struct apei_softc *sc, struct apei_source *src)
+{
+	ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(src->as_header,
+	    ACPI_HEST_GENERIC_V2, Header);
+	ACPI_HEST_GENERIC_STATUS *gesb = src->as_ghes_v2.gesb;
+	char ctx[sizeof("error source 65535")];
+	uint64_t X;
+	uint32_t status;
+	bool fatal = false;	/* as in the GHES(v1) handler */
+
+	/*
+	 * Process and report any error.
+	 */
+	snprintf(ctx, sizeof(ctx), "error source %"PRIu16,
+	    ghes_v2->Header.SourceId);
+	status = apei_gesb_report(sc, gesb,
+	    ghes_v2->ErrorBlockLength, ctx, &fatal);
+
+	/*
+	 * First clear the block status.  As a precaution, we zero this
+	 * with atomic compare-and-swap so at least we can see if the
+	 * status changed while we were working on it.
+	 */
+	membar_release();
+	const uint32_t status1 = atomic_cas_32(&gesb->BlockStatus, status, 0);
+	if (status1 != status) {
+		device_printf(sc->sc_dev, "%s: status changed from"
+		    " 0x%"PRIx32" to 0x%"PRIx32"\n",
+		    ctx, status, status1);
+	}
+
+	/*
+	 * Next, do the Read Ack dance.
+	 *
+	 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-hardware-error-source-version-2-ghesv2-type-10
+	 */
+	X = apei_mapreg_read(&ghes_v2->ReadAckRegister,
+	    src->as_ghes_v2.read_ack);
+	X &= ghes_v2->ReadAckPreserve;
+	X |= ghes_v2->ReadAckWrite;
+	apei_mapreg_write(&ghes_v2->ReadAckRegister,
+	    src->as_ghes_v2.read_ack, X);
+
+	/*
+	 * If the error was fatal, panic now.
+	 */
+	if (fatal)
+		panic("fatal hardware error");
+
+	return status != 0;
+}
+
+/*
+ * apei_hest_ghes_poll(cookie)
+ *
+ *	Callout handler for periodic polling of a Generic Hardware
+ *	Error Source (GHES, not GHESv2), using Notification Type `0 -
+ *	Polled'.
+ *
+ *	cookie is the struct apei_source pointer for a single source;
+ *	if there are multiple sources there will be multiple callouts.
+ */
+static void
+apei_hest_ghes_poll(void *cookie)
+{
+	struct apei_source *const src = cookie;
+	ACPI_HEST_GENERIC *const ghes = container_of(src->as_header,
+	    ACPI_HEST_GENERIC, Header);
+	int nticks;
+
+	/*
+	 * Check for an error, report it, and acknowledge it.
+	 */
+	(void)apei_hest_ghes_handle(src->as_sc, src);
+
+	/*
+	 * Re-arm the callout after the firmware-suggested poll
+	 * interval, but never sooner than one tick.
+	 */
+	nticks = MAX(1, mstohz(ghes->Notify.PollInterval));
+	callout_schedule(&src->as_ch, nticks);
+}
+
+/*
+ * apei_hest_ghes_v2_poll(cookie)
+ *
+ *	Callout handler for periodic polling of a Generic Hardware
+ *	Error Source v2, using Notification Type `0 - Polled'.
+ *
+ *	cookie is the struct apei_source pointer for a single source;
+ *	if there are multiple sources there will be multiple callouts.
+ */
+static void
+apei_hest_ghes_v2_poll(void *cookie)
+{
+	struct apei_source *const src = cookie;
+	ACPI_HEST_GENERIC_V2 *const ghes_v2 = container_of(src->as_header,
+	    ACPI_HEST_GENERIC_V2, Header);
+	int nticks;
+
+	/*
+	 * Check for an error, report it, and acknowledge it.
+	 */
+	(void)apei_hest_ghes_v2_handle(src->as_sc, src);
+
+	/*
+	 * Re-arm the callout after the firmware-suggested poll
+	 * interval, but never sooner than one tick.
+	 */
+	nticks = MAX(1, mstohz(ghes_v2->Notify.PollInterval));
+	callout_schedule(&src->as_ch, nticks);
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * The NMI is (sometimes?) delivered to all CPUs at once.  To reduce
+ * confusion, let's try to have only one CPU process error
+ * notifications at a time.
+ */
+static __cpu_simple_lock_t apei_hest_nmi_lock;
+
+/*
+ * apei_hest_ghes_nmi(tf, cookie)
+ *
+ *	Nonmaskable interrupt handler for Generic Hardware Error
+ *	Sources (GHES, not GHESv2) with Notification Type `4 - NMI'.
+ */
+static int
+apei_hest_ghes_nmi(const struct trapframe *tf, void *cookie)
+{
+	struct apei_source *src = cookie;
+	struct apei_softc *sc = src->as_sc;
+
+	__cpu_simple_lock(&apei_hest_nmi_lock);
+	const bool mine = apei_hest_ghes_handle(sc, src);
+	__cpu_simple_unlock(&apei_hest_nmi_lock);
+
+	/*
+	 * Tell the NMI subsystem whether this interrupt could have
+	 * been for us or not.
+	 */
+	return mine;
+}
+
+/*
+ * apei_hest_ghes_v2_nmi(tf, cookie)
+ *
+ *	Nonmaskable interrupt handler for Generic Hardware Error
+ *	Sources v2 with Notification Type `4 - NMI'.
+ */
+static int
+apei_hest_ghes_v2_nmi(const struct trapframe *tf, void *cookie)
+{
+	struct apei_source *src = cookie;
+	struct apei_softc *sc = src->as_sc;
+
+	__cpu_simple_lock(&apei_hest_nmi_lock);
+	const bool mine = apei_hest_ghes_v2_handle(sc, src);
+	__cpu_simple_unlock(&apei_hest_nmi_lock);
+
+	/*
+	 * Tell the NMI subsystem whether this interrupt could have
+	 * been for us or not.
+	 */
+	return mine;
+}
+
+#endif	/* defined(__i386__) || defined(__x86_64__) */
+
+/*
+ * apei_hest_attach_ghes(sc, ghes, i)
+ *
+ *	Attach a Generic Hardware Error Source (GHES, not GHESv2) as
+ *	the ith source in the Hardware Error Source Table.
+ *
+ *	After this point, the system will check for and handle errors
+ *	when notified by this source.
+ */
+static void
+apei_hest_attach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes,
+    uint32_t i)
+{
+	struct apei_hest_softc *hsc = &sc->sc_hest;
+	struct apei_source *src = &hsc->hsc_source[i];
+	uint64_t addr;
+	ACPI_STATUS rv;
+	char ctx[sizeof("HEST[4294967295, Id=65535]")];
+
+	snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]",
+	    i, ghes->Header.SourceId);
+
+	/*
+	 * Verify the source is enabled before proceeding.  The Enabled
+	 * field is 8 bits with 256 possibilities, but only two of the
+	 * possibilities, 0 and 1, have semantics defined in the spec,
+	 * so out of an abundance of caution let's tread carefully in
+	 * case anything changes and noisily reject any values other
+	 * than 1.
+	 */
+	switch (ghes->Enabled) {
+	case 1:
+		break;
+	case 0:
+		aprint_debug_dev(sc->sc_dev, "%s: disabled\n", ctx);
+		return;
+	default:
+		aprint_error_dev(sc->sc_dev, "%s: unknown GHES Enabled state:"
+		    " 0x%"PRIx8"\n", ctx, ghes->Enabled);
+		return;
+	}
+
+	/*
+	 * Verify the Error Status Address bit width is at most 64 bits
+	 * before proceeding with this source.  When we get 128-bit
+	 * addressing, this code will have to be updated.
+	 */
+	if (ghes->ErrorStatusAddress.BitWidth > 64) {
+		aprint_error_dev(sc->sc_dev, "%s: excessive address bits:"
+		    " %"PRIu8"\n", ctx, ghes->ErrorStatusAddress.BitWidth);
+		return;
+	}
+
+	/*
+	 * Read the GHES Error Status Address.  This is the physical
+	 * address of a GESB, Generic Error Status Block.  Why the
+	 * physical address is exposed via this indirection, and not
+	 * simply stored directly in the GHES, is unclear to me.
+	 * Hoping it's not because the address can change dynamically,
+	 * because the error handling path shouldn't involve mapping
+	 * anything.
+	 */
+	rv = AcpiRead(&addr, &ghes->ErrorStatusAddress);
+	if (ACPI_FAILURE(rv)) {
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " failed to read error status address: %s\n", ctx,
+		    AcpiFormatException(rv));
+		return;
+	}
+	aprint_debug_dev(sc->sc_dev, "%s: error status @ 0x%"PRIx64"\n", ctx,
+	    addr);
+
+	/*
+	 * Initialize the source and map the GESB so we can get at it
+	 * in the error handling path.
+	 */
+	src->as_sc = sc;
+	src->as_header = &ghes->Header;
+	src->as_ghes.gesb = AcpiOsMapMemory(addr, ghes->ErrorBlockLength);
+
+	/*
+	 * Arrange to receive notifications.
+	 */
+	switch (ghes->Notify.Type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+		callout_init(&src->as_ch, CALLOUT_MPSAFE);
+		callout_setfunc(&src->as_ch, &apei_hest_ghes_poll, src);
+		callout_schedule(&src->as_ch, 0);
+		break;
+	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GPIO:
+		/*
+		 * SCI and GPIO notifications are delivered through
+		 * Hardware Error Device (PNP0C33) events.
+		 *
+		 * XXX Where is this spelled out?  The text at
+		 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources
+		 * is vague.
+		 */
+		SIMPLEQ_INSERT_TAIL(&hsc->hsc_hed_list, src, as_entry);
+		break;
+#if defined(__i386__) || defined(__x86_64__)
+	case ACPI_HEST_NOTIFY_NMI:
+		src->as_nmi = nmi_establish(&apei_hest_ghes_nmi, src);
+		break;
+#endif
+	}
+
+	/*
+	 * Now that we have notification set up, process and
+	 * acknowledge the initial GESB report if any.
+	 */
+	apei_hest_ghes_handle(sc, src);
+}
+
+/*
+ * apei_hest_detach_ghes(sc, ghes, i)
+ *
+ *	Detach the ith source, which is a Generic Hardware Error Source
+ *	(GHES, not GHESv2).
+ *
+ *	After this point, the system will ignore notifications from
+ *	this source.
+ */
+static void
+apei_hest_detach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes,
+    uint32_t i)
+{
+	struct apei_hest_softc *hsc = &sc->sc_hest;
+	struct apei_source *src = &hsc->hsc_source[i];
+
+	/*
+	 * Arrange to stop receiving notifications.
+	 */
+	switch (ghes->Notify.Type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+		callout_halt(&src->as_ch, NULL);
+		callout_destroy(&src->as_ch);
+		break;
+	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GPIO:
+		/*
+		 * No need to spend time removing the entry; no further
+		 * calls via apei_hed_notify are possible at this
+		 * point, now that detach has begun.
+		 */
+		break;
+#if defined(__i386__) || defined(__x86_64__)
+	case ACPI_HEST_NOTIFY_NMI:
+		nmi_disestablish(src->as_nmi);
+		src->as_nmi = NULL;
+		break;
+#endif
+	}
+
+	/*
+	 * No more notifications.  Unmap the GESB and destroy the
+	 * interrupt source now that it will no longer be used in
+	 * error handling path.
+	 */
+	AcpiOsUnmapMemory(src->as_ghes.gesb, ghes->ErrorBlockLength);
+	src->as_ghes.gesb = NULL;
+	src->as_header = NULL;
+	src->as_sc = NULL;
+}
+
+
+/*
+ * apei_hest_attach_ghes_v2(sc, ghes_v2, i)
+ *
+ *	Attach a Generic Hardware Error Source v2 as the ith source in
+ *	the Hardware Error Source Table.
+ *
+ *	After this point, the system will check for and handle errors
+ *	when notified by this source.
+ */
+static void
+apei_hest_attach_ghes_v2(struct apei_softc *sc, ACPI_HEST_GENERIC_V2 *ghes_v2,
+    uint32_t i)
+{
+	struct apei_hest_softc *hsc = &sc->sc_hest;
+	struct apei_source *src = &hsc->hsc_source[i];
+	uint64_t addr;
+	struct apei_mapreg *read_ack;
+	ACPI_STATUS rv;
+	char ctx[sizeof("HEST[4294967295, Id=65535]")];
+
+	snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]",
+	    i, ghes_v2->Header.SourceId);
+
+	/*
+	 * Verify the source is enabled before proceeding.  The Enabled
+	 * field is 8 bits with 256 possibilities, but only two of the
+	 * possibilities, 0 and 1, have semantics defined in the spec,
+	 * so out of an abundance of caution let's tread carefully in
+	 * case anything changes and noisily reject any values other
+	 * than 1.
+	 */
+	switch (ghes_v2->Enabled) {
+	case 1:
+		break;
+	case 0:
+		aprint_debug_dev(sc->sc_dev, "%s: disabled\n", ctx);
+		return;
+	default:
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " unknown GHESv2 Enabled state: 0x%"PRIx8"\n", ctx,
+		    ghes_v2->Enabled);
+		return;
+	}
+
+	/*
+	 * Verify the Error Status Address bit width is at most 64 bits
+	 * before proceeding with this source.  When we get 128-bit
+	 * addressing, this code will have to be updated.
+	 */
+	if (ghes_v2->ErrorStatusAddress.BitWidth > 64) {
+		aprint_error_dev(sc->sc_dev, "%s: excessive address bits:"
+		    " %"PRIu8"\n", ctx, ghes_v2->ErrorStatusAddress.BitWidth);
+		return;
+	}
+
+	/*
+	 * Read the GHESv2 Error Status Address.  This is the physical
+	 * address of a GESB, Generic Error Status Block.  Why the
+	 * physical address is exposed via this indirection, and not
+	 * simply stored directly in the GHESv2, is unclear to me.
+	 * Hoping it's not because the address can change dynamically,
+	 * because the error handling path shouldn't involve mapping
+	 * anything.
+	 */
+	rv = AcpiRead(&addr, &ghes_v2->ErrorStatusAddress);
+	if (ACPI_FAILURE(rv)) {
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " failed to read error status address: %s\n", ctx,
+		    AcpiFormatException(rv));
+		return;
+	}
+	aprint_debug_dev(sc->sc_dev, "%s: error status @ 0x%"PRIx64"\n", ctx,
+	    addr);
+
+	/*
+	 * Try to map the Read Ack register up front, so we don't have
+	 * to allocate and free kva in AcpiRead/AcpiWrite at the time
+	 * we're handling an error.  Bail if we can't.
+	 */
+	read_ack = apei_mapreg_map(&ghes_v2->ReadAckRegister);
+	if (read_ack == NULL) {
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " unable to map Read Ack register\n", ctx);
+		return;
+	}
+
+	/*
+	 * Initialize the source and map the GESB so we can get at it
+	 * in the error handling path.
+	 */
+	src->as_sc = sc;
+	src->as_header = &ghes_v2->Header;
+	src->as_ghes_v2.gesb = AcpiOsMapMemory(addr,
+	    ghes_v2->ErrorBlockLength);
+	src->as_ghes_v2.read_ack = read_ack;
+
+	/*
+	 * Arrange to receive notifications.
+	 */
+	switch (ghes_v2->Notify.Type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+		callout_init(&src->as_ch, CALLOUT_MPSAFE);
+		callout_setfunc(&src->as_ch, &apei_hest_ghes_v2_poll, src);
+		callout_schedule(&src->as_ch, 0);
+		break;
+	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GPIO:
+		/*
+		 * SCI and GPIO notifications are delivered through
+		 * Hardware Error Device (PNP0C33) events.
+		 *
+		 * XXX Where is this spelled out?  The text at
+		 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources
+		 * is vague.
+		 */
+		SIMPLEQ_INSERT_TAIL(&hsc->hsc_hed_list, src, as_entry);
+		break;
+#if defined(__i386__) || defined(__x86_64__)
+	case ACPI_HEST_NOTIFY_NMI:
+		src->as_nmi = nmi_establish(&apei_hest_ghes_v2_nmi, src);
+		break;
+#endif
+	}
+
+	/*
+	 * Now that we have notification set up, process the initial GESB
+	 * report if any with the v2 handler, which also does Read Ack.
+	 */
+	apei_hest_ghes_v2_handle(sc, src);
+}
+
+/*
+ * apei_hest_detach_ghes_v2(sc, ghes_v2, i)
+ *
+ *	Detach the ith source, which is a Generic Hardware Error Source
+ *	v2.
+ *
+ *	After this point, the system will ignore notifications from
+ *	this source.
+ */
+static void
+apei_hest_detach_ghes_v2(struct apei_softc *sc, ACPI_HEST_GENERIC_V2 *ghes_v2,
+    uint32_t i)
+{
+	struct apei_hest_softc *hsc = &sc->sc_hest;
+	struct apei_source *src = &hsc->hsc_source[i];
+
+	/*
+	 * Arrange to stop receiving notifications.
+	 */
+	switch (ghes_v2->Notify.Type) {
+	case ACPI_HEST_NOTIFY_POLLED:
+		callout_halt(&src->as_ch, NULL);
+		callout_destroy(&src->as_ch);
+		break;
+	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GPIO:
+		/*
+		 * No need to spend time removing the entry; no further
+		 * calls via apei_hed_notify are possible at this
+		 * point, now that detach has begun.
+		 */
+		break;
+#if defined(__i386__) || defined(__x86_64__)
+	case ACPI_HEST_NOTIFY_NMI:
+		nmi_disestablish(src->as_nmi);
+		src->as_nmi = NULL;
+		break;
+#endif
+	}
+
+	/*
+	 * No more notifications.  Unmap the GESB and read ack register
+	 * now that it will no longer be used in error handling path.
+	 */
+	AcpiOsUnmapMemory(src->as_ghes_v2.gesb, ghes_v2->ErrorBlockLength);
+	src->as_ghes_v2.gesb = NULL;
+	apei_mapreg_unmap(&ghes_v2->ReadAckRegister, src->as_ghes_v2.read_ack);
+	src->as_ghes_v2.read_ack = NULL;
+	src->as_header = NULL;
+	src->as_sc = NULL;
+}
+
+/*
+ * apei_hest_attach_source(sc, header, i, size_t maxlen)
+ *
+ *	Attach the ith source in the Hardware Error Source Table given
+ *	its header, and return a pointer to the header of the next
+ *	source in the table, provided it is no more than maxlen bytes
+ *	past header.  Return NULL if the size of the source is unknown,
+ *	is too small for its header, or would exceed maxlen bytes.
+ */
+static ACPI_HEST_HEADER *
+apei_hest_attach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header,
+    uint32_t i, size_t maxlen)
+{
+	char ctx[sizeof("HEST[4294967295, Id=65535]")];
+
+	snprintf(ctx, sizeof(ctx), "HEST[%"PRIu32", Id=%"PRIu16"]",
+	    i, header->SourceId);
+
+	switch (header->Type) {
+	case ACPI_HEST_TYPE_IA32_CHECK: {
+		ACPI_HEST_IA_MACHINE_CHECK *const imc = container_of(header,
+		    ACPI_HEST_IA_MACHINE_CHECK, Header);
+
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
+
+		if (maxlen < sizeof(*imc))
+			return NULL;
+		maxlen -= sizeof(*imc);
+		ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imc + 1);
+		if (maxlen < imc->NumHardwareBanks*sizeof(*bank))
+			return NULL;
+		return (ACPI_HEST_HEADER *)(bank + imc->NumHardwareBanks);
+	}
+	case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: {
+		ACPI_HEST_IA_CORRECTED *const imcc = container_of(header,
+		    ACPI_HEST_IA_CORRECTED, Header);
+
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
+
+		if (maxlen < sizeof(*imcc))
+			return NULL;
+		maxlen -= sizeof(*imcc);
+		ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imcc + 1);
+		if (maxlen < imcc->NumHardwareBanks*sizeof(*bank))
+			return NULL;
+		return (ACPI_HEST_HEADER *)(bank + imcc->NumHardwareBanks);
+	}
+	case ACPI_HEST_TYPE_IA32_NMI: {
+		ACPI_HEST_IA_NMI *const ianmi = container_of(header,
+		    ACPI_HEST_IA_NMI, Header);
+
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
+
+		if (maxlen < sizeof(*ianmi))
+			return NULL;
+		return (ACPI_HEST_HEADER *)(ianmi + 1);
+	}
+	case ACPI_HEST_TYPE_AER_ROOT_PORT: {
+		ACPI_HEST_AER_ROOT *const aerroot = container_of(header,
+		    ACPI_HEST_AER_ROOT, Header);
+
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
+
+		if (maxlen < sizeof(*aerroot))
+			return NULL;
+		return (ACPI_HEST_HEADER *)(aerroot + 1);
+	}
+	case ACPI_HEST_TYPE_AER_ENDPOINT: {
+		ACPI_HEST_AER *const aer = container_of(header,
+		    ACPI_HEST_AER, Header);
+
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
+
+		if (maxlen < sizeof(*aer))
+			return NULL;
+		return (ACPI_HEST_HEADER *)(aer + 1);
+	}
+	case ACPI_HEST_TYPE_AER_BRIDGE: {
+		ACPI_HEST_AER_BRIDGE *const aerbridge = container_of(header,
+		    ACPI_HEST_AER_BRIDGE, Header);
+
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
+
+		if (maxlen < sizeof(*aerbridge))
+			return NULL;
+		return (ACPI_HEST_HEADER *)(aerbridge + 1);
+	}
+	case ACPI_HEST_TYPE_GENERIC_ERROR: {
+		ACPI_HEST_GENERIC *const ghes = container_of(header,
+		    ACPI_HEST_GENERIC, Header);
+
+		if (maxlen < sizeof(*ghes))
+			return NULL;
+		apei_hest_attach_ghes(sc, ghes, i);
+		return (ACPI_HEST_HEADER *)(ghes + 1);
+	}
+	case ACPI_HEST_TYPE_GENERIC_ERROR_V2: {
+		ACPI_HEST_GENERIC_V2 *const ghes_v2 = container_of(header,
+		    ACPI_HEST_GENERIC_V2, Header);
+
+		if (maxlen < sizeof(*ghes_v2))
+			return NULL;
+		apei_hest_attach_ghes_v2(sc, ghes_v2, i);
+		return (ACPI_HEST_HEADER *)(ghes_v2 + 1);
+	}
+	case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: {
+		ACPI_HEST_IA_DEFERRED_CHECK *const imdc = container_of(header,
+		    ACPI_HEST_IA_DEFERRED_CHECK, Header);
+
+		aprint_error_dev(sc->sc_dev, "%s:"
+		    " unimplemented type: 0x%04"PRIx16"\n", ctx, header->Type);
+
+		if (maxlen < sizeof(*imdc))
+			return NULL;
+		maxlen -= sizeof(*imdc);
+		ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imdc + 1);
+		if (maxlen < imdc->NumHardwareBanks*sizeof(*bank))
+			return NULL;
+		return (ACPI_HEST_HEADER *)(bank + imdc->NumHardwareBanks);
+	}
+	case ACPI_HEST_TYPE_NOT_USED3:
+	case ACPI_HEST_TYPE_NOT_USED4:
+	case ACPI_HEST_TYPE_NOT_USED5:
+	default:
+		aprint_error_dev(sc->sc_dev, "%s: unknown type:"
+		    " 0x%04"PRIx16"\n", ctx, header->Type);
+		if (header->Type >= 12) {
+			/*
+			 * `Beginning with error source type 12 and
+			 * onward, each Error Source Structure must
+			 * use the standard Error Source Structure
+			 * Header as defined below.'
+			 *
+			 * Not yet in acpica, though, so we copy this
+			 * down manually.  Length must cover the header.
+			 */
+			struct {
+				UINT16	Type;
+				UINT16	Length;
+			} *const essh = (void *)header;
+
+			if (maxlen < sizeof(*essh) || maxlen < essh->Length ||
+			    essh->Length < sizeof(*essh))
+				return NULL;
+			return (void *)((char *)header + essh->Length);
+		}
+		return NULL;
+	}
+}
+
+/*
+ * apei_hest_detach_source(sc, header, i)
+ *
+ *	Detach the ith source in the Hardware Error Source Table.
+ *	Caller is assumed to have stored where each source's header is,
+ *	so no need to return the pointer to the header of the next
+ *	source in the table.
+ */ +static void +apei_hest_detach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header, + uint32_t i) +{ + + switch (header->Type) { + case ACPI_HEST_TYPE_GENERIC_ERROR: { + ACPI_HEST_GENERIC *ghes = container_of(header, + ACPI_HEST_GENERIC, Header); + + apei_hest_detach_ghes(sc, ghes, i); + break; + } + case ACPI_HEST_TYPE_GENERIC_ERROR_V2: { + ACPI_HEST_GENERIC_V2 *ghes_v2 = container_of(header, + ACPI_HEST_GENERIC_V2, Header); + + apei_hest_detach_ghes_v2(sc, ghes_v2, i); + break; + } + case ACPI_HEST_TYPE_IA32_CHECK: + case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: + case ACPI_HEST_TYPE_IA32_NMI: + case ACPI_HEST_TYPE_NOT_USED3: + case ACPI_HEST_TYPE_NOT_USED4: + case ACPI_HEST_TYPE_NOT_USED5: + case ACPI_HEST_TYPE_AER_ROOT_PORT: + case ACPI_HEST_TYPE_AER_ENDPOINT: + case ACPI_HEST_TYPE_AER_BRIDGE: + case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: + default: + /* XXX shouldn't happen */ + break; + } +} + +/* + * apei_hest_attach(sc) + * + * Scan the Hardware Error Source Table and attach sources + * enumerated in it so we can receive and process hardware errors + * during operation. + */ +void +apei_hest_attach(struct apei_softc *sc) +{ + ACPI_TABLE_HEST *hest = sc->sc_tab.hest; + struct apei_hest_softc *hsc = &sc->sc_hest; + ACPI_HEST_HEADER *header, *next; + uint32_t i, n; + size_t resid; + + /* + * Initialize the HED (Hardware Error Device, PNP0C33) + * notification list so apei_hed_notify becomes a noop with no + * extra effort even if we fail to attach anything. + */ + SIMPLEQ_INIT(&hsc->hsc_hed_list); + + /* + * Verify the table is large enough. + */ + if (hest->Header.Length < sizeof(*hest)) { + aprint_error_dev(sc->sc_dev, "HEST: truncated table:" + " %"PRIu32" < %zu minimum bytes\n", + hest->Header.Length, sizeof(*hest)); + return; + } + + n = hest->ErrorSourceCount; + aprint_normal_dev(sc->sc_dev, "HEST: %"PRIu32 + " hardware error source%s\n", n, n == 1 ? 
"" : "s"); + + /* + * This could be SIZE_MAX but let's put a smaller arbitrary + * limit on it; if you have gigabytes of HEST something is + * probably wrong. + */ + if (n > INT32_MAX/sizeof(hsc->hsc_source[0])) { + aprint_error_dev(sc->sc_dev, "HEST: too many error sources\n"); + return; + } + hsc->hsc_source = kmem_zalloc(n * sizeof(hsc->hsc_source[0]), + KM_SLEEP); + + header = (ACPI_HEST_HEADER *)(hest + 1); + resid = hest->Header.Length - sizeof(*hest); + for (i = 0; i < n && resid; i++, header = next) { + next = apei_hest_attach_source(sc, header, i, resid); + if (next == NULL) { + aprint_error_dev(sc->sc_dev, "truncated source:" + " %"PRIu32"\n", i); + break; + } + KASSERT((const char *)next - (const char *)header <= resid); + resid -= (const char *)next - (const char *)header; + } + if (resid) { + aprint_error_dev(sc->sc_dev, "HEST:" + " %zu bytes of trailing garbage after %"PRIu32" entries\n", + resid, n); + } +} + +/* + * apei_hest_detach(sc) + * + * Stop receiving and processing hardware error notifications and + * free resources set up from the Hardware Error Source Table. + */ +void +apei_hest_detach(struct apei_softc *sc) +{ + ACPI_TABLE_HEST *hest = sc->sc_tab.hest; + struct apei_hest_softc *hsc = &sc->sc_hest; + uint32_t i, n; + + if (hsc->hsc_source) { + n = hest->ErrorSourceCount; + for (i = 0; i < n; i++) { + struct apei_source *src = &hsc->hsc_source[i]; + ACPI_HEST_HEADER *header = src->as_header; + + if (src->as_header == NULL) + continue; + apei_hest_detach_source(sc, header, i); + } + kmem_free(hsc->hsc_source, n * sizeof(hsc->hsc_source[0])); + hsc->hsc_source = NULL; + } +} + +void +apei_hed_notify(void) +{ + device_t apei0; + struct apei_softc *sc; + struct apei_hest_softc *hsc; + struct apei_source *src; + + /* + * Take a reference to the apei0 device so it doesn't go away + * while we're working. 
+ */ + if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) + goto out; + sc = device_private(apei0); + + /* + * If there's no HEST, nothing to do. + */ + if (sc->sc_tab.hest == NULL) + goto out; + hsc = &sc->sc_hest; + + /* + * Walk through the HED-notified hardware error sources and + * check them. The list is stable until we release apei0. + */ + SIMPLEQ_FOREACH(src, &hsc->hsc_hed_list, as_entry) { + ACPI_HEST_HEADER *const header = src->as_header; + + switch (header->Type) { + case ACPI_HEST_TYPE_GENERIC_ERROR: + apei_hest_ghes_handle(sc, src); + break; + case ACPI_HEST_TYPE_GENERIC_ERROR_V2: + apei_hest_ghes_v2_handle(sc, src); + break; + case ACPI_HEST_TYPE_IA32_CHECK: + case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: + case ACPI_HEST_TYPE_IA32_NMI: + case ACPI_HEST_TYPE_NOT_USED3: + case ACPI_HEST_TYPE_NOT_USED4: + case ACPI_HEST_TYPE_NOT_USED5: + case ACPI_HEST_TYPE_AER_ROOT_PORT: + case ACPI_HEST_TYPE_AER_ENDPOINT: + case ACPI_HEST_TYPE_AER_BRIDGE: +// case ACPI_HEST_TYPE_GENERIC_ERROR: +// case ACPI_HEST_TYPE_GENERIC_ERROR_V2: + case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: + default: + /* XXX shouldn't happen */ + break; + } + } + +out: if (apei0) { + device_release(apei0); + apei0 = NULL; + } +} diff --git a/sys/dev/acpi/apei_hestvar.h b/sys/dev/acpi/apei_hestvar.h new file mode 100644 index 000000000000..a816d69dff55 --- /dev/null +++ b/sys/dev/acpi/apei_hestvar.h @@ -0,0 +1,85 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_HESTVAR_H_ +#define _SYS_DEV_ACPI_APEI_HESTVAR_H_ + +#include +#include + +#include + +struct apei_mapreg; +struct apei_softc; + +#if defined(__i386__) || defined(__x86_64__) +struct nmi_handler; +#endif + +/* + * struct apei_source + * + * Software state for a hardware error source from the HEST, + * Hardware Error Source Table, to process error notifications. + */ +struct apei_source { + struct apei_softc *as_sc; + ACPI_HEST_HEADER *as_header; + union { + struct { + ACPI_HEST_GENERIC_STATUS *gesb; + } as_ghes; + struct { + ACPI_HEST_GENERIC_STATUS *gesb; + struct apei_mapreg *read_ack; + } as_ghes_v2; + }; + union { + struct callout as_ch; +#if defined(__i386__) || defined(__x86_64__) + struct nmi_handler *as_nmi; +#endif + SIMPLEQ_ENTRY(apei_source) as_entry; + }; +}; + +/* + * struct apei_hest_softc + * + * Software state for processing hardware error reports during + * operation, from the HEST, Hardware Error Source table. 
+ */ +struct apei_hest_softc { + struct apei_source *hsc_source; + SIMPLEQ_HEAD(, apei_source) hsc_hed_list; +}; + +void apei_hest_attach(struct apei_softc *); +void apei_hest_detach(struct apei_softc *); + +#endif /* _SYS_DEV_ACPI_APEI_HESTVAR_H_ */ diff --git a/sys/dev/acpi/apei_interp.c b/sys/dev/acpi/apei_interp.c new file mode 100644 index 000000000000..65828a6ae7b9 --- /dev/null +++ b/sys/dev/acpi/apei_interp.c @@ -0,0 +1,382 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI action interpreter. 
+ * + * APEI provides a generalized abstraction to implement the actions an + * OS must take to inject an error, or save state in a persistent error + * record for the next boot, since there are many different hardware + * register interfaces for, e.g., injecting errors. + * + * You might think that APEI, being part of ACPI, would use the usual + * ACPI interpreter to run ACPI methods for these actions. You would + * be wrong. Alas. + * + * Instead, there is an entirely different little language of actions + * that an OS must write programs in to inject errors, and an entirely + * different little language of instructions that the interpreter for + * the actions uses to interpret the OS's error injection program. Got + * it? + * + * The EINJ and ERST tables provide a series of entries that look like: + * + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=SKIP_NEXT_INSTRUCTION_IF_TRUE | + * | Value=0xdeadbeef | + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=WRITE_REGISTER_VALUE | + * | Register=0x7fabcd14 [memory] | + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=READ_REGISTER | + * | Register=0x7fabcd1c [memory] | + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=WRITE_REGISTER | + * | Register=0x7fabcd20 [memory] | + * +-----------------------------------------------+ + * | Action=EXECUTE_OPERATION | + * | Instruction=LOAD_VAR1 | + * | Register=0x7fabcf00 [memory] | + * +-----------------------------------------------+ + * | Action=SET_ERROR_TYPE | + * | Instruction=WRITE_REGISTER_VALUE | + * | Register=0x7fabcd24 [memory] | + * | Value=42 | + * +-----------------------------------------------+ + * | ...
| + * +-----------------------------------------------+ + * + * The entries tell the OS, for each action the OS might want to + * perform like BEGIN_INJECTION_OPERATION or SET_ERROR_TYPE or + * EXECUTE_OPERATION, what instructions must be executed and in what + * order. + * + * The instructions run in one of two little state machines -- there's + * a different instruction set for EINJ and ERST -- and vary from noops + * to reading and writing registers to arithmetic on registers to + * conditional and unconditional branches. + * + * Yes, that means this little language -- the ERST language, anyway, + * not the EINJ language -- is Turing-complete. + * + * This APEI interpreter first compiles the table into a contiguous + * sequence of instructions for each action, to make execution easier, + * since there's no requirement that the actions be in order, and the + * GOTO instruction relies on contiguous indexing of the instructions + * for an action. + * + * This interpreter also does a little validation so the firmware + * doesn't, e.g., GOTO somewhere in oblivion. The validation is mainly + * a convenience for catching mistakes in firmware, not a security + * measure, since the OS is absolutely vulnerable to malicious firmware + * anyway. + * + * XXX Map instruction registers in advance so ERST is safe in nasty + * contexts, e.g. to save dmesg? + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include +#include + +#include +#include + +/* + * struct apei_actinst + * + * Sequence of instructions to execute for an action. + */ +struct apei_actinst { + uint32_t ninst; + uint32_t ip; + struct acpi_whea_header **inst; +}; + +/* + * struct apei_interp + * + * Table of instructions to interpret APEI actions. 
+*/ +struct apei_interp { + const char *name; + const char *const *actname; + unsigned nact; + const char *const *instname; + unsigned ninst; + bool (*instvalid)(ACPI_WHEA_HEADER *, uint32_t, + uint32_t); + void (*instfunc)(ACPI_WHEA_HEADER *, void *, + uint32_t *, uint32_t); + struct apei_actinst actinst[]; +}; + +/* + * apei_interp_create(name, actname, nact, instname, ninst, + * instvalid, instfunc) + * + * Allocate a zeroed interpreter with room for nact per-action + * instruction sequences, and record the caller's action and + * instruction name tables and callbacks in it. + */ +struct apei_interp * +apei_interp_create(const char *name, + const char *const *actname, unsigned nact, + const char *const *instname, unsigned ninst, + bool (*instvalid)(ACPI_WHEA_HEADER *, uint32_t, uint32_t), + void (*instfunc)(ACPI_WHEA_HEADER *, void *, uint32_t *, uint32_t)) +{ + struct apei_interp *I; + + I = kmem_zalloc(offsetof(struct apei_interp, actinst[nact]), KM_SLEEP); + I->name = name; + I->actname = actname; + I->nact = nact; + I->instname = instname; + I->ninst = ninst; + I->instvalid = instvalid; + I->instfunc = instfunc; + + return I; +} + +/* + * apei_interp_destroy(I) + * + * Free the interpreter I, including any per-action instruction + * arrays allocated by apei_interp_pass3_alloc. + */ +void +apei_interp_destroy(struct apei_interp *I) +{ + unsigned action, nact = I->nact; + + /* + * apei_interp_pass3_alloc allocates A->inst only for actions + * that are neither empty (0) nor disabled (UINT32_MAX), sized + * by A->ninst, so free with the same test and the same size. + */ + for (action = 0; action < nact; action++) { + struct apei_actinst *const A = &I->actinst[action]; + + if (A->ninst == 0 || A->ninst == UINT32_MAX || + A->inst == NULL) + continue; + kmem_free(A->inst, A->ninst * sizeof(A->inst[0])); + A->inst = NULL; + } + + kmem_free(I, offsetof(struct apei_interp, actinst[nact])); +} + +/* + * apei_interp_pass1_load(I, i, E) + * + * Load the ith table entry E into the interpreter I. To be + * called for each entry in the table sequentially. + * + * This first pass counts the number of instructions for each + * action, so we can allocate an array of instructions for + * indexing each action. + */ +void +apei_interp_pass1_load(struct apei_interp *I, uint32_t i, + ACPI_WHEA_HEADER *E) +{ + + /* + * If we don't recognize this action, ignore it and move on.
+ */ + if (E->Action >= I->nact || I->actname[E->Action] == NULL) { + aprint_error("%s[%"PRIu32"]: unknown action: 0x%"PRIx8"\n", + I->name, i, E->Action); + return; + } + struct apei_actinst *const A = &I->actinst[E->Action]; + + /* + * If we can't interpret this instruction for this action, or + * if we couldn't interpret a previous instruction for this + * action, ignore _all_ instructions for this action -- by + * marking the action as having UINT32_MAX instructions -- and + * move on. + */ + if (E->Instruction >= I->ninst || + I->instname[E->Instruction] == NULL) { + aprint_error("%s[%"PRIu32"]: unknown instruction: 0x%02"PRIx8 + "\n", I->name, i, E->Instruction); + A->ninst = UINT32_MAX; + return; + } + if (A->ninst == UINT32_MAX) + return; + + /* + * Count another instruction. We will make a pointer + * to it in a later pass. + */ + A->ninst++; + + /* + * If it overflows a reasonable size, bail on this instruction. + */ + if (A->ninst >= 256) { + aprint_error("%s[%"PRIu32"]:" + " too many instructions for action %"PRIu32" (%s)\n", + I->name, i, + E->Action, I->actname[E->Action]); + A->ninst = UINT32_MAX; + return; + } +} + +/* + * apei_interp_pass2_verify(I, i, E) + * + * Verify the ith entry's instruction, using the caller's + * instvalid function, now that all the instructions have been + * counted. To be called for each entry in the table + * sequentially. + * + * This second pass checks that GOTO instructions in particular + * don't jump out of bounds. + */ +void +apei_interp_pass2_verify(struct apei_interp *I, uint32_t i, + ACPI_WHEA_HEADER *E) +{ + + /* + * If there's no instruction validation function, skip this + * pass. + */ + if (I->instvalid == NULL) + return; + + /* + * If we skipped it in earlier passes, skip it now. This must + * be the same test as in pass 1: actinst[] is allocated with + * nact entries, so Action == nact is out of bounds too. + */ + if (E->Action >= I->nact || I->actname[E->Action] == NULL) + return; + + /* + * If the instruction is invalid, disable the whole action.
+ */ + struct apei_actinst *const A = &I->actinst[E->Action]; + if (!(*I->instvalid)(E, A->ninst, i)) + A->ninst = UINT32_MAX; +} + +/* + * apei_interp_pass3_alloc(I) + * + * Allocate an array of instructions for each action that we + * didn't decide to bail on, marked with UINT32_MAX. + */ +void +apei_interp_pass3_alloc(struct apei_interp *I) +{ + unsigned action; + + for (action = 0; action < I->nact; action++) { + struct apei_actinst *const A = &I->actinst[action]; + if (A->ninst == 0 || A->ninst == UINT32_MAX) + continue; + A->inst = kmem_zalloc(A->ninst * sizeof(A->inst[0]), KM_SLEEP); + } +} + +/* + * apei_interp_pass4_assemble(I, i, E) + * + * Put the instruction for the ith entry E into the instruction + * array for its action. To be called for each entry in the table + * sequentially. + */ +void +apei_interp_pass4_assemble(struct apei_interp *I, uint32_t i, + ACPI_WHEA_HEADER *E) +{ + + /* + * If we skipped it in earlier passes, skip it now. + */ + if (E->Action >= I->nact || I->actname[E->Action] == NULL) + return; + + struct apei_actinst *const A = &I->actinst[E->Action]; + if (A->ninst == UINT32_MAX) + return; + + KASSERT(A->ip < A->ninst); + A->inst[A->ip++] = E; +} + +/* + * apei_interp_pass5_verify(I) + * + * Paranoia: Verify we got all the instructions for each action, + * verify the actions point to their own instructions, and dump + * the instructions for each action, collated, with aprint_debug. + */ +void +apei_interp_pass5_verify(struct apei_interp *I) +{ + unsigned action; + + for (action = 0; action < I->nact; action++) { + struct apei_actinst *const A = &I->actinst[action]; + unsigned j; + + /* + * If the action is disabled, it's all set. + */ + if (A->ninst == UINT32_MAX) + continue; + KASSERTMSG(A->ip == A->ninst, + "action %s ip=%"PRIu32" ninstruction=%"PRIu32, + I->actname[action], A->ip, A->ninst); + + /* + * XXX Dump the complete instruction table. 
+ */ + for (j = 0; j < A->ninst; j++) { + ACPI_WHEA_HEADER *const E = A->inst[j]; + + KASSERT(E->Action == action); + aprint_debug("%s: %s[%"PRIu32"]: %s\n", + I->name, I->actname[action], j, + I->instname[E->Instruction]); + } + } +} + +/* + * apei_interpret(I, action, cookie) + * + * Run the instructions associated with the given action by + * calling the interpreter's instfunc for each one. + * + * Halt when the instruction pointer runs past the end of the + * array, or after 1000 cycles, whichever comes first. + * + * Do nothing for an unknown action, or for an action that has no + * instruction array because it is empty or was disabled (marked + * with UINT32_MAX) while loading the table. + */ +void +apei_interpret(struct apei_interp *I, unsigned action, void *cookie) +{ + unsigned juice = 1000; + uint32_t ip = 0; + + /* + * actinst[] is allocated with nact entries, so action == nact + * is out of bounds too. + */ + if (action >= I->nact || I->actname[action] == NULL) + return; + struct apei_actinst *const A = &I->actinst[action]; + + /* + * A disabled action has ninst == UINT32_MAX but no inst array, + * so don't walk off a null pointer. + */ + if (A->ninst == 0 || A->ninst == UINT32_MAX || A->inst == NULL) + return; + + while (ip < A->ninst && juice --> 0) { + ACPI_WHEA_HEADER *const E = A->inst[ip++]; + + (*I->instfunc)(E, cookie, &ip, A->ninst); + } +} diff --git a/sys/dev/acpi/apei_interp.h b/sys/dev/acpi/apei_interp.h new file mode 100644 index 000000000000..8b3570d83b89 --- /dev/null +++ b/sys/dev/acpi/apei_interp.h @@ -0,0 +1,55 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_INTERP_H_ +#define _SYS_DEV_ACPI_APEI_INTERP_H_ + +#include + +struct acpi_whea_header; +struct apei_interp; + +struct apei_interp *apei_interp_create(const char *, + const char *const *, unsigned, + const char *const *, unsigned, + bool (*)(struct acpi_whea_header *, uint32_t, uint32_t), + void (*)(struct acpi_whea_header *, void *, uint32_t *, uint32_t)); +void apei_interp_destroy(struct apei_interp *); + +void apei_interp_pass1_load(struct apei_interp *, uint32_t, + struct acpi_whea_header *); +void apei_interp_pass2_verify(struct apei_interp *, uint32_t, + struct acpi_whea_header *); +void apei_interp_pass3_alloc(struct apei_interp *); +void apei_interp_pass4_assemble(struct apei_interp *, uint32_t, + struct acpi_whea_header *); +void apei_interp_pass5_verify(struct apei_interp *); + +void apei_interpret(struct apei_interp *, unsigned, void *); + +#endif /* _SYS_DEV_ACPI_APEI_INTERP_H_ */ diff --git a/sys/dev/acpi/apei_mapreg.c b/sys/dev/acpi/apei_mapreg.c new file mode 100644 index 000000000000..3f2a77418eb1 --- /dev/null +++ b/sys/dev/acpi/apei_mapreg.c @@ -0,0 +1,201 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Pre-mapped ACPI register access + * + * XXX This isn't APEI-specific -- it should be moved into the general + * ACPI API, and unified with the AcpiRead/AcpiWrite implementation. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include + +#include +#include + +/* + * apei_mapreg_map(reg) + * + * Return a mapping for use with apei_mapreg_read, or NULL if it + * can't be mapped. + */ +struct apei_mapreg * +apei_mapreg_map(const ACPI_GENERIC_ADDRESS *reg) +{ + + /* + * Verify the result is reasonable. + */ + switch (reg->BitWidth) { + case 8: + case 16: + case 32: + case 64: + break; + default: + return NULL; + } + + /* + * Verify we know how to do the access width. 
+ */ + switch (reg->AccessWidth) { + case 1: /* 8-bit */ + case 2: /* 16-bit */ + case 3: /* 32-bit */ + case 4: /* 64-bit */ + break; + default: + return NULL; + } + + /* + * Verify we don't need to shift anything, because I can't + * figure out how the shifting is supposed to work in five + * minutes of looking at the spec. + */ + switch (reg->BitOffset) { + case 0: + break; + default: + return NULL; + } + + /* + * Verify the bit width is a multiple of the access width so + * we're not accessing more than we need. + */ + if (reg->BitWidth % (8*(1 << (reg->AccessWidth - 1)))) + return NULL; + + /* + * Dispatch on the space id. + * + * Currently this only handles memory space because I/O space + * is too painful to contemplate reimplementing here. + * + * Map the whole register (BitWidth bits), not just one access + * unit, since apei_mapreg_read/write touch every chunk of it. + */ + switch (reg->SpaceId) { + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + return AcpiOsMapMemory(reg->Address, + reg->BitWidth / NBBY); + default: + return NULL; + } +} + +/* + * apei_mapreg_unmap(reg, map) + * + * Unmap a mapping previously returned by apei_mapreg_map. The + * length must match the one apei_mapreg_map passed when mapping. + */ +void +apei_mapreg_unmap(const ACPI_GENERIC_ADDRESS *reg, + struct apei_mapreg *map) +{ + + AcpiOsUnmapMemory(map, reg->BitWidth / NBBY); +} + +/* + * apei_mapreg_read(reg, map) + * + * Read from reg via map previously obtained by apei_mapreg_map.
+ * + * The register is read in BitWidth/chunkbits accesses of the + * register's access width at consecutive addresses, with the + * lowest-addressed chunk in the least significant bits. + */ +uint64_t +apei_mapreg_read(const ACPI_GENERIC_ADDRESS *reg, + const struct apei_mapreg *map) +{ + unsigned chunkbits = NBBY*(1 << (reg->AccessWidth - 1)); + unsigned i, n = reg->BitWidth / chunkbits; /* number of chunks */ + uint64_t v = 0; + + for (i = 0; i < n; i++) { + uint64_t chunk; + + switch (reg->AccessWidth) { + case 1: + chunk = *((volatile const uint8_t *)map + i); + break; + case 2: + chunk = *((volatile const uint16_t *)map + i); + break; + case 3: + chunk = *((volatile const uint32_t *)map + i); + break; + case 4: + chunk = *((volatile const uint64_t *)map + i); + break; + default: + __unreachable(); + } + v |= chunk << (i*chunkbits); + } + + membar_acquire(); /* XXX probably not right for MMIO */ + return v; +} + +/* + * apei_mapreg_write(reg, map, v) + * + * Write to reg via map previously obtained by apei_mapreg_map. + * + * The value is written in BitWidth/chunkbits accesses of the + * register's access width at consecutive addresses, least + * significant chunk first. + */ +void +apei_mapreg_write(const ACPI_GENERIC_ADDRESS *reg, struct apei_mapreg *map, + uint64_t v) +{ + unsigned chunkbits = NBBY*(1 << (reg->AccessWidth - 1)); + unsigned i, n = reg->BitWidth / chunkbits; /* number of chunks */ + + membar_release(); /* XXX probably not right for MMIO */ + for (i = 0; i < n; i++) { + uint64_t chunk = v >> (i*chunkbits); + + switch (reg->AccessWidth) { + case 1: + *((volatile uint8_t *)map + i) = chunk; + break; + case 2: + *((volatile uint16_t *)map + i) = chunk; + break; + case 3: + *((volatile uint32_t *)map + i) = chunk; + break; + case 4: + *((volatile uint64_t *)map + i) = chunk; + break; + default: + __unreachable(); + } + } +} diff --git a/sys/dev/acpi/apei_mapreg.h b/sys/dev/acpi/apei_mapreg.h new file mode 100644 index 000000000000..36efaee4fe06 --- /dev/null +++ b/sys/dev/acpi/apei_mapreg.h @@ -0,0 +1,46 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_MAPREG_H_ +#define _SYS_DEV_ACPI_APEI_MAPREG_H_ + +#include + +struct acpi_generic_address; +struct apei_mapreg; + +struct apei_mapreg *apei_mapreg_map(const struct acpi_generic_address *); +void apei_mapreg_unmap(const struct acpi_generic_address *, + struct apei_mapreg *); + +uint64_t apei_mapreg_read(const struct acpi_generic_address *, + const struct apei_mapreg *); +void apei_mapreg_write(const struct acpi_generic_address *, + struct apei_mapreg *, uint64_t); + +#endif /* _SYS_DEV_ACPI_APEI_MAPREG_H_ */ diff --git a/sys/dev/acpi/apei_reg.c b/sys/dev/acpi/apei_reg.c new file mode 100644 index 000000000000..ffc2b7d12790 --- /dev/null +++ b/sys/dev/acpi/apei_reg.c @@ -0,0 +1,103 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI register access for ERST/EINJ action instructions + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include +#include + +/* + * apei_read_register(Register, Mask, &X) + * + * Read from Register, shifted out of position and then masked + * with Mask, and store the result in X. + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#read-register + * + * (I'm guessing this applies to both ERST and EINJ, even though + * that section is under the ERST part.) 
+ */ +ACPI_STATUS +apei_read_register(ACPI_GENERIC_ADDRESS *Register, uint64_t Mask, uint64_t *p) +{ + const uint8_t BitOffset = Register->BitOffset; + uint64_t X; + ACPI_STATUS rv; + + rv = AcpiRead(&X, Register); + if (ACPI_FAILURE(rv)) { + *p = 0; /* XXX */ + return rv; + } + X >>= BitOffset; + X &= Mask; + + *p = X; + return AE_OK; +} + +/* + * apei_write_register(Register, Mask, preserve_register, X) + * + * Write X, masked with Mask and shifted into position, to + * Register, preserving other bits if preserve_register is true. + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#write-register + * + * Note: The Preserve Register semantics is based on the clearer + * indentation at + * https://uefi.org/sites/default/files/resources/ACPI_5_1release.pdf#page=714 + * which has been lost in more recent versions of the spec. + */ +ACPI_STATUS +apei_write_register(ACPI_GENERIC_ADDRESS *Register, uint64_t Mask, + bool preserve_register, uint64_t X) +{ + const uint8_t BitOffset = Register->BitOffset; + ACPI_STATUS rv; + + X &= Mask; + X <<= BitOffset; + if (preserve_register) { + uint64_t Y; + + rv = AcpiRead(&Y, Register); + if (ACPI_FAILURE(rv)) + return rv; + Y &= ~(Mask << BitOffset); + X |= Y; + } + return AcpiWrite(X, Register); +} diff --git a/sys/dev/acpi/apei_reg.h b/sys/dev/acpi/apei_reg.h new file mode 100644 index 000000000000..7afe09b2c2d1 --- /dev/null +++ b/sys/dev/acpi/apei_reg.h @@ -0,0 +1,40 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_DEV_ACPI_APEI_REG_H_
+#define _SYS_DEV_ACPI_APEI_REG_H_
+
+#include
+
+#include
+/* Masked, BitOffset-shifted access to an APEI (ERST/EINJ) action register. */
+ACPI_STATUS apei_read_register(ACPI_GENERIC_ADDRESS *, uint64_t, uint64_t *);
+ACPI_STATUS apei_write_register(ACPI_GENERIC_ADDRESS *, uint64_t, bool,
+    uint64_t);
+
+#endif /* _SYS_DEV_ACPI_APEI_REG_H_ */
diff --git a/sys/dev/acpi/apeivar.h b/sys/dev/acpi/apeivar.h
new file mode 100644
index 000000000000..4729c2a890af
--- /dev/null
+++ b/sys/dev/acpi/apeivar.h
@@ -0,0 +1,81 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_DEV_ACPI_APEIVAR_H_
+#define _SYS_DEV_ACPI_APEIVAR_H_
+
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+struct sysctllog;	/* forward declaration; only pointed to in this header */
+struct sysctlnode;	/* forward declaration; only pointed to in this header */
+
+/*
+ * struct apei_tab
+ *
+ *	Collection of pointers to APEI-related ACPI tables. Used
+ *	inside struct apei_softc, and by apei_match without an
+ *	apei_softc.
+ */
+struct apei_tab {
+	ACPI_TABLE_BERT *bert;	/* Boot Error Record Table */
+	ACPI_TABLE_EINJ *einj;	/* Error Injection Table */
+	ACPI_TABLE_ERST *erst;	/* Error Record Serialization Table */
+	ACPI_TABLE_HEST *hest;	/* Hardware Error Source Table */
+};
+
+/*
+ * struct apei_softc
+ *
+ *	All software state for APEI.
+ */
+struct apei_softc {
+	device_t sc_dev;			/* device handle */
+	struct apei_tab sc_tab;			/* APEI ACPI table pointers */
+
+	struct sysctllog *sc_sysctllog;		/* sysctl log handle */
+	const struct sysctlnode *sc_sysctlroot;	/* presumably APEI's sysctl root node; confirm */
+
+	struct apei_bert_softc sc_bert;		/* BERT software state (type defined elsewhere) */
+	struct apei_einj_softc sc_einj;		/* EINJ software state (type defined elsewhere) */
+	struct apei_erst_softc sc_erst;		/* ERST software state (type defined elsewhere) */
+	struct apei_hest_softc sc_hest;		/* HEST software state (type defined elsewhere) */
+};
+
+uint32_t apei_gesb_report(struct apei_softc *,
+    const ACPI_HEST_GENERIC_STATUS *, size_t, const char *,
+    bool *);
+
+#endif /* _SYS_DEV_ACPI_APEIVAR_H_ */
diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi
index ead9bab1e9bd..a30dfbc009f0 100644
--- a/sys/dev/acpi/files.acpi
+++ b/sys/dev/acpi/files.acpi
@@ -14,6 +14,7 @@ define acpiwdrtbus { }
 define acpisdtbus { }
 define acpigtdtbus { }
 define acpimadtbus { }
+define apeibus { }
 
 device acpi: acpica, acpiapmbus, acpinodebus, acpiecdtbus, acpisdtbus, acpigtdtbus, acpimadtbus, acpihpetbus, acpiwdrtbus, sysmon_power, sysmon_taskq
 attach acpi at acpibus
@@ -309,4 +310,21 @@ file dev/acpi/igpio_acpi.c igpio_acpi
 attach dwcmmc at acpinodebus with dwcmmc_acpi
 file dev/acpi/dwcmmc_acpi.c dwcmmc_acpi
 
+# ACPI Platform Error Interface
+device apei
+attach apei at apeibus
+file dev/acpi/apei.c apei
+file dev/acpi/apei_bert.c apei
+file dev/acpi/apei_einj.c apei
+file dev/acpi/apei_erst.c apei
+file dev/acpi/apei_hest.c apei
+file dev/acpi/apei_interp.c apei
+file dev/acpi/apei_mapreg.c apei
+file dev/acpi/apei_reg.c apei
+
+# ACPI Hardware Error Device
+device acpihed: apei
+attach acpihed at acpinodebus
+file dev/acpi/acpi_hed.c acpihed
+
 include "dev/acpi/wmi/files.wmi"
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index d8b64a2134cf..57313bc5324e 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -285,6 +285,7 @@ SUBDIR+= acpibut
 SUBDIR+= acpicpu
 SUBDIR+= acpidalb
 SUBDIR+= acpifan
+SUBDIR+= acpihed
 SUBDIR+= acpilid
 SUBDIR+= acpipmtr
 SUBDIR+= acpitz
@@ -292,6 +293,7 @@ SUBDIR+= acpivga
 SUBDIR+= acpiwdrt
 SUBDIR+= acpiwmi
 SUBDIR+= aibs
+SUBDIR+= apei
 SUBDIR+= asus
 SUBDIR+= fujbp
 SUBDIR+= fujhk
diff
--git a/sys/modules/acpihed/Makefile b/sys/modules/acpihed/Makefile new file mode 100644 index 000000000000..7f23c86050a6 --- /dev/null +++ b/sys/modules/acpihed/Makefile @@ -0,0 +1,11 @@ +# $NetBSD$ + +.include "../Makefile.inc" + +.PATH: $S/dev/acpi + +KMOD= acpihed +IOCONF= acpihed.ioconf +SRCS= acpi_hed.c + +.include <bsd.kmodule.mk> diff --git a/sys/modules/acpihed/acpihed.ioconf b/sys/modules/acpihed/acpihed.ioconf new file mode 100644 index 000000000000..8edb2783bc6b --- /dev/null +++ b/sys/modules/acpihed/acpihed.ioconf @@ -0,0 +1,11 @@ +# $NetBSD$ +# + +ioconf acpihed + +include "conf/files" +include "dev/acpi/files.acpi" + +pseudo-root acpi* + +acpihed* at acpi? diff --git a/sys/modules/apei/Makefile b/sys/modules/apei/Makefile new file mode 100644 index 000000000000..909dab675de2 --- /dev/null +++ b/sys/modules/apei/Makefile @@ -0,0 +1,20 @@ +# $NetBSD$ +# + +.include "../Makefile.inc" + +.PATH: $S/dev/acpi + +KMOD= apei +IOCONF= apei.ioconf + +SRCS+= apei.c +SRCS+= apei_bert.c +SRCS+= apei_einj.c +SRCS+= apei_erst.c +SRCS+= apei_hest.c +SRCS+= apei_interp.c +SRCS+= apei_mapreg.c +SRCS+= apei_reg.c + +.include <bsd.kmodule.mk> diff --git a/sys/modules/apei/apei.ioconf b/sys/modules/apei/apei.ioconf new file mode 100644 index 000000000000..d2099452d391 --- /dev/null +++ b/sys/modules/apei/apei.ioconf @@ -0,0 +1,11 @@ +# $NetBSD$ +# + +ioconf apei + +include "conf/files" +include "dev/acpi/files.acpi" + +pseudo-root apeibus* + +apei* at apeibus?