diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/acpi/apei.c
--- a/sys/dev/acpi/apei.c	Mon Oct 21 15:57:45 2024 +0000
+++ b/sys/dev/acpi/apei.c	Thu Oct 24 20:08:59 2024 +0000
@@ -58,6 +58,7 @@
 #include <dev/acpi/apei_hestvar.h>
 #include <dev/acpi/apei_interp.h>
 #include <dev/acpi/apeivar.h>
+#include <dev/pci/pci_error.h>
 
 #define	_COMPONENT	ACPI_RESOURCE_COMPONENT
 ACPI_MODULE_NAME	("apei")
@@ -313,10 +314,10 @@ apei_format_guid(const struct uuid *uuid
 {
 
 	snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x,"
-	    "0x%02x%02x,"
-	    "{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
+	    "{0x%02x,0x%02x,"
+	    "0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}",
 	    uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
-	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_hi_and_reserved,
+	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
 	    uuid->node[0], uuid->node[1], uuid->node[2],
 	    uuid->node[3], uuid->node[4], uuid->node[5]);
 }
@@ -356,6 +357,8 @@ static const char *const apei_gede_sever
 };
 
 /*
+ * N.2.5. Memory Error Section
+ *
  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
  */
 static const struct uuid CPER_MEMORY_ERROR_SECTION =
@@ -475,6 +478,107 @@ apei_cper_memory_error_report(struct ape
 }
 
 /*
+ * N.2.7. PCI Express Error Section
+ *
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section
+ */
+static const struct uuid CPER_PCIE_ERROR_SECTION =
+    {0xd995e954,0xbbc1,0x430f,0xad,0x91,{0xb4,0x4d,0xcb,0x3c,0x6f,0x35}};
+
+static const char *const cper_pcie_error_port_type[] = {
+#define	F(LN, SN, V)	[LN] = #SN,
+	CPER_PCIE_ERROR_PORT_TYPES(F)
+#undef	F
+};
+
+/*
+ * apei_cper_pcie_error_report(sc, buf, len, ctx)
+ *
+ *	Report the PCI Express error section at buf, read as a struct
+ *	cper_pcie_error: print each field whose bit is set in
+ *	ValidationBits, with every message prefixed by ctx, and then
+ *	pass the section to pci_cper_error.  len is not examined
+ *	here.
+ */
+static void
+apei_cper_pcie_error_report(struct apei_softc *sc, const void *buf, size_t len,
+    const char *ctx)
+{
+	const struct cper_pcie_error *PE = buf;
+	char bitbuf[1024];
+
+	snprintb(bitbuf, sizeof(bitbuf),
+	    CPER_PCIE_ERROR_VALIDATION_BITS_FMT, PE->ValidationBits);
+	aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, bitbuf);
+	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_PORT_TYPE) {
+		const uint32_t t = PE->PortType;
+		const char *n = t < __arraycount(cper_pcie_error_port_type)
+		    ? cper_pcie_error_port_type[t] : NULL;
+
+		if (n) {
+			device_printf(sc->sc_dev, "%s: PortType=%"PRIu32
+			    " (%s)\n", ctx, t, n);
+		} else {
+			device_printf(sc->sc_dev, "%s: PortType=%"PRIu32"\n",
+			    ctx, t);
+		}
+	}
+	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_VERSION) {
+		/* XXX BCD */
+		device_printf(sc->sc_dev, "%s: Version=0x%"PRIx32"\n",
+		    ctx, PE->Version);
+	}
+	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_COMMAND_STATUS) {
+		device_printf(sc->sc_dev, "%s: CommandStatus=0x%04"PRIx32"\n",
+		    ctx, PE->CommandStatus);
+	}
+	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) {
+		/* XXX decode vendor/product/class/fun/dev/seg/bus */
+		char hex[2*sizeof(PE->DeviceID) + 1];
+		const unsigned char *p = (const void *)&PE->DeviceID;
+		unsigned i;
+
+		for (i = 0; i < sizeof(PE->DeviceID); i++)
+			snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx", p[i]);
+		device_printf(sc->sc_dev, "%s: DeviceID={%s}\n", ctx, hex);
+	}
+	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_SERIAL) {
+		device_printf(sc->sc_dev, "%s: DeviceSerial={%016"PRIx64"}\n",
+		    ctx, PE->DeviceSerial);
+	}
+	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS) {
+		device_printf(sc->sc_dev, "%s: BridgeControlStatus=0x%"PRIx32
+		    "\n", ctx, PE->BridgeControlStatus);
+	}
+	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE) {
+		char hex[2*sizeof(PE->CapabilityStructure) + 1];
+		unsigned i;
+
+		for (i = 0; i < sizeof(PE->CapabilityStructure); i++) {
+			snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx",
+			    PE->CapabilityStructure[i]);
+		}
+		device_printf(sc->sc_dev, "%s: CapabilityStructure={%s}\n",
+		    ctx, hex);
+	}
+	if (PE->ValidationBits & CPER_PCIE_ERROR_VALID_AER_INFO) {
+		char hex[2*sizeof(PE->AERInfo) + 1];
+		unsigned i;
+
+		for (i = 0; i < sizeof(PE->AERInfo); i++) {
+			snprintf(hex + 2*i, sizeof(hex) - 2*i, "%02hhx",
+			    PE->AERInfo[i]);
+		}
+		device_printf(sc->sc_dev, "%s: AERInfo={%s}\n", ctx, hex);
+	}
+
+	/*
+	 * Let the PCI subsystem handle it.
+	 */
+	pci_cper_error(PE);
+}
+
+/*
  * apei_cper_reports
  *
  *	Table of known Common Platform Error Record types, symbolic
@@ -494,6 +598,9 @@ static const struct apei_cper_report {
 	{ "memory", &CPER_MEMORY_ERROR_SECTION,
 	  sizeof(struct cper_memory_error),
 	  apei_cper_memory_error_report },
+	{ "PCIe", &CPER_PCIE_ERROR_SECTION,
+	  sizeof(struct cper_pcie_error),
+	  apei_cper_pcie_error_report },
 };
 
 /*
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/acpi/apei_cper.h
--- a/sys/dev/acpi/apei_cper.h	Mon Oct 21 15:57:45 2024 +0000
+++ b/sys/dev/acpi/apei_cper.h	Thu Oct 24 20:08:59 2024 +0000
@@ -62,14 +62,14 @@ struct cper_header {
 } __packed;
 __CTASSERT(sizeof(struct cper_header) == 128);
 
-enum {				/* struct cper_header::error_severity */
+enum {				/* struct cper_header::ErrorSeverity */
 	CPER_ERROR_SEVERITY_RECOVERABLE		= 0,
 	CPER_ERROR_SEVERITY_FATAL		= 1,
 	CPER_ERROR_SEVERITY_CORRECTED		= 2,
 	CPER_ERROR_SEVERITY_INFORMATIONAL	= 3,
 };
 
-enum {				/* struct cper_header::validation_bits */
+enum {				/* struct cper_header::ValidationBits */
 	CPER_VALID_PLATFORM_ID		= __BIT(0),
 	CPER_VALID_TIMESTAMP		= __BIT(1),
 	CPER_VALID_PARTITION_ID		= __BIT(2),
@@ -78,7 +78,7 @@ enum { /* struct cper_header::validat
 /*
  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#error-record-header-flags
  */
-enum {				/* struct cper_header::flags */
+enum {				/* struct cper_header::Flags */
 	CPER_HW_ERROR_FLAG_RECOVERED	= __BIT(0),
 	CPER_HW_ERROR_FLAG_PREVERR	= __BIT(1),
 	CPER_HW_ERROR_FLAG_SIMULATED	= __BIT(2),
@@ -110,6 +110,8 @@ enum {
 	"\0"
 
 /*
+ * N.2.5. Memory Error Section
+ *
  * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section
  *
  * Type: {0xa5bc1114,0x6f64,0x4ede,{0xb8,0x63,0x3e,0x83,0xed,0x7c,0x83,0xb1}}
@@ -144,7 +146,7 @@ struct cper_memory_error_ext {
 } __packed;
 __CTASSERT(sizeof(struct cper_memory_error_ext) == 80);
 
-enum {				/* struct cper_memory_error::validation_bits */
+enum {				/* struct cper_memory_error::ValidationBits */
 	CPER_MEMORY_ERROR_VALID_ERROR_STATUS		= __BIT(0),
 	CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS	= __BIT(1),
 	CPER_MEMORY_ERROR_VALID_PHYSICAL_ADDRESS_MASK	= __BIT(2),
@@ -194,7 +196,7 @@ enum { /* struct cper_memory_error::v
 	"b\025"	"CHIP_ID\0"						      \
 	"\0"
 
-enum {				/* struct cper_memory_error::bank */
+enum {				/* struct cper_memory_error::Bank */
 	CPER_MEMORY_ERROR_BANK_ADDRESS	= __BITS(7,0),
 	CPER_MEMORY_ERROR_BANK_GROUP	= __BITS(15,8),
 };
@@ -219,16 +221,92 @@ enum { /* struct cper_memory_error::b
 	F(CPER_MEMORY_ERROR_PHYSMEM_MAPOUT_EVENT, PHYSMEM_MAPOUT_EVENT, 15)   \
 	/* end of CPER_MEMORY_ERROR_TYPES */
 
-enum cper_memory_error_type { /* struct cper_memory_error::memory_error_type */
+enum cper_memory_error_type { /* struct cper_memory_error::MemoryErrorType */
 #define	CPER_MEMORY_ERROR_TYPE_DEF(LN, SN, V)	LN = V,
 	CPER_MEMORY_ERROR_TYPES(CPER_MEMORY_ERROR_TYPE_DEF)
 #undef	CPER_MEMORY_ERROR_TYPE_DEF
 };
 
-enum {				/* struct cper_memory_error_ext::extended */
+enum {				/* struct cper_memory_error_ext::Extended */
 	CPER_MEMORY_ERROR_EXTENDED_ROWBIT16	= __BIT(0),
 	CPER_MEMORY_ERROR_EXTENDED_ROWBIT17	= __BIT(1),
 	CPER_MEMORY_ERROR_EXTENDED_CHIPID	= __BITS(7,5),
 };
 
+/*
+ * N.2.7. PCI Express Error Section
+ *
+ * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#pci-express-error-section
+ *
+ * Type: {0xd995e954,0xbbc1,0x430f,{0xad,0x91,0xb4,0x4d,0xcb,0x3c,0x6f,0x35}}
+ */
+
+struct cper_pcie_error {
+	uint64_t	ValidationBits;
+	uint32_t	PortType;
+	uint32_t	Version;
+	uint32_t	CommandStatus;
+	uint32_t	Reserved0;
+	struct {
+		uint8_t		VendorID[2];
+		uint8_t		DeviceID[2]; /* product */
+		uint8_t		ClassCode[3];
+		uint8_t		Function;
+		uint8_t		Device;
+		uint8_t		Segment[2];
+		uint8_t		PrimaryBus;
+		uint8_t		SecondaryBus;
+		uint8_t		Slot[2]; /* bits 0:2 resv, bits 3:15 slot */
+		uint8_t		Reserved0;
+	}		DeviceID;
+	uint64_t	DeviceSerial;
+	uint32_t	BridgeControlStatus;
+	uint8_t		CapabilityStructure[60];
+	uint8_t		AERInfo[96];
+} __packed;
+__CTASSERT(sizeof(struct cper_pcie_error) == 208);
+
+enum {				/* struct cper_pcie_error::ValidationBits */
+	CPER_PCIE_ERROR_VALID_PORT_TYPE			= __BIT(0),
+	CPER_PCIE_ERROR_VALID_VERSION			= __BIT(1),
+	CPER_PCIE_ERROR_VALID_COMMAND_STATUS		= __BIT(2),
+	CPER_PCIE_ERROR_VALID_DEVICE_ID			= __BIT(3),
+	CPER_PCIE_ERROR_VALID_DEVICE_SERIAL		= __BIT(4),
+	CPER_PCIE_ERROR_VALID_BRIDGE_CONTROL_STATUS	= __BIT(5),
+	CPER_PCIE_ERROR_VALID_CAPABILITY_STRUCTURE	= __BIT(6),
+	CPER_PCIE_ERROR_VALID_AER_INFO			= __BIT(7),
+};
+
+#define	CPER_PCIE_ERROR_VALIDATION_BITS_FMT	"\177\020"		      \
+	"b\000"	"PORT_TYPE\0"						      \
+	"b\001"	"VERSION\0"						      \
+	"b\002"	"COMMAND_STATUS\0"					      \
+	"b\003"	"DEVICE_ID\0"						      \
+	"b\004"	"DEVICE_SERIAL\0"					      \
+	"b\005"	"BRIDGE_CONTROL_STATUS\0"				      \
+	"b\006"	"CAPABILITY_STRUCTURE\0"				      \
+	"b\007"	"AER_INFO\0"						      \
+	"\0"
+
+#define	CPER_PCIE_ERROR_PORT_TYPES(F)					      \
+	F(CPER_PCIE_ERROR_PORT_TYPE_PCIE_ENDPOINT, PCIE_ENDPOINT, 0)	      \
+	F(CPER_PCIE_ERROR_PORT_TYPE_LEGACY_PCI_ENDPOINT, LEGACY_PCI_ENDPOINT, \
+	    1)								      \
+	F(CPER_PCIE_ERROR_PORT_TYPE_ROOTPORT, ROOTPORT, 4)		      \
+	F(CPER_PCIE_ERROR_PORT_TYPE_UPSTREAMSWITCH, UPSTREAMSWITCH, 5)	      \
+	F(CPER_PCIE_ERROR_PORT_TYPE_DOWNSTREAMSWITCH, DOWNSTREAMSWITCH, 6)    \
+	F(CPER_PCIE_ERROR_PORT_TYPE_PCIE_PCI_BRIDGE, PCIE_PCI_BRIDGE, 7)      \
+	F(CPER_PCIE_ERROR_PORT_TYPE_PCI_PCIE_BRIDGE, PCI_PCIE_BRIDGE, 8)      \
+	F(CPER_PCIE_ERROR_PORT_TYPE_RCIEP_DEV, RCIEP_DEV, 9)		      \
+	/* Root Complex Integrated Endpoint Device */			      \
+	F(CPER_PCIE_ERROR_PORT_TYPE_RCEC, RCEC, 10)			      \
+	/* Root Complex Event Collector */				      \
+	/* end of CPER_PCIE_ERROR_PORT_TYPES */
+
+enum cper_pcie_error_port_type { /* struct cper_pcie_error::PortType */
+#define	CPER_PCIE_ERROR_PORT_TYPE_DEF(LN, SN, V)	LN = V,
+	CPER_PCIE_ERROR_PORT_TYPES(CPER_PCIE_ERROR_PORT_TYPE_DEF)
+#undef	CPER_PCIE_ERROR_PORT_TYPE_DEF
+};
+
 #endif	/* _SYS_DEV_ACPI_APEI_CPER_H_ */
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/acpi/apei_hest.c
--- a/sys/dev/acpi/apei_hest.c	Mon Oct 21 15:57:45 2024 +0000
+++ b/sys/dev/acpi/apei_hest.c	Thu Oct 24 20:08:59 2024 +0000
@@ -400,6 +400,8 @@ apei_hest_attach_ghes(struct apei_softc
 	 */
 	switch (ghes->Notify.Type) {
 	case ACPI_HEST_NOTIFY_POLLED:
+		if (ghes->Notify.PollInterval == 0) /* paranoia */
+			break;
 		callout_init(&src->as_ch, CALLOUT_MPSAFE);
 		callout_setfunc(&src->as_ch, &apei_hest_ghes_poll, src);
 		callout_schedule(&src->as_ch, 0);
@@ -451,6 +453,8 @@ apei_hest_detach_ghes(struct apei_softc
 	 */
 	switch (ghes->Notify.Type) {
 	case ACPI_HEST_NOTIFY_POLLED:
+		if (ghes->Notify.PollInterval == 0) /* paranoia */
+			break;
 		callout_halt(&src->as_ch, NULL);
 		callout_destroy(&src->as_ch);
 		break;
@@ -583,6 +587,8 @@ apei_hest_attach_ghes_v2(struct apei_sof
 	 */
 	switch (ghes_v2->Notify.Type) {
 	case ACPI_HEST_NOTIFY_POLLED:
+		if (ghes_v2->Notify.PollInterval == 0) /* paranoia */
+			break;
 		callout_init(&src->as_ch, CALLOUT_MPSAFE);
 		callout_setfunc(&src->as_ch, &apei_hest_ghes_v2_poll, src);
 		callout_schedule(&src->as_ch, 0);
@@ -634,6 +640,8 @@ apei_hest_detach_ghes_v2(struct apei_sof
 	 */
 	switch (ghes_v2->Notify.Type) {
 	case ACPI_HEST_NOTIFY_POLLED:
+		if (ghes_v2->Notify.PollInterval == 0) /* paranoia */
+			break;
 		callout_halt(&src->as_ch, NULL);
 		callout_destroy(&src->as_ch);
 		break;
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/pci/files.pci
--- a/sys/dev/pci/files.pci	Mon Oct 21 15:57:45 2024 +0000
+++ b/sys/dev/pci/files.pci	Thu Oct 24 20:08:59 2024 +0000
@@ -19,6 +19,7 @@ defflag opt_pciide.h PCIIDE_CMD064x_DISA
 device	pci {[dev = -1], [function = -1]}
 attach	pci at pcibus
 file	dev/pci/pci.c			pci			needs-flag
+file	dev/pci/pci_error.c		pci
 file	dev/pci/pci_map.c		pci
 file	dev/pci/pci_quirks.c		pci
 file	dev/pci/pci_resource.c		pci & pci_resource
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/pci/pci_error.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/dev/pci/pci_error.c	Thu Oct 24 20:08:59 2024 +0000
@@ -0,0 +1,264 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * PCI error reporting
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <dev/acpi/apei_cper.h>	/* XXX not APEI- or even ACPI-specific */
+#include <dev/pci/pci_error.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+/*
+ * pci_cper_match(cookie, pa)
+ *
+ *	Match callback passed to pci_find_device1: return 1 if pa has
+ *	the segment, bus, device, and function recorded in the
+ *	DeviceID of the struct cper_pcie_error at cookie, else 0.
+ */
+static int
+pci_cper_match(void *cookie, const struct pci_attach_args *pa)
+{
+	const struct cper_pcie_error *PE = cookie;
+
+	if (le16dec(PE->DeviceID.Segment) != pci_get_segment(pa->pa_pc))
+		return 0;
+	if (PE->DeviceID.PrimaryBus != pa->pa_bus)
+		return 0;
+	if (PE->DeviceID.Device != pa->pa_device)
+		return 0;
+	if (PE->DeviceID.Function != pa->pa_function)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * pci_cper_error(PE)
+ *
+ *	Act on notification of a PCI error report via Common Platform
+ *	Error Record.
+ */
+void
+pci_cper_error(const struct cper_pcie_error *PE)
+{
+	struct pci_attach_args pa;
+
+	/*
+	 * If there's no device ID, nothing for us to do.
+	 *
+	 * XXX Report this back to the caller?
+	 */
+	if ((PE->ValidationBits & CPER_PCIE_ERROR_VALID_DEVICE_ID) == 0)
+		return;
+
+	/*
+	 * Find a matching device.  If none, do nothing -- we can't do
+	 * anything to acknowledge this.
+	 */
+	if (!pci_find_device1(&pa, pci_cper_match, __UNCONST(PE))) {
+		char devbuf[sizeof "PCI 0000:00:00.000"];
+
+		snprintf(devbuf, sizeof(devbuf), "PCI %04x:%02x:%02x.%u",
+		    le16dec(PE->DeviceID.Segment),
+		    PE->DeviceID.PrimaryBus,
+		    PE->DeviceID.Device,
+		    PE->DeviceID.Function);
+		aprint_debug("%s: hardware error in unknown device\n", devbuf);
+		return;
+	}
+
+	/*
+	 * Handle via the pci_attach_args that we now have.
+	 */
+	pci_error(&pa);
+}
+
+/*
+ * pci_error(pa)
+ *
+ *	Check for, report, and acknowledge any errors in the PCI device
+ *	described by pa.
+ */
+void
+pci_error(const struct pci_attach_args *pa)
+{
+	char devbuf[sizeof "PCI 0000:00:00.000"];
+	const pci_chipset_tag_t pc = pa->pa_pc;
+	const pcitag_t tag = pa->pa_tag;
+	pcireg_t aer, pcie;
+	char bitbuf[1024];
+
+	snprintf(devbuf, sizeof(devbuf), "PCI %04x:%02x:%02x.%u",
+	    pci_get_segment(pa->pa_pc),
+	    pa->pa_bus, pa->pa_device, pa->pa_function);
+
+	/*
+	 * If we have Advanced Error Reporting capability, read and
+	 * write back any uncorrectable or corrected error status.
+	 */
+	if (pci_get_ext_capability(pc, tag, PCI_EXTCAP_AER, &aer, NULL)) {
+		pcireg_t uc_status, uc_mask, uc_sev;
+		pcireg_t control;
+		pcireg_t cor_status, cor_mask;
+
+		/*
+		 * Read the status, mask, severity, and control (which
+		 * has the number of the first error bit).
+		 */
+		uc_status = pci_conf_read(pc, tag, aer + PCI_AER_UC_STATUS);
+		uc_mask = pci_conf_read(pc, tag, aer + PCI_AER_UC_MASK);
+		uc_sev = pci_conf_read(pc, tag, aer + PCI_AER_UC_SEVERITY);
+
+		cor_status = pci_conf_read(pc, tag, aer + PCI_AER_COR_STATUS);
+		cor_mask = pci_conf_read(pc, tag, aer + PCI_AER_COR_MASK);
+
+		control = pci_conf_read(pc, tag, aer + PCI_AER_CAP_CONTROL);
+
+		/*
+		 * Acknowledge error status bits.
+		 */
+		pci_conf_write(pc, tag, aer + PCI_AER_UC_STATUS, uc_status);
+		pci_conf_write(pc, tag, aer + PCI_AER_COR_STATUS, cor_status);
+
+		/* XXX move me to pcireg.h */
+#define	PCI_AER_UC_STATUS_FMT	"\177\020"				      \
+	"b\000"	"UNDEFINED\0"						      \
+	"b\004"	"DL_PROTOCOL_ERROR\0"					      \
+	"b\005"	"SURPRISE_DOWN_ERROR\0"					      \
+	"b\014"	"POISONED_TLP\0"					      \
+	"b\015"	"FC_PROTOCOL_ERROR\0"					      \
+	"b\016"	"COMPLETION_TIMEOUT\0"					      \
+	"b\017"	"COMPLETION_ABORT\0"					      \
+	"b\020"	"UNEXPECTED_COMPLETION\0"				      \
+	"b\021"	"RECEIVER_OVERFLOW\0"					      \
+	"b\022"	"MALFORMED_TLP\0"					      \
+	"b\023"	"ECRC_ERROR\0"						      \
+	"b\024"	"UNSUPPORTED_REQUEST_ERROR\0"				      \
+	"b\025"	"ACS_VIOLATION\0"					      \
+	"b\026"	"INTERNAL_ERROR\0"					      \
+	"b\027"	"MC_BLOCKED_TLP\0"					      \
+	"b\030"	"ATOMIC_OP_EGRESS_BLOCKED\0"				      \
+	"b\031"	"TLP_PREFIX_BLOCKED_ERROR\0"				      \
+	"b\032"	"POISONTLP_EGRESS_BLOCKED\0"				      \
+	"\0"
+
+		/*
+		 * Report uncorrectable fatal errors.
+		 */
+		if ((uc_status & uc_sev) != 0) {
+			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
+			    uc_status & uc_sev);
+			aprint_error("%s: hardware fatal uncorrectable error:"
+			    " %s (mask=0x%"PRIx32")\n",
+			    devbuf, bitbuf,
+			    (uint32_t)uc_mask);
+		}
+
+		/*
+		 * Report uncorrectable non-fatal errors.
+		 */
+		if ((uc_status & ~uc_sev) != 0) {
+			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
+			    uc_status & ~uc_sev);
+			aprint_error("%s: hardware uncorrectable error: %s"
+			    " (mask=0x%"PRIx32")\n",
+			    devbuf, bitbuf,
+			    (uint32_t)uc_mask);
+		}
+
+		/*
+		 * Show the first error, if any.
+		 */
+		if (uc_status != 0) {
+			pcireg_t first = __SHIFTOUT(control,
+			    PCI_AER_FIRST_ERROR_PTR);
+			snprintb(bitbuf, sizeof(bitbuf), PCI_AER_UC_STATUS_FMT,
+			    (uint32_t)1 << first);
+			aprint_error("%s: hardware first uncorrectable error:"
+			    " %s\n",
+			    devbuf, bitbuf);
+		}
+
+		/*
+		 * Report corrected errors.
+		 *
+		 * XXX sysctl knob to suppress this
+		 */
+		if (cor_status != 0) {
+			/* XXX move me to pcireg.h */
+			snprintb(bitbuf, sizeof(bitbuf), "\177\020"
+			    "b\000"	"RECEIVER_ERROR\0"
+			    "b\006"	"BAD_TLP\0"
+			    "b\007"	"BAD_DLLP\0"
+			    "b\010"	"REPLAY_NUM_ROLLOVER\0"
+			    "b\014"	"REPLAY_TIMER_TIMEOUT\0"
+			    "b\015"	"ADVISORY_NF_ERROR\0"
+			    "b\016"	"INTERNAL_ERROR\0"
+			    "b\017"	"HEADER_LOG_OVERFLOW\0"
+			    "\0", cor_status);
+			aprint_error("%s: hardware corrected error: %s"
+			    " (mask=0x%"PRIx32")\n",
+			    devbuf, bitbuf, (uint32_t)cor_mask);
+		}
+	}
+
+	/*
+	 * If we have PCIe at all, read and write back any error
+	 * status.
+	 */
+	if (pci_get_capability(pc, tag, PCI_CAP_PCIEXPRESS, &pcie, NULL)) {
+		pcireg_t dcsr = pci_conf_read(pc, tag, pcie + PCIE_DCSR);
+		uint16_t dsr = __SHIFTOUT(dcsr, __BITS(31,16));
+
+		/*
+		 * If any status bits are set, acknowledge all status
+		 * bits, write back control bits unchanged, and print
+		 * the status.
+		 */
+		if (dsr != 0) {
+			pci_conf_write(pc, tag, pcie + PCIE_DCSR, dcsr);
+
+			/* XXX move me to pcireg.h; note: high half of DCSR */
+			snprintb(bitbuf, sizeof(bitbuf), "\177\020"
+			    "b\000"	"CORRECTABLE_ERROR\0"
+			    "b\001"	"NONFATAL_UNCORRECTABLE_ERROR\0"
+			    "b\002"	"FATAL_ERROR\0"
+			    "b\003"	"UNSUPPORTED_REQUEST\0"
+			    "b\004"	"AUX_POWER\0"
+			    "b\005"	"TRANSACTIONS_PENDING\0"
+			    "\0", dsr);
+			aprint_error("%s: hardware error: DSR=%s\n",
+			    devbuf, bitbuf);
+		}
+	}
+}
diff -r b4e17a9d10b4 -r d7eb1dff835c sys/dev/pci/pci_error.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/dev/pci/pci_error.h	Thu Oct 24 20:08:59 2024 +0000
@@ -0,0 +1,38 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_DEV_PCI_PCI_ERROR_H_
+#define	_DEV_PCI_PCI_ERROR_H_
+
+struct cper_pcie_error;
+struct pci_attach_args;
+
+void pci_cper_error(const struct cper_pcie_error *);
+void pci_error(const struct pci_attach_args *);
+
+#endif	/* _DEV_PCI_PCI_ERROR_H_ */