Message ID | 60da74c80a0b05ea4a5b4b7f2eda1b58d555edce.1718794335.git.mchehab+huawei@kernel.org |
---|---|
State | New |
Headers | show |
Series | [v2] efi/cper: align ARM CPER type with UEFI 2.9A/2.10 specs | expand |
On Wed, 19 Jun 2024 12:52:38 +0200 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote: > Up to UEFI spec, the type byte of CPER struct was defined simply > as: > > Type at byte offset 4: > > - Cache error > - TLB Error > - Bus Error > - Micro-architectural Error > All other values are reserved > > Yet, there was no information about how this would be encoded. > > Spec 2.9A corrected it by defining: > > - Bit 1 - Cache Error > - Bit 2 - TLB Error > - Bit 3 - Bus Error > - Bit 4 - Micro-architectural Error > All other values are reserved > > Spec 2.10 also preserve the same encoding as 2.9A > > See: https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#arm-processor-error-information > > Adjust CPER handling code for ARM to properly handle UEFI 2.9A and > 2.10 encoding. Hi Mauro, I'd be tempted to use "ARM Processor" throughout this patch description as could in theory be something else and currently the link is the only way to tell! A few comments inline. Good catch on the spec change btw. > > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> > --- > drivers/acpi/apei/ghes.c | 19 +++++++++++--- > drivers/firmware/efi/cper-arm.c | 44 ++++++++++++++------------------- > include/linux/cper.h | 9 +++---- > 3 files changed, 37 insertions(+), 35 deletions(-) > > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c > index ed32bbecb4a3..365de4115508 100644 > --- a/drivers/acpi/apei/ghes.c > +++ b/drivers/acpi/apei/ghes.c > @@ -546,9 +546,12 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, > p = (char *)(err + 1); > for (i = 0; i < err->err_info_num; i++) { > struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p; > - bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR); > + bool is_cache = (err_info->type & CPER_ARM_CACHE_ERROR); Matches local style I guess but the () are unnecessary. > bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR); > - const char *error_type = "unknown error"; > + char error_type[120] = ""; > + char *s = error_type; > + int len = 0; > + int i; Shadowing i which is bad for readability. > > /* > * The field (err_info->error_info & BIT(26)) is fixed to set to > @@ -562,8 +565,16 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, > continue; > } > > - if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs)) > - error_type = cper_proc_error_type_strs[err_info->type]; > + for (i = 0; i < ARRAY_SIZE(cper_proc_error_type_strs); i++) { > + if (!(err_info->type & (1U << i))) > + continue; > + > + len += snprintf(s, sizeof(err_info->type) - len, "%s ", cper_proc_error_type_strs[i]); Size of the index into the type string array? I'm confused. sizeof(error_type) maybe? Also, maybe break that long line of code before cper_* > + s += len; > + } > + > + if (!*error_type) > + strscpy(error_type, "unknown error", sizeof(error_type)); Perhaps should handle multiple bits where only one is unknown? So maybe compare with a mask of known bits and print this on the end (perhaps including which bit)? > > pr_warn_ratelimited(FW_WARN GHES_PFX > "Unhandled processor error type: %s\n", > diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c > index fa9c1c3bf168..f57641eb548a 100644 > --- a/drivers/firmware/efi/cper-arm.c > +++ b/drivers/firmware/efi/cper-arm.c > @@ -93,15 +93,11 @@ static void cper_print_arm_err_info(const char *pfx, u32 type, > bool proc_context_corrupt, corrected, precise_pc, restartable_pc; > bool time_out, access_mode; > > - /* If the type is unknown, bail. */ > - if (type > CPER_ARM_MAX_TYPE) > - return; > - > /* > * Vendor type errors have error information values that are vendor > * specific. > */ > - if (type == CPER_ARM_VENDOR_ERROR) > + if (type & CPER_ARM_VENDOR_ERROR) > return; > > if (error_info & CPER_ARM_ERR_VALID_TRANSACTION_TYPE) { > @@ -116,43 +112,38 @@ static void cper_print_arm_err_info(const char *pfx, u32 type, > if (error_info & CPER_ARM_ERR_VALID_OPERATION_TYPE) { > op_type = ((error_info >> CPER_ARM_ERR_OPERATION_SHIFT) > & CPER_ARM_ERR_OPERATION_MASK); > - switch (type) { > - case CPER_ARM_CACHE_ERROR: > + if (type & CPER_ARM_CACHE_ERROR) { > if (op_type < ARRAY_SIZE(arm_cache_err_op_strs)) { > - printk("%soperation type: %s\n", pfx, > + printk("%scache error: %s\n", pfx, > arm_cache_err_op_strs[op_type]); Can we keep that this is an operation type in print? "%scache error, operation type: %s\n" perhaps? > } > - break; > - case CPER_ARM_TLB_ERROR: > + } > + if (type & CPER_ARM_TLB_ERROR) { > if (op_type < ARRAY_SIZE(arm_tlb_err_op_strs)) { > - printk("%soperation type: %s\n", pfx, > + printk("%sTLB error: %s\n", pfx, > arm_tlb_err_op_strs[op_type]); > } > - break; > - case CPER_ARM_BUS_ERROR: > + } > + if (type & CPER_ARM_BUS_ERROR) { > if (op_type < ARRAY_SIZE(arm_bus_err_op_strs)) { > - printk("%soperation type: %s\n", pfx, > + printk("%sbus error: %s\n", pfx, > arm_bus_err_op_strs[op_type]); > } > - break; > } > } > > if (error_info & CPER_ARM_ERR_VALID_LEVEL) { > level = ((error_info >> CPER_ARM_ERR_LEVEL_SHIFT) > & CPER_ARM_ERR_LEVEL_MASK); Not a today thing, but would be lovely to use FIELD_GET() for all these with appropriately fixed up mask definitions. Right now it is inconsistent as the valid entries are handled as shifted values, and we have GENMASK(X,0) plus a shift for these. > - switch (type) { > - case CPER_ARM_CACHE_ERROR: > + if (type & CPER_ARM_CACHE_ERROR) > printk("%scache level: %d\n", pfx, level); > - break; > - case CPER_ARM_TLB_ERROR: > + > + if (type & CPER_ARM_TLB_ERROR) > printk("%sTLB level: %d\n", pfx, level); > - break; > - case CPER_ARM_BUS_ERROR: > + > + if (type & CPER_ARM_BUS_ERROR) > printk("%saffinity level at which the bus error occurred: %d\n", > pfx, level); > - break; > - } > }
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ed32bbecb4a3..365de4115508 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -546,9 +546,12 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, p = (char *)(err + 1); for (i = 0; i < err->err_info_num; i++) { struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p; - bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR); + bool is_cache = (err_info->type & CPER_ARM_CACHE_ERROR); bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR); - const char *error_type = "unknown error"; + char error_type[120] = ""; + char *s = error_type; + int len = 0; + int i; /* * The field (err_info->error_info & BIT(26)) is fixed to set to @@ -562,8 +565,16 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, continue; } - if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs)) - error_type = cper_proc_error_type_strs[err_info->type]; + for (i = 0; i < ARRAY_SIZE(cper_proc_error_type_strs); i++) { + if (!(err_info->type & (1U << i))) + continue; + + len += snprintf(s, sizeof(err_info->type) - len, "%s ", cper_proc_error_type_strs[i]); + s += len; + } + + if (!*error_type) + strscpy(error_type, "unknown error", sizeof(error_type)); pr_warn_ratelimited(FW_WARN GHES_PFX "Unhandled processor error type: %s\n", diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c index fa9c1c3bf168..f57641eb548a 100644 --- a/drivers/firmware/efi/cper-arm.c +++ b/drivers/firmware/efi/cper-arm.c @@ -93,15 +93,11 @@ static void cper_print_arm_err_info(const char *pfx, u32 type, bool proc_context_corrupt, corrected, precise_pc, restartable_pc; bool time_out, access_mode; - /* If the type is unknown, bail. */ - if (type > CPER_ARM_MAX_TYPE) - return; - /* * Vendor type errors have error information values that are vendor * specific. */ - if (type == CPER_ARM_VENDOR_ERROR) + if (type & CPER_ARM_VENDOR_ERROR) return; if (error_info & CPER_ARM_ERR_VALID_TRANSACTION_TYPE) { @@ -116,43 +112,38 @@ static void cper_print_arm_err_info(const char *pfx, u32 type, if (error_info & CPER_ARM_ERR_VALID_OPERATION_TYPE) { op_type = ((error_info >> CPER_ARM_ERR_OPERATION_SHIFT) & CPER_ARM_ERR_OPERATION_MASK); - switch (type) { - case CPER_ARM_CACHE_ERROR: + if (type & CPER_ARM_CACHE_ERROR) { if (op_type < ARRAY_SIZE(arm_cache_err_op_strs)) { - printk("%soperation type: %s\n", pfx, + printk("%scache error: %s\n", pfx, arm_cache_err_op_strs[op_type]); } - break; - case CPER_ARM_TLB_ERROR: + } + if (type & CPER_ARM_TLB_ERROR) { if (op_type < ARRAY_SIZE(arm_tlb_err_op_strs)) { - printk("%soperation type: %s\n", pfx, + printk("%sTLB error: %s\n", pfx, arm_tlb_err_op_strs[op_type]); } - break; - case CPER_ARM_BUS_ERROR: + } + if (type & CPER_ARM_BUS_ERROR) { if (op_type < ARRAY_SIZE(arm_bus_err_op_strs)) { - printk("%soperation type: %s\n", pfx, + printk("%sbus error: %s\n", pfx, arm_bus_err_op_strs[op_type]); } - break; } } if (error_info & CPER_ARM_ERR_VALID_LEVEL) { level = ((error_info >> CPER_ARM_ERR_LEVEL_SHIFT) & CPER_ARM_ERR_LEVEL_MASK); - switch (type) { - case CPER_ARM_CACHE_ERROR: + if (type & CPER_ARM_CACHE_ERROR) printk("%scache level: %d\n", pfx, level); - break; - case CPER_ARM_TLB_ERROR: + + if (type & CPER_ARM_TLB_ERROR) printk("%sTLB level: %d\n", pfx, level); - break; - case CPER_ARM_BUS_ERROR: + + if (type & CPER_ARM_BUS_ERROR) printk("%saffinity level at which the bus error occurred: %d\n", pfx, level); - break; - } } if (error_info & CPER_ARM_ERR_VALID_PROC_CONTEXT_CORRUPT) { @@ -289,9 +280,10 @@ void cper_print_proc_arm(const char *pfx, newpfx); } - printk("%serror_type: %d, %s\n", newpfx, err_info->type, - err_info->type < ARRAY_SIZE(cper_proc_error_type_strs) ? - cper_proc_error_type_strs[err_info->type] : "unknown"); + printk("%s""error_type: 0x%02x\n", pfx, err_info->type); + cper_print_bits(pfx, err_info->type, + cper_proc_error_type_strs, + ARRAY_SIZE(cper_proc_error_type_strs)); if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) { printk("%serror_info: 0x%016llx\n", newpfx, err_info->error_info); diff --git a/include/linux/cper.h b/include/linux/cper.h index 265b0f8fc0b3..afc6d41b4e67 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -293,11 +293,10 @@ enum { #define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2) #define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3) -#define CPER_ARM_CACHE_ERROR 0 -#define CPER_ARM_TLB_ERROR 1 -#define CPER_ARM_BUS_ERROR 2 -#define CPER_ARM_VENDOR_ERROR 3 -#define CPER_ARM_MAX_TYPE CPER_ARM_VENDOR_ERROR +#define CPER_ARM_CACHE_ERROR BIT(1) +#define CPER_ARM_TLB_ERROR BIT(2) +#define CPER_ARM_BUS_ERROR BIT(3) +#define CPER_ARM_VENDOR_ERROR BIT(4) #define CPER_ARM_ERR_VALID_TRANSACTION_TYPE BIT(0) #define CPER_ARM_ERR_VALID_OPERATION_TYPE BIT(1)
Up to UEFI spec, the type byte of CPER struct was defined simply as: Type at byte offset 4: - Cache error - TLB Error - Bus Error - Micro-architectural Error All other values are reserved Yet, there was no information about how this would be encoded. Spec 2.9A corrected it by defining: - Bit 1 - Cache Error - Bit 2 - TLB Error - Bit 3 - Bus Error - Bit 4 - Micro-architectural Error All other values are reserved Spec 2.10 also preserve the same encoding as 2.9A See: https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#arm-processor-error-information Adjust CPER handling code for ARM to properly handle UEFI 2.9A and 2.10 encoding. Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> --- drivers/acpi/apei/ghes.c | 19 +++++++++++--- drivers/firmware/efi/cper-arm.c | 44 ++++++++++++++------------------- include/linux/cper.h | 9 +++---- 3 files changed, 37 insertions(+), 35 deletions(-)