diff mbox series

x86/acpi: Fix LAPIC/x2APIC parsing order

Message ID 20241022001712.9218-1-rui.zhang@intel.com
State Superseded
Headers show
Series x86/acpi: Fix LAPIC/x2APIC parsing order | expand

Commit Message

Zhang Rui Oct. 22, 2024, 12:17 a.m. UTC
On some systems, the same CPU (with the same APIC ID) is assigned a
different logical CPU id after commit ec9aedb2aa1a ("x86/acpi: Ignore
invalid x2APIC entries").

This means that Linux enumerates the CPUs in a different order, which
violates ACPI specification[1] that states:

  "OSPM should initialize processors in the order that they appear in
   the MADT"

The problematic commit parses all LAPIC entries before any x2APIC
entries, aiming to ignore x2APIC entries with APIC ID < 255 when valid
LAPIC entries exist. However, it disrupts the CPU enumeration order on
systems where x2APIC entries precede LAPIC entries in the MADT.

Fix the problem by separately checking LAPIC entries before parsing any
LAPIC or x2APIC entries.

1. https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#madt-processor-local-apic-sapic-structure-entry-order

Cc: stable@vger.kernel.org
Reported-by: Jim Mattson <jmattson@google.com>
Closes: https://lore.kernel.org/all/20241010213136.668672-1-jmattson@google.com/
Fixes: ec9aedb2aa1a ("x86/acpi: Ignore invalid x2APIC entries")
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Tested-by: Jim Mattson <jmattson@google.com>
---
 arch/x86/kernel/acpi/boot.c | 50 +++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 5 deletions(-)

Comments

Jim Mattson Dec. 10, 2024, 2:59 p.m. UTC | #1
On Mon, Oct 21, 2024 at 5:17 PM Zhang Rui <rui.zhang@intel.com> wrote:
>
> On some systems, the same CPU (with the same APIC ID) is assigned a
> different logical CPU id after commit ec9aedb2aa1a ("x86/acpi: Ignore
> invalid x2APIC entries").
>
> This means that Linux enumerates the CPUs in a different order, which
> violates ACPI specification[1] that states:
>
>   "OSPM should initialize processors in the order that they appear in
>    the MADT"
>
> The problematic commit parses all LAPIC entries before any x2APIC
> entries, aiming to ignore x2APIC entries with APIC ID < 255 when valid
> LAPIC entries exist. However, it disrupts the CPU enumeration order on
> systems where x2APIC entries precede LAPIC entries in the MADT.
>
> Fix the problem by separately checking LAPIC entries before parsing any
> LAPIC or x2APIC entries.
>
> 1. https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#madt-processor-local-apic-sapic-structure-entry-order
>
> Cc: stable@vger.kernel.org
> Reported-by: Jim Mattson <jmattson@google.com>
> Closes: https://lore.kernel.org/all/20241010213136.668672-1-jmattson@google.com/
> Fixes: ec9aedb2aa1a ("x86/acpi: Ignore invalid x2APIC entries")
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> Reviewed-by: Jim Mattson <jmattson@google.com>
> Tested-by: Jim Mattson <jmattson@google.com>
> ---
>  arch/x86/kernel/acpi/boot.c | 50 +++++++++++++++++++++++++++++++++----
>  1 file changed, 45 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
> index 4efecac49863..c70b86f1f295 100644
> --- a/arch/x86/kernel/acpi/boot.c
> +++ b/arch/x86/kernel/acpi/boot.c
> @@ -226,6 +226,28 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
>         return 0;
>  }
>
> +static int __init
> +acpi_check_lapic(union acpi_subtable_headers *header, const unsigned long end)
> +{
> +       struct acpi_madt_local_apic *processor = NULL;
> +
> +       processor = (struct acpi_madt_local_apic *)header;
> +
> +       if (BAD_MADT_ENTRY(processor, end))
> +               return -EINVAL;
> +
> +       /* Ignore invalid ID */
> +       if (processor->id == 0xff)
> +               return 0;
> +
> +       /* Ignore processors that can not be onlined */
> +       if (!acpi_is_processor_usable(processor->lapic_flags))
> +               return 0;
> +
> +       has_lapic_cpus = true;
> +       return 0;
> +}
> +
>  static int __init
>  acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
>  {
> @@ -257,7 +279,6 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
>                                processor->processor_id, /* ACPI ID */
>                                processor->lapic_flags & ACPI_MADT_ENABLED);
>
> -       has_lapic_cpus = true;
>         return 0;
>  }
>
> @@ -1029,6 +1050,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
>  static int __init acpi_parse_madt_lapic_entries(void)
>  {
>         int count, x2count = 0;
> +       struct acpi_subtable_proc madt_proc[2];
> +       int ret;
>
>         if (!boot_cpu_has(X86_FEATURE_APIC))
>                 return -ENODEV;
> @@ -1037,10 +1060,27 @@ static int __init acpi_parse_madt_lapic_entries(void)
>                                       acpi_parse_sapic, MAX_LOCAL_APIC);
>
>         if (!count) {
> -               count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
> -                                       acpi_parse_lapic, MAX_LOCAL_APIC);
> -               x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
> -                                       acpi_parse_x2apic, MAX_LOCAL_APIC);
> +               /* Check if there are valid LAPIC entries */
> +               acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_check_lapic, MAX_LOCAL_APIC);
> +
> +               /*
> +                * Enumerate the APIC IDs in the order that they appear in the
> +                * MADT, no matter LAPIC entry or x2APIC entry is used.
> +                */
> +               memset(madt_proc, 0, sizeof(madt_proc));
> +               madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
> +               madt_proc[0].handler = acpi_parse_lapic;
> +               madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
> +               madt_proc[1].handler = acpi_parse_x2apic;
> +               ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
> +                               sizeof(struct acpi_table_madt),
> +                               madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
> +               if (ret < 0) {
> +                       pr_err("Error parsing LAPIC/X2APIC entries\n");
> +                       return ret;
> +               }
> +               count = madt_proc[0].count;
> +               x2count = madt_proc[1].count;
>         }
>         if (!count && !x2count) {
>                 pr_err("No LAPIC entries present\n");
> --
> 2.34.1
>

Does anyone have any thoughts on this patch?
Thomas Gleixner Dec. 10, 2024, 10:51 p.m. UTC | #2
On Tue, Oct 22 2024 at 08:17, Zhang Rui wrote:
> On some systems, the same CPU (with the same APIC ID) is assigned a
> different logical CPU id after commit ec9aedb2aa1a ("x86/acpi: Ignore
> invalid x2APIC entries").
>
> This means that Linux enumerates the CPUs in a different order, which
> violates ACPI specification[1] that states:
>
>   "OSPM should initialize processors in the order that they appear in
>    the MADT"
>
> The problematic commit parses all LAPIC entries before any x2APIC
> entries, aiming to ignore x2APIC entries with APIC ID < 255 when valid
> LAPIC entries exist. However, it disrupts the CPU enumeration order on
> systems where x2APIC entries precede LAPIC entries in the MADT.
>
> Fix the problem by separately checking LAPIC entries before parsing any
> LAPIC or x2APIC entries.

I really had to stare at the change to understand how this fixes
anything. What you want to say is:

Fix this problem by:

    1) Parsing LAPIC entries first without registering them in the
       topology to evaluate whether valid LAPIC entries exist.

    2) Restoring the MADT in order parser which invokes either the LAPIC or
       the X2APIC parser function depending on the entry type.

       The X2APIC parser still ignores entries < 0xff in case that #1
       found valid LAPIC entries independent of their position in the
       MADT table.

Other than that:

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Zhang Rui Dec. 11, 2024, 5:40 a.m. UTC | #3
On Tue, 2024-12-10 at 23:51 +0100, Thomas Gleixner wrote:
> On Tue, Oct 22 2024 at 08:17, Zhang Rui wrote:
> > On some systems, the same CPU (with the same APIC ID) is assigned a
> > different logical CPU id after commit ec9aedb2aa1a ("x86/acpi:
> > Ignore
> > invalid x2APIC entries").
> > 
> > This means that Linux enumerates the CPUs in a different order,
> > which
> > violates ACPI specification[1] that states:
> > 
> >   "OSPM should initialize processors in the order that they appear
> > in
> >    the MADT"
> > 
> > The problematic commit parses all LAPIC entries before any x2APIC
> > entries, aiming to ignore x2APIC entries with APIC ID < 255 when
> > valid
> > LAPIC entries exist. However, it disrupts the CPU enumeration order
> > on
> > systems where x2APIC entries precede LAPIC entries in the MADT.
> > 
> > Fix the problem by separately checking LAPIC entries before parsing
> > any
> > LAPIC or x2APIC entries.
> 
> I really had to stare at the change to understand how this fixes
> anything. What you want to say is:
> 
> Fix this problem by:
> 
>     1) Parsing LAPIC entries first without registering them in the
>        topology to evaluate whether valid LAPIC entries exist.
> 
>     2) Restoring the MADT in order parser which invokes either the
> LAPIC or
>        the X2APIC parser function depending on the entry type.
> 
>        The X2APIC parser still ignores entries < 0xff in case that #1
>        found valid LAPIC entries independent of their position in the
>        MADT table.
> 
Exactly. Thanks for the rewording.

-rui

> Other than that:
> 
> Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
> 
>
diff mbox series

Patch

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4efecac49863..c70b86f1f295 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -226,6 +226,28 @@  acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
 	return 0;
 }
 
+static int __init
+acpi_check_lapic(union acpi_subtable_headers *header, const unsigned long end)
+{
+	struct acpi_madt_local_apic *processor = NULL;
+
+	processor = (struct acpi_madt_local_apic *)header;
+
+	if (BAD_MADT_ENTRY(processor, end))
+		return -EINVAL;
+
+	/* Ignore invalid ID */
+	if (processor->id == 0xff)
+		return 0;
+
+	/* Ignore processors that can not be onlined */
+	if (!acpi_is_processor_usable(processor->lapic_flags))
+		return 0;
+
+	has_lapic_cpus = true;
+	return 0;
+}
+
 static int __init
 acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
 {
@@ -257,7 +279,6 @@  acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
 			       processor->processor_id, /* ACPI ID */
 			       processor->lapic_flags & ACPI_MADT_ENABLED);
 
-	has_lapic_cpus = true;
 	return 0;
 }
 
@@ -1029,6 +1050,8 @@  static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 static int __init acpi_parse_madt_lapic_entries(void)
 {
 	int count, x2count = 0;
+	struct acpi_subtable_proc madt_proc[2];
+	int ret;
 
 	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
@@ -1037,10 +1060,27 @@  static int __init acpi_parse_madt_lapic_entries(void)
 				      acpi_parse_sapic, MAX_LOCAL_APIC);
 
 	if (!count) {
-		count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
-					acpi_parse_lapic, MAX_LOCAL_APIC);
-		x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
-					acpi_parse_x2apic, MAX_LOCAL_APIC);
+		/* Check if there are valid LAPIC entries */
+		acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_check_lapic, MAX_LOCAL_APIC);
+
+		/*
+		 * Enumerate the APIC IDs in the order that they appear in the
+		 * MADT, no matter LAPIC entry or x2APIC entry is used.
+		 */
+		memset(madt_proc, 0, sizeof(madt_proc));
+		madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+		madt_proc[0].handler = acpi_parse_lapic;
+		madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+		madt_proc[1].handler = acpi_parse_x2apic;
+		ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
+				sizeof(struct acpi_table_madt),
+				madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+		if (ret < 0) {
+			pr_err("Error parsing LAPIC/X2APIC entries\n");
+			return ret;
+		}
+		count = madt_proc[0].count;
+		x2count = madt_proc[1].count;
 	}
 	if (!count && !x2count) {
 		pr_err("No LAPIC entries present\n");