diff mbox series

[v6] powerpc/kexec_file: use current CPU info while setting up FDT

Message ID 20210427045120.2109980-1-sourabhjain@linux.ibm.com
State Accepted
Commit 40c753993e3aad51a12c21233486e2037417a4d6
Headers show
Series [v6] powerpc/kexec_file: use current CPU info while setting up FDT | expand

Commit Message

Sourabh Jain April 27, 2021, 4:51 a.m. UTC
kexec_file_load uses initial_boot_params to set up the device tree
for the kernel to be loaded. Although initial_boot_params holds info
about the CPUs present at boot time, it doesn't account for hot-added CPUs.

So, kexec'ing with the kexec_file_load syscall leaves the kexec'ed
kernel with inaccurate CPU info. Also, if the kdump kernel is loaded with
the kexec_file_load syscall and the system crashes on a hot-added CPU,
the capture kernel hangs because it fails to identify the boot CPU.

 Kernel panic - not syncing: sysrq triggered crash
 CPU: 24 PID: 6065 Comm: echo Kdump: loaded Not tainted 5.12.0-rc5upstream #54
 Call Trace:
 [c0000000e590fac0] [c0000000007b2400] dump_stack+0xc4/0x114 (unreliable)
 [c0000000e590fb00] [c000000000145290] panic+0x16c/0x41c
 [c0000000e590fba0] [c0000000008892e0] sysrq_handle_crash+0x30/0x40
 [c0000000e590fc00] [c000000000889cdc] __handle_sysrq+0xcc/0x1f0
 [c0000000e590fca0] [c00000000088a538] write_sysrq_trigger+0xd8/0x178
 [c0000000e590fce0] [c0000000005e9b7c] proc_reg_write+0x10c/0x1b0
 [c0000000e590fd10] [c0000000004f26d0] vfs_write+0xf0/0x330
 [c0000000e590fd60] [c0000000004f2aec] ksys_write+0x7c/0x140
 [c0000000e590fdb0] [c000000000031ee0] system_call_exception+0x150/0x290
 [c0000000e590fe10] [c00000000000ca5c] system_call_common+0xec/0x278
 --- interrupt: c00 at 0x7fff905b9664
 NIP:  00007fff905b9664 LR: 00007fff905320c4 CTR: 0000000000000000
 REGS: c0000000e590fe80 TRAP: 0c00   Not tainted  (5.12.0-rc5upstream)
 MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28000242
       XER: 00000000
 IRQMASK: 0
 GPR00: 0000000000000004 00007ffff5fedf30 00007fff906a7300 0000000000000001
 GPR04: 000001002a7355b0 0000000000000002 0000000000000001 00007ffff5fef616
 GPR08: 0000000000000001 0000000000000000 0000000000000000 0000000000000000
 GPR12: 0000000000000000 00007fff9073a160 0000000000000000 0000000000000000
 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
 GPR20: 0000000000000000 00007fff906a4ee0 0000000000000002 0000000000000001
 GPR24: 00007fff906a0898 0000000000000000 0000000000000002 000001002a7355b0
 GPR28: 0000000000000002 00007fff906a1790 000001002a7355b0 0000000000000002
 NIP [00007fff905b9664] 0x7fff905b9664
 LR [00007fff905320c4] 0x7fff905320c4
 --- interrupt: c00

To prevent this, extract the current CPU info from the of_root
device node and use it to set up the fdt in the kexec_file_load case.

Fixes: 6ecd0163d360 ("powerpc/kexec_file: Add appropriate regions for memory reserve map")

Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Reviewed-by: Hari Bathini <hbathini@linux.ibm.com>
Cc: <stable@vger.kernel.org>
---
 arch/powerpc/kexec/file_load_64.c | 88 +++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

 ---
Changelog:

v1 -> v5
  - https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-April/227950.html

v5 -> v6
  - use existing macro (for_each_property_of_node) to loop through all
    properties of a node.
  - removed devtree_lock while accessing the node properties.
  - function name update, add_node_prop to add_node_props.
 ---

Comments

Tyrel Datwyler April 28, 2021, 12:32 a.m. UTC | #1
On 4/26/21 9:51 PM, Sourabh Jain wrote:
> kexec_file_load uses initial_boot_params in setting up the device-tree
> for the kernel to be loaded. Though initial_boot_params holds info
> about CPUs at the time of boot, it doesn't account for hot added CPUs.
> 
> So, kexec'ing with kexec_file_load syscall would leave the kexec'ed
> kernel with inaccurate CPU info. Also, if kdump kernel is loaded with
> kexec_file_load syscall and the system crashes on a hot added CPU,
> capture kernel hangs failing to identify the boot CPU.
> 
>  Kernel panic - not syncing: sysrq triggered crash
>  CPU: 24 PID: 6065 Comm: echo Kdump: loaded Not tainted 5.12.0-rc5upstream #54
>  Call Trace:
>  [c0000000e590fac0] [c0000000007b2400] dump_stack+0xc4/0x114 (unreliable)
>  [c0000000e590fb00] [c000000000145290] panic+0x16c/0x41c
>  [c0000000e590fba0] [c0000000008892e0] sysrq_handle_crash+0x30/0x40
>  [c0000000e590fc00] [c000000000889cdc] __handle_sysrq+0xcc/0x1f0
>  [c0000000e590fca0] [c00000000088a538] write_sysrq_trigger+0xd8/0x178
>  [c0000000e590fce0] [c0000000005e9b7c] proc_reg_write+0x10c/0x1b0
>  [c0000000e590fd10] [c0000000004f26d0] vfs_write+0xf0/0x330
>  [c0000000e590fd60] [c0000000004f2aec] ksys_write+0x7c/0x140
>  [c0000000e590fdb0] [c000000000031ee0] system_call_exception+0x150/0x290
>  [c0000000e590fe10] [c00000000000ca5c] system_call_common+0xec/0x278
>  --- interrupt: c00 at 0x7fff905b9664
>  NIP:  00007fff905b9664 LR: 00007fff905320c4 CTR: 0000000000000000
>  REGS: c0000000e590fe80 TRAP: 0c00   Not tainted  (5.12.0-rc5upstream)
>  MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28000242
>        XER: 00000000
>  IRQMASK: 0
>  GPR00: 0000000000000004 00007ffff5fedf30 00007fff906a7300 0000000000000001
>  GPR04: 000001002a7355b0 0000000000000002 0000000000000001 00007ffff5fef616
>  GPR08: 0000000000000001 0000000000000000 0000000000000000 0000000000000000
>  GPR12: 0000000000000000 00007fff9073a160 0000000000000000 0000000000000000
>  GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
>  GPR20: 0000000000000000 00007fff906a4ee0 0000000000000002 0000000000000001
>  GPR24: 00007fff906a0898 0000000000000000 0000000000000002 000001002a7355b0
>  GPR28: 0000000000000002 00007fff906a1790 000001002a7355b0 0000000000000002
>  NIP [00007fff905b9664] 0x7fff905b9664
>  LR [00007fff905320c4] 0x7fff905320c4
>  --- interrupt: c00
> 
> To avoid this from happening, extract current CPU info from of_root
> device node and use it for setting up the fdt in kexec_file_load case.
> 
> Fixes: 6ecd0163d360 ("powerpc/kexec_file: Add appropriate regions for memory reserve map")
> 
> Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com>
> Cc: <stable@vger.kernel.org>
> ---
>  arch/powerpc/kexec/file_load_64.c | 88 +++++++++++++++++++++++++++++++
>  1 file changed, 88 insertions(+)
> 
>  ---
> Changelog:
> 
> v1 -> v5
>   - https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-April/227950.html
> 
> v5 -> v6
>   - use exiting macro (for_each_property_of_node) to loop through all
>     properties of a node.
>   - removed devtree_lock while accessing the node properties.
>   - function name update, add_node_prop to add_node_props.
>  ---
> 
> diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
> index 02b9e4d0dc40..4f7d4c10f939 100644
> --- a/arch/powerpc/kexec/file_load_64.c
> +++ b/arch/powerpc/kexec/file_load_64.c
> @@ -960,6 +960,89 @@ unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image)
>  	return fdt_size;
>  }
> 
> +/**
> + * add_node_props - Reads node properties from device node structure and add
> + *                  them to fdt.
> + * @fdt:            Flattened device tree of the kernel
> + * @node_offset:    offset of the node to add a property at
> + * @dn:             device node pointer
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
> +{
> +	int ret = 0;
> +	struct property *pp;
> +
> +	if (!dn)
> +		return -EINVAL;
> +
> +	for_each_property_of_node(dn, pp) {
> +		ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
> +		if (ret < 0) {
> +			pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
> +			return ret;
> +		}
> +	}
> +	return ret;
> +}
> +
> +/**
> + * update_cpus_node - Update cpus node of flattened device tree using of_root
> + *                    device node.
> + * @fdt:              Flattened device tree of the kernel.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int update_cpus_node(void *fdt)
> +{
> +	struct device_node *cpus_node, *dn;
> +	int cpus_offset, cpus_subnode_offset, ret = 0;
> +
> +	cpus_offset = fdt_path_offset(fdt, "/cpus");
> +	if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
> +		pr_err("Malformed device tree: error reading /cpus node: %s\n",
> +		       fdt_strerror(cpus_offset));
> +		return cpus_offset;
> +	}
> +
> +	if (cpus_offset > 0) {
> +		ret = fdt_del_node(fdt, cpus_offset);
> +		if (ret < 0) {
> +			pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
> +			return -EINVAL;
> +		}
> +	}
> +
> +	/* Add cpus node to fdt */
> +	cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
> +	if (cpus_offset < 0) {
> +		pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
> +		return -EINVAL;
> +	}
> +
> +	/* Add cpus node properties */
> +	cpus_node = of_find_node_by_path("/cpus");

Pretty sure that of_find_node_by_path() returns a device_node with its refcount
incremented.

> +	ret = add_node_props(fdt, cpus_offset, cpus_node);

Need an of_node_put(cpus_node) here.

-Tyrel

> +	if (ret < 0)
> +		return ret;
> +
> +	/* Loop through all subnodes of cpus and add them to fdt */
> +	for_each_node_by_type(dn, "cpu") {
> +		cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
> +		if (cpus_subnode_offset < 0) {
> +			pr_err("Unable to add %s subnode: %s\n", dn->full_name,
> +			       fdt_strerror(cpus_subnode_offset));
> +			return cpus_subnode_offset;
> +		}
> +		ret = add_node_props(fdt, cpus_subnode_offset, dn);
> +		if (ret < 0)
> +			return ret;
> +	}
> +	of_node_put(dn);
> +	return ret;
> +}
> +
>  /**
>   * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel
>   *                       being loaded.
> @@ -1020,6 +1103,11 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
>  		}
>  	}
> 
> +	/* Update cpus nodes information to account hotplug CPUs. */
> +	ret =  update_cpus_node(fdt);
> +	if (ret < 0)
> +		return ret;
> +
>  	/* Update memory reserve map */
>  	ret = get_reserved_memory_ranges(&rmem);
>  	if (ret)
>
Sourabh Jain April 29, 2021, 6:05 a.m. UTC | #2
On 28/04/21 6:02 am, Tyrel Datwyler wrote:
> On 4/26/21 9:51 PM, Sourabh Jain wrote:

>> kexec_file_load uses initial_boot_params in setting up the device-tree

>> for the kernel to be loaded. Though initial_boot_params holds info

>> about CPUs at the time of boot, it doesn't account for hot added CPUs.

>>

>> So, kexec'ing with kexec_file_load syscall would leave the kexec'ed

>> kernel with inaccurate CPU info. Also, if kdump kernel is loaded with

>> kexec_file_load syscall and the system crashes on a hot added CPU,

>> capture kernel hangs failing to identify the boot CPU.

>>

>>   Kernel panic - not syncing: sysrq triggered crash

>>   CPU: 24 PID: 6065 Comm: echo Kdump: loaded Not tainted 5.12.0-rc5upstream #54

>>   Call Trace:

>>   [c0000000e590fac0] [c0000000007b2400] dump_stack+0xc4/0x114 (unreliable)

>>   [c0000000e590fb00] [c000000000145290] panic+0x16c/0x41c

>>   [c0000000e590fba0] [c0000000008892e0] sysrq_handle_crash+0x30/0x40

>>   [c0000000e590fc00] [c000000000889cdc] __handle_sysrq+0xcc/0x1f0

>>   [c0000000e590fca0] [c00000000088a538] write_sysrq_trigger+0xd8/0x178

>>   [c0000000e590fce0] [c0000000005e9b7c] proc_reg_write+0x10c/0x1b0

>>   [c0000000e590fd10] [c0000000004f26d0] vfs_write+0xf0/0x330

>>   [c0000000e590fd60] [c0000000004f2aec] ksys_write+0x7c/0x140

>>   [c0000000e590fdb0] [c000000000031ee0] system_call_exception+0x150/0x290

>>   [c0000000e590fe10] [c00000000000ca5c] system_call_common+0xec/0x278

>>   --- interrupt: c00 at 0x7fff905b9664

>>   NIP:  00007fff905b9664 LR: 00007fff905320c4 CTR: 0000000000000000

>>   REGS: c0000000e590fe80 TRAP: 0c00   Not tainted  (5.12.0-rc5upstream)

>>   MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28000242

>>         XER: 00000000

>>   IRQMASK: 0

>>   GPR00: 0000000000000004 00007ffff5fedf30 00007fff906a7300 0000000000000001

>>   GPR04: 000001002a7355b0 0000000000000002 0000000000000001 00007ffff5fef616

>>   GPR08: 0000000000000001 0000000000000000 0000000000000000 0000000000000000

>>   GPR12: 0000000000000000 00007fff9073a160 0000000000000000 0000000000000000

>>   GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000

>>   GPR20: 0000000000000000 00007fff906a4ee0 0000000000000002 0000000000000001

>>   GPR24: 00007fff906a0898 0000000000000000 0000000000000002 000001002a7355b0

>>   GPR28: 0000000000000002 00007fff906a1790 000001002a7355b0 0000000000000002

>>   NIP [00007fff905b9664] 0x7fff905b9664

>>   LR [00007fff905320c4] 0x7fff905320c4

>>   --- interrupt: c00

>>

>> To avoid this from happening, extract current CPU info from of_root

>> device node and use it for setting up the fdt in kexec_file_load case.

>>

>> Fixes: 6ecd0163d360 ("powerpc/kexec_file: Add appropriate regions for memory reserve map")

>>

>> Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>

>> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com>

>> Cc: <stable@vger.kernel.org>

>> ---

>>   arch/powerpc/kexec/file_load_64.c | 88 +++++++++++++++++++++++++++++++

>>   1 file changed, 88 insertions(+)

>>

>>   ---

>> Changelog:

>>

>> v1 -> v5

>>    - https://lists.ozlabs.org/pipermail/linuxppc-dev/2021-April/227950.html

>>

>> v5 -> v6

>>    - use exiting macro (for_each_property_of_node) to loop through all

>>      properties of a node.

>>    - removed devtree_lock while accessing the node properties.

>>    - function name update, add_node_prop to add_node_props.

>>   ---

>>

>> diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c

>> index 02b9e4d0dc40..4f7d4c10f939 100644

>> --- a/arch/powerpc/kexec/file_load_64.c

>> +++ b/arch/powerpc/kexec/file_load_64.c

>> @@ -960,6 +960,89 @@ unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image)

>>   	return fdt_size;

>>   }

>>

>> +/**

>> + * add_node_props - Reads node properties from device node structure and add

>> + *                  them to fdt.

>> + * @fdt:            Flattened device tree of the kernel

>> + * @node_offset:    offset of the node to add a property at

>> + * @dn:             device node pointer

>> + *

>> + * Returns 0 on success, negative errno on error.

>> + */

>> +static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)

>> +{

>> +	int ret = 0;

>> +	struct property *pp;

>> +

>> +	if (!dn)

>> +		return -EINVAL;

>> +

>> +	for_each_property_of_node(dn, pp) {

>> +		ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);

>> +		if (ret < 0) {

>> +			pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));

>> +			return ret;

>> +		}

>> +	}

>> +	return ret;

>> +}

>> +

>> +/**

>> + * update_cpus_node - Update cpus node of flattened device tree using of_root

>> + *                    device node.

>> + * @fdt:              Flattened device tree of the kernel.

>> + *

>> + * Returns 0 on success, negative errno on error.

>> + */

>> +static int update_cpus_node(void *fdt)

>> +{

>> +	struct device_node *cpus_node, *dn;

>> +	int cpus_offset, cpus_subnode_offset, ret = 0;

>> +

>> +	cpus_offset = fdt_path_offset(fdt, "/cpus");

>> +	if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {

>> +		pr_err("Malformed device tree: error reading /cpus node: %s\n",

>> +		       fdt_strerror(cpus_offset));

>> +		return cpus_offset;

>> +	}

>> +

>> +	if (cpus_offset > 0) {

>> +		ret = fdt_del_node(fdt, cpus_offset);

>> +		if (ret < 0) {

>> +			pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));

>> +			return -EINVAL;

>> +		}

>> +	}

>> +

>> +	/* Add cpus node to fdt */

>> +	cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");

>> +	if (cpus_offset < 0) {

>> +		pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));

>> +		return -EINVAL;

>> +	}

>> +

>> +	/* Add cpus node properties */

>> +	cpus_node = of_find_node_by_path("/cpus");

> Pretty sure that of_find_node_by_path() returns a device_node with its refcount

> incremented.

>

>> +	ret = add_node_props(fdt, cpus_offset, cpus_node);

> Need a of_node_put(cpus_node) here.

>

Thanks for the review, Tyrel. Updated in v7.

- Sourabh
diff mbox series

Patch

diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 02b9e4d0dc40..4f7d4c10f939 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -960,6 +960,89 @@  unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image)
 	return fdt_size;
 }
 
+/**
+ * add_node_props - Reads node properties from device node structure and add
+ *                  them to fdt.
+ * @fdt:            Flattened device tree of the kernel
+ * @node_offset:    offset of the node to add a property at
+ * @dn:             device node pointer
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
+{
+	int ret = 0;
+	struct property *pp;
+
+	if (!dn)
+		return -EINVAL;
+
+	for_each_property_of_node(dn, pp) {
+		ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
+		if (ret < 0) {
+			pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
+			return ret;
+		}
+	}
+	return ret;
+}
+
+/**
+ * update_cpus_node - Update cpus node of flattened device tree using of_root
+ *                    device node.
+ * @fdt:              Flattened device tree of the kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int update_cpus_node(void *fdt)
+{
+	struct device_node *cpus_node, *dn;
+	int cpus_offset, cpus_subnode_offset, ret = 0;
+
+	cpus_offset = fdt_path_offset(fdt, "/cpus");
+	if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
+		pr_err("Malformed device tree: error reading /cpus node: %s\n",
+		       fdt_strerror(cpus_offset));
+		return cpus_offset;
+	}
+
+	if (cpus_offset > 0) {
+		ret = fdt_del_node(fdt, cpus_offset);
+		if (ret < 0) {
+			pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
+			return -EINVAL;
+		}
+	}
+
+	/* Add cpus node to fdt */
+	cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
+	if (cpus_offset < 0) {
+		pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
+		return -EINVAL;
+	}
+
+	/* Add cpus node properties */
+	cpus_node = of_find_node_by_path("/cpus");
+	ret = add_node_props(fdt, cpus_offset, cpus_node);
+	if (ret < 0)
+		return ret;
+
+	/* Loop through all subnodes of cpus and add them to fdt */
+	for_each_node_by_type(dn, "cpu") {
+		cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
+		if (cpus_subnode_offset < 0) {
+			pr_err("Unable to add %s subnode: %s\n", dn->full_name,
+			       fdt_strerror(cpus_subnode_offset));
+			return cpus_subnode_offset;
+		}
+		ret = add_node_props(fdt, cpus_subnode_offset, dn);
+		if (ret < 0)
+			return ret;
+	}
+	of_node_put(dn);
+	return ret;
+}
+
 /**
  * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel
  *                       being loaded.
@@ -1020,6 +1103,11 @@  int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 		}
 	}
 
+	/* Update cpus nodes information to account hotplug CPUs. */
+	ret =  update_cpus_node(fdt);
+	if (ret < 0)
+		return ret;
+
 	/* Update memory reserve map */
 	ret = get_reserved_memory_ranges(&rmem);
 	if (ret)