diff mbox series

[1/1] kernel/crash_core.c - Add crashkernel=auto for x86 and ARM

Message ID 20201118232431.21832-1-saeed.mirzamohammadi@oracle.com
State New
Headers show
Series [1/1] kernel/crash_core.c - Add crashkernel=auto for x86 and ARM | expand

Commit Message

Saeed Mirzamohammadi Nov. 18, 2020, 11:24 p.m. UTC
This adds crashkernel=auto feature to configure reserved memory for
vmcore creation to both x86 and ARM platforms based on the total memory
size.

Cc: stable@vger.kernel.org
Signed-off-by: John Donnelly <john.p.donnelly@oracle.com>
Signed-off-by: Saeed Mirzamohammadi <saeed.mirzamohammadi@oracle.com>
---
 Documentation/admin-guide/kdump/kdump.rst |  5 +++++
 arch/arm64/Kconfig                        | 26 ++++++++++++++++++++++-
 arch/arm64/configs/defconfig              |  1 +
 arch/x86/Kconfig                          | 26 ++++++++++++++++++++++-
 arch/x86/configs/x86_64_defconfig         |  1 +
 kernel/crash_core.c                       | 20 +++++++++++++++--
 6 files changed, 75 insertions(+), 4 deletions(-)

Comments

Randy Dunlap Nov. 19, 2020, 1:06 a.m. UTC | #1
Hi,

On 11/18/20 3:24 PM, Saeed Mirzamohammadi wrote:
> This adds crashkernel=auto feature to configure reserved memory for

> vmcore creation to both x86 and ARM platforms based on the total memory

> size.

> 

> Cc: stable@vger.kernel.org


why?

> Signed-off-by: John Donnelly <john.p.donnelly@oracle.com>

> Signed-off-by: Saeed Mirzamohammadi <saeed.mirzamohammadi@oracle.com>

> ---

>  Documentation/admin-guide/kdump/kdump.rst |  5 +++++

>  arch/arm64/Kconfig                        | 26 ++++++++++++++++++++++-

>  arch/arm64/configs/defconfig              |  1 +

>  arch/x86/Kconfig                          | 26 ++++++++++++++++++++++-

>  arch/x86/configs/x86_64_defconfig         |  1 +

>  kernel/crash_core.c                       | 20 +++++++++++++++--

>  6 files changed, 75 insertions(+), 4 deletions(-)

> 


> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig

> index 1515f6f153a0..d359dcffa80e 100644

> --- a/arch/arm64/Kconfig

> +++ b/arch/arm64/Kconfig


> @@ -1135,6 +1135,30 @@ config CRASH_DUMP

>  

>  	  For more details see Documentation/admin-guide/kdump/kdump.rst

>  

> +if CRASH_DUMP

> +

> +config CRASH_AUTO_STR

> +        string "Memory reserved for crash kernel"


use tab instead of spaces above.

> +	depends on CRASH_DUMP

> +        default "1G-64G:128M,64G-1T:256M,1T-:512M"


ditto.

> +	help

> +	  This configures the reserved memory dependent

> +	  on the value of System RAM. The syntax is:

> +	  crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]

> +	              range=start-[end]

> +

> +	  For example:

> +	      crashkernel=512M-2G:64M,2G-:128M

> +

> +	  This would mean:

> +

> +	      1) if the RAM is smaller than 512M, then don't reserve anything

> +	         (this is the "rescue" case)

> +	      2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M

> +	      3) if the RAM size is larger than 2G, then reserve 128M

> +

> +endif # CRASH_DUMP

> +

>  config XEN_DOM0

>  	def_bool y

>  	depends on XEN


> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

> index f6946b81f74a..bacd17312bb1 100644

> --- a/arch/x86/Kconfig

> +++ b/arch/x86/Kconfig


> @@ -2049,6 +2049,30 @@ config CRASH_DUMP

>  	  (CONFIG_RELOCATABLE=y).

>  	  For more details see Documentation/admin-guide/kdump/kdump.rst

>  

> +if CRASH_DUMP

> +

> +config CRASH_AUTO_STR

> +        string "Memory reserved for crash kernel" if X86_64


ditto.

> +	depends on CRASH_DUMP

> +        default "1G-64G:128M,64G-1T:256M,1T-:512M"


ditto.

> +	help

> +	  This configures the reserved memory dependent

> +	  on the value of System RAM. The syntax is:

> +	  crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]

> +	              range=start-[end]

> +

> +	  For example:

> +	      crashkernel=512M-2G:64M,2G-:128M

> +

> +	  This would mean:

> +

> +	      1) if the RAM is smaller than 512M, then don't reserve anything

> +	         (this is the "rescue" case)

> +	      2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M

> +	      3) if the RAM size is larger than 2G, then reserve 128M

> +

> +endif # CRASH_DUMP

> +

>  config KEXEC_JUMP

>  	bool "kexec jump"

>  	depends on KEXEC && HIBERNATION


> diff --git a/kernel/crash_core.c b/kernel/crash_core.c

> index 106e4500fd53..a44cd9cc12c4 100644

> --- a/kernel/crash_core.c

> +++ b/kernel/crash_core.c

> @@ -7,6 +7,7 @@

>  #include <linux/crash_core.h>

>  #include <linux/utsname.h>

>  #include <linux/vmalloc.h>

> +#include <linux/kexec.h>

>  

>  #include <asm/page.h>

>  #include <asm/sections.h>

> @@ -41,6 +42,15 @@ static int __init parse_crashkernel_mem(char *cmdline,

>  					unsigned long long *crash_base)

>  {

>  	char *cur = cmdline, *tmp;

> +	unsigned long long total_mem = system_ram;

> +

> +	/*

> +	 * Firmware sometimes reserves some memory regions for it's own use.


	                                                       its

> +	 * so we get less than actual system memory size.

> +	 * Workaround this by round up the total size to 128M which is

> +	 * enough for most test cases.

> +	 */

> +	total_mem = roundup(total_mem, SZ_128M);



thanks.
-- 
~Randy
Kairui Song Nov. 19, 2020, 6:09 a.m. UTC | #2
On Thu, Nov 19, 2020 at 7:29 AM Saeed Mirzamohammadi
<saeed.mirzamohammadi@oracle.com> wrote:
>

> This adds crashkernel=auto feature to configure reserved memory for

> vmcore creation to both x86 and ARM platforms based on the total memory

> size.


Thanks for the patch! This is very helpful for distribution makers,
this allows the distro to have better control of the crashkernel size
and ship it with the kernel. The crashkernel value is sensitive to
kernel and driver changes, so shipping it with the kernel makes sense.

And I think crashkernel=auto could be used as an indicator that user
want the kernel to control the crashkernel size, so some further work
could be done to adjust the crashkernel more accordingly. eg. when
memory encryption is enabled, increase the crashkernel value for the
auto estimation, as it's known to consume more crashkernel memory.

There have been a lot of efforts trying to push this in upstream:
https://lkml.org/lkml/2018/5/20/262
https://lkml.org/lkml/2009/8/12/61

Still, it's not yet accepted. it's good to see more people working on this.

But why not make it arch-independent? This crashkernel=auto idea
should simply work with every arch.

>

> Cc: stable@vger.kernel.org

> Signed-off-by: John Donnelly <john.p.donnelly@oracle.com>

> Signed-off-by: Saeed Mirzamohammadi <saeed.mirzamohammadi@oracle.com>

> ---

>  Documentation/admin-guide/kdump/kdump.rst |  5 +++++

>  arch/arm64/Kconfig                        | 26 ++++++++++++++++++++++-

>  arch/arm64/configs/defconfig              |  1 +

>  arch/x86/Kconfig                          | 26 ++++++++++++++++++++++-

>  arch/x86/configs/x86_64_defconfig         |  1 +

>  kernel/crash_core.c                       | 20 +++++++++++++++--

>  6 files changed, 75 insertions(+), 4 deletions(-)

>

> diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst

> index 75a9dd98e76e..f95a2af64f59 100644

> --- a/Documentation/admin-guide/kdump/kdump.rst

> +++ b/Documentation/admin-guide/kdump/kdump.rst

> @@ -285,7 +285,12 @@ This would mean:

>      2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M

>      3) if the RAM size is larger than 2G, then reserve 128M

>

> +Or you can use crashkernel=auto if you have enough memory. The threshold

> +is 1G on x86_64 and arm64. If your system memory is less than the threshold,

> +crashkernel=auto will not reserve memory. The size changes according to

> +the system memory size like below:

>

> +    x86_64/arm64: 1G-64G:128M,64G-1T:256M,1T-:512M

>

>  Boot into System Kernel

>  =======================

> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig

> index 1515f6f153a0..d359dcffa80e 100644

> --- a/arch/arm64/Kconfig

> +++ b/arch/arm64/Kconfig

> @@ -1124,7 +1124,7 @@ comment "Support for PE file signature verification disabled"

>         depends on KEXEC_SIG

>         depends on !EFI || !SIGNED_PE_FILE_VERIFICATION

>

> -config CRASH_DUMP

> +menuconfig CRASH_DUMP

>         bool "Build kdump crash kernel"

>         help

>           Generate crash dump after being started by kexec. This should

> @@ -1135,6 +1135,30 @@ config CRASH_DUMP

>

>           For more details see Documentation/admin-guide/kdump/kdump.rst

>

> +if CRASH_DUMP

> +

> +config CRASH_AUTO_STR

> +        string "Memory reserved for crash kernel"

> +       depends on CRASH_DUMP

> +        default "1G-64G:128M,64G-1T:256M,1T-:512M"

> +       help

> +         This configures the reserved memory dependent

> +         on the value of System RAM. The syntax is:

> +         crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]

> +                     range=start-[end]

> +

> +         For example:

> +             crashkernel=512M-2G:64M,2G-:128M

> +

> +         This would mean:

> +

> +             1) if the RAM is smaller than 512M, then don't reserve anything

> +                (this is the "rescue" case)

> +             2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M

> +             3) if the RAM size is larger than 2G, then reserve 128M

> +

> +endif # CRASH_DUMP

> +

>  config XEN_DOM0

>         def_bool y

>         depends on XEN

> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig

> index 5cfe3cf6f2ac..899ef3b6a78f 100644

> --- a/arch/arm64/configs/defconfig

> +++ b/arch/arm64/configs/defconfig

> @@ -69,6 +69,7 @@ CONFIG_SECCOMP=y

>  CONFIG_KEXEC=y

>  CONFIG_KEXEC_FILE=y

>  CONFIG_CRASH_DUMP=y

> +# CONFIG_CRASH_AUTO_STR is not set

>  CONFIG_XEN=y

>  CONFIG_COMPAT=y

>  CONFIG_RANDOMIZE_BASE=y

> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

> index f6946b81f74a..bacd17312bb1 100644

> --- a/arch/x86/Kconfig

> +++ b/arch/x86/Kconfig

> @@ -2035,7 +2035,7 @@ config KEXEC_BZIMAGE_VERIFY_SIG

>         help

>           Enable bzImage signature verification support.

>

> -config CRASH_DUMP

> +menuconfig CRASH_DUMP

>         bool "kernel crash dumps"

>         depends on X86_64 || (X86_32 && HIGHMEM)

>         help

> @@ -2049,6 +2049,30 @@ config CRASH_DUMP

>           (CONFIG_RELOCATABLE=y).

>           For more details see Documentation/admin-guide/kdump/kdump.rst

>

> +if CRASH_DUMP

> +

> +config CRASH_AUTO_STR

> +        string "Memory reserved for crash kernel" if X86_64

> +       depends on CRASH_DUMP

> +        default "1G-64G:128M,64G-1T:256M,1T-:512M"

> +       help

> +         This configures the reserved memory dependent

> +         on the value of System RAM. The syntax is:

> +         crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]

> +                     range=start-[end]

> +

> +         For example:

> +             crashkernel=512M-2G:64M,2G-:128M

> +

> +         This would mean:

> +

> +             1) if the RAM is smaller than 512M, then don't reserve anything

> +                (this is the "rescue" case)

> +             2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M

> +             3) if the RAM size is larger than 2G, then reserve 128M

> +

> +endif # CRASH_DUMP

> +

>  config KEXEC_JUMP

>         bool "kexec jump"

>         depends on KEXEC && HIBERNATION

> diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig

> index 9936528e1939..7a87fbecf40b 100644

> --- a/arch/x86/configs/x86_64_defconfig

> +++ b/arch/x86/configs/x86_64_defconfig

> @@ -33,6 +33,7 @@ CONFIG_EFI_MIXED=y

>  CONFIG_HZ_1000=y

>  CONFIG_KEXEC=y

>  CONFIG_CRASH_DUMP=y

> +# CONFIG_CRASH_AUTO_STR is not set

>  CONFIG_HIBERNATION=y

>  CONFIG_PM_DEBUG=y

>  CONFIG_PM_TRACE_RTC=y

> diff --git a/kernel/crash_core.c b/kernel/crash_core.c

> index 106e4500fd53..a44cd9cc12c4 100644

> --- a/kernel/crash_core.c

> +++ b/kernel/crash_core.c

> @@ -7,6 +7,7 @@

>  #include <linux/crash_core.h>

>  #include <linux/utsname.h>

>  #include <linux/vmalloc.h>

> +#include <linux/kexec.h>

>

>  #include <asm/page.h>

>  #include <asm/sections.h>

> @@ -41,6 +42,15 @@ static int __init parse_crashkernel_mem(char *cmdline,

>                                         unsigned long long *crash_base)

>  {

>         char *cur = cmdline, *tmp;

> +       unsigned long long total_mem = system_ram;

> +

> +       /*

> +        * Firmware sometimes reserves some memory regions for it's own use.

> +        * so we get less than actual system memory size.

> +        * Workaround this by round up the total size to 128M which is

> +        * enough for most test cases.

> +        */

> +       total_mem = roundup(total_mem, SZ_128M);


I think this rounding may be better moved to the arch specified part
where parse_crashkernel is called?

>

>         /* for each entry of the comma-separated list */

>         do {

> @@ -85,13 +95,13 @@ static int __init parse_crashkernel_mem(char *cmdline,

>                         return -EINVAL;

>                 }

>                 cur = tmp;

> -               if (size >= system_ram) {

> +               if (size >= total_mem) {

>                         pr_warn("crashkernel: invalid size\n");

>                         return -EINVAL;

>                 }

>

>                 /* match ? */

> -               if (system_ram >= start && system_ram < end) {

> +               if (total_mem >= start && total_mem < end) {

>                         *crash_size = size;

>                         break;

>                 }

> @@ -250,6 +260,12 @@ static int __init __parse_crashkernel(char *cmdline,

>         if (suffix)

>                 return parse_crashkernel_suffix(ck_cmdline, crash_size,

>                                 suffix);

> +#ifdef CONFIG_CRASH_AUTO_STR

> +       if (strncmp(ck_cmdline, "auto", 4) == 0) {

> +               ck_cmdline = CONFIG_CRASH_AUTO_STR;

> +               pr_info("Using crashkernel=auto, the size chosen is a best effort estimation.\n");

> +       }

> +#endif

>         /*

>          * if the commandline contains a ':', then that's the extended

>          * syntax -- if not, it must be the classic syntax

> --

> 2.18.4

>



--
Best Regards,
Kairui Song
Saeed Mirzamohammadi Nov. 19, 2020, 8:16 p.m. UTC | #3
> 

>> @@ -1135,6 +1135,30 @@ config CRASH_DUMP

>> 

>> 	  For more details see Documentation/admin-guide/kdump/kdump.rst

>> 

>> +if CRASH_DUMP

>> +

>> +config CRASH_AUTO_STR

>> +        string "Memory reserved for crash kernel"

> 

> use tab instead of spaces above.

> 


Thanks, Randy. I’ll be fixing the them in the v2. I’ll be removing the ‘CC: stable’ as well.

Saeed
Saeed Mirzamohammadi Nov. 19, 2020, 8:52 p.m. UTC | #4
Hi,
[I’m resending this email as it failed to be sent to certain emails.]

> And I think crashkernel=auto could be used as an indicator that user

> want the kernel to control the crashkernel size, so some further work

> could be done to adjust the crashkernel more accordingly. eg. when

> memory encryption is enabled, increase the crashkernel value for the

> auto estimation, as it's known to consume more crashkernel memory.

> 

Thanks for the suggestion! I tried to keep it simple and leave it to the user to change Kconfig in case a different range is needed. Based on experience, these ranges work well for most of the regular cases.

> 

> But why not make it arch-independent? This crashkernel=auto idea

> should simply work with every arch.


Thanks! I’ll be making it arch-independent in the v2 patch.

> 

> I think this rounding may be better moved to the arch specified part

> where parse_crashkernel is called?


Thanks for the suggestion. Could you please elaborate why do we need to do that?

Thanks,
Saeed
Guilherme G. Piccoli Nov. 19, 2020, 9:56 p.m. UTC | #5
Hi Saeed, thanks for your patch/idea! Comments inline, below.

On Wed, Nov 18, 2020 at 8:29 PM Saeed Mirzamohammadi
<saeed.mirzamohammadi@oracle.com> wrote:
>

> This adds crashkernel=auto feature to configure reserved memory for

> vmcore creation to both x86 and ARM platforms based on the total memory

> size.

>

> Cc: stable@vger.kernel.org

> Signed-off-by: John Donnelly <john.p.donnelly@oracle.com>

> Signed-off-by: Saeed Mirzamohammadi <saeed.mirzamohammadi@oracle.com>

> ---

>  Documentation/admin-guide/kdump/kdump.rst |  5 +++++

>  arch/arm64/Kconfig                        | 26 ++++++++++++++++++++++-

>  arch/arm64/configs/defconfig              |  1 +

>  arch/x86/Kconfig                          | 26 ++++++++++++++++++++++-

>  arch/x86/configs/x86_64_defconfig         |  1 +

>  kernel/crash_core.c                       | 20 +++++++++++++++--

>  6 files changed, 75 insertions(+), 4 deletions(-)

>

> diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst

> index 75a9dd98e76e..f95a2af64f59 100644

> --- a/Documentation/admin-guide/kdump/kdump.rst

> +++ b/Documentation/admin-guide/kdump/kdump.rst

> @@ -285,7 +285,12 @@ This would mean:

>      2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M

>      3) if the RAM size is larger than 2G, then reserve 128M

>

> +Or you can use crashkernel=auto if you have enough memory. The threshold

> +is 1G on x86_64 and arm64. If your system memory is less than the threshold,

> +crashkernel=auto will not reserve memory. The size changes according to

> +the system memory size like below:

>

> +    x86_64/arm64: 1G-64G:128M,64G-1T:256M,1T-:512M


As mentioned in the thread, this was tried before and never got merged
- I'm not sure the all the reasons, but I speculate that a stronger
reason is that it'd likely fail in many cases. I've seen cases of 256G
servers that require crashkernel=600M (or more), due to the amount of
devices. Also, the minimum nowadays would likely be 96M or more - I'm
looping Cascardo and Dann (Debian/Ubuntu maintainers of kdump stuff)
so they maybe can jump in with even more examples/considerations.

What we've been trying to do in Ubuntu/Debian is using an estimator
approach [0] - this is purely userspace and tries to infer the amount
of necessary memory a kdump minimal[1] kernel would take. I'm not
-1'ing your approach totally, but I think a bit more consideration is
needed in the ranges, at least accounting the number of devices of the
machine or something like that.

Cheers,


Guilherme

[0] https://salsa.debian.org/debian/makedumpfile/-/merge_requests/7
[1] Minimal as having a reduced initrd + "shrinking" parameters (like
nr_cpus=1).
Dave Young Nov. 20, 2020, 2:26 a.m. UTC | #6
Hi Guilherme,
On 11/19/20 at 06:56pm, Guilherme Piccoli wrote:
> Hi Saeed, thanks for your patch/idea! Comments inline, below.

> 

> On Wed, Nov 18, 2020 at 8:29 PM Saeed Mirzamohammadi

> <saeed.mirzamohammadi@oracle.com> wrote:

> >

> > This adds crashkernel=auto feature to configure reserved memory for

> > vmcore creation to both x86 and ARM platforms based on the total memory

> > size.

> >

> > Cc: stable@vger.kernel.org

> > Signed-off-by: John Donnelly <john.p.donnelly@oracle.com>

> > Signed-off-by: Saeed Mirzamohammadi <saeed.mirzamohammadi@oracle.com>

> > ---

> >  Documentation/admin-guide/kdump/kdump.rst |  5 +++++

> >  arch/arm64/Kconfig                        | 26 ++++++++++++++++++++++-

> >  arch/arm64/configs/defconfig              |  1 +

> >  arch/x86/Kconfig                          | 26 ++++++++++++++++++++++-

> >  arch/x86/configs/x86_64_defconfig         |  1 +

> >  kernel/crash_core.c                       | 20 +++++++++++++++--

> >  6 files changed, 75 insertions(+), 4 deletions(-)

> >

> > diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst

> > index 75a9dd98e76e..f95a2af64f59 100644

> > --- a/Documentation/admin-guide/kdump/kdump.rst

> > +++ b/Documentation/admin-guide/kdump/kdump.rst

> > @@ -285,7 +285,12 @@ This would mean:

> >      2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M

> >      3) if the RAM size is larger than 2G, then reserve 128M

> >

> > +Or you can use crashkernel=auto if you have enough memory. The threshold

> > +is 1G on x86_64 and arm64. If your system memory is less than the threshold,

> > +crashkernel=auto will not reserve memory. The size changes according to

> > +the system memory size like below:

> >

> > +    x86_64/arm64: 1G-64G:128M,64G-1T:256M,1T-:512M

> 

> As mentioned in the thread, this was tried before and never got merged

> - I'm not sure the all the reasons, but I speculate that a stronger

> reason is that it'd likely fail in many cases. I've seen cases of 256G


Yes, there were a few tries, last time I tried to set a default value, I
do not think people are strongly against it.  We have been using the
auto in Red Hat for long time, it does work for most of usual cases
like Saeed said in the patch. But I think all of us are aligned it is
not possible to satisfy all the user cases.  Anyway I also think this is
good to have.

> servers that require crashkernel=600M (or more), due to the amount of

> devices. Also, the minimum nowadays would likely be 96M or more - I'm

> looping Cascardo and Dann (Debian/Ubuntu maintainers of kdump stuff)

> so they maybe can jump in with even more examples/considerations.


Another reason of people have different feeling about the memory
requirement is currently distributions are doing different on kdump,
especially for the userspace part. Kairui did a lot of work in dracut to
reduce the memory requirements in dracut, for example only add dump
required kernel modules in 2nd kernel initramfs, also we have a lot of
other twicks for dracut to use "hostonly" mode, eg. hostonly multipath
configurations will just bring up necessary paths instead of creating
all of the multipath devices.

> 

> What we've been trying to do in Ubuntu/Debian is using an estimator

> approach [0] - this is purely userspace and tries to infer the amount

> of necessary memory a kdump minimal[1] kernel would take. I'm not

> -1'ing your approach totally, but I think a bit more consideration is

> needed in the ranges, at least accounting the number of devices of the

> machine or something like that.


There are definitely room to improve and make it better in the future,
but I think this is a good start and simple enough proposal for the time
being :)

> 

> Cheers,

> 

> 

> Guilherme

> 

> [0] https://salsa.debian.org/debian/makedumpfile/-/merge_requests/7

> [1] Minimal as having a reduced initrd + "shrinking" parameters (like

> nr_cpus=1).

> 


Thanks
Dave
Kairui Song Nov. 20, 2020, 9:34 a.m. UTC | #7
On Fri, Nov 20, 2020 at 4:28 AM Saeed Mirzamohammadi
<saeed.mirzamohammadi@oracle.com> wrote:
>

> Hi,

>

> And I think crashkernel=auto could be used as an indicator that user

> want the kernel to control the crashkernel size, so some further work

> could be done to adjust the crashkernel more accordingly. eg. when

> memory encryption is enabled, increase the crashkernel value for the

> auto estimation, as it's known to consume more crashkernel memory.

>

> Thanks for the suggestion! I tried to keep it simple and leave it to the user to change Kconfig in case a different range is needed. Based on experience, these ranges work well for most of the regular cases.


Yes, I think the current implementation is a very good start.

There are some use cases, where kernel is expected to reserve more memory, like:
- when memory encryption is enabled, an extra swiotlb size of memory
should be reserved
- on pcc, fadump will expect more memory to be reserved

I believe there are a lot more cases like these.
I tried to come up with some patches to let the kernel reserve more
memory automatically, when such conditions are detected, but changing
the crashkernel= specified value is really weird.

But if we have a crashkernel=auto, then kernel automatically reserve
more memory will make sense.

> But why not make it arch-independent? This crashkernel=auto idea

> should simply work with every arch.

>

>

> Thanks! I’ll be making it arch-independent in the v2 patch.

>

>

> #include <asm/page.h>

> #include <asm/sections.h>

> @@ -41,6 +42,15 @@ static int __init parse_crashkernel_mem(char *cmdline,

>                                        unsigned long long *crash_base)

> {

>        char *cur = cmdline, *tmp;

> +       unsigned long long total_mem = system_ram;

> +

> +       /*

> +        * Firmware sometimes reserves some memory regions for it's own use.

> +        * so we get less than actual system memory size.

> +        * Workaround this by round up the total size to 128M which is

> +        * enough for most test cases.

> +        */

> +       total_mem = roundup(total_mem, SZ_128M);

>

>

> I think this rounding may be better moved to the arch specified part

> where parse_crashkernel is called?

>

>

> Thanks for the suggestion. Could you please elaborate why do we need to do that?


Every arch gets their total memory value using different methods,
(just check every parse_crashkernel call, and the system_ram param is
filled in many different ways), so I'm really not sure if this
rounding is always suitable.

>

> Thanks,

> Saeed

>

>

--
Best Regards,
Kairui Song
Guilherme G. Piccoli Nov. 22, 2020, 3:32 p.m. UTC | #8
Hi Dave and Kairui, thanks for your responses! OK, if that makes sense
to you I'm fine with it. I'd just recommend to test recent kernels in
multiple distros with the minimum "range" to see if 64M is enough for
crashkernel, maybe we'd need to bump that.
Cheers,


Guilherme
Dave Young Nov. 23, 2020, 3:47 a.m. UTC | #9
Hi Guilherme,
On 11/22/20 at 12:32pm, Guilherme Piccoli wrote:
> Hi Dave and Kairui, thanks for your responses! OK, if that makes sense

> to you I'm fine with it. I'd just recommend to test recent kernels in

> multiple distros with the minimum "range" to see if 64M is enough for

> crashkernel, maybe we'd need to bump that.


Giving the different kernel configs and the different userspace
initramfs setup it is hard to get an uniform value for all distributions,
but we can have an interface/kconfig-option for them to provide a value like this patch
is doing. And it could be improved like Kairui said about some known
kernel added extra values later, probably some more improvements if
doable.

Thanks
Dave
John Donnelly Jan. 21, 2021, 3:32 p.m. UTC | #10
On 11/22/20 9:47 PM, Dave Young wrote:
> Hi Guilherme,

> On 11/22/20 at 12:32pm, Guilherme Piccoli wrote:

>> Hi Dave and Kairui, thanks for your responses! OK, if that makes sense

>> to you I'm fine with it. I'd just recommend to test recent kernels in

>> multiple distros with the minimum "range" to see if 64M is enough for

>> crashkernel, maybe we'd need to bump that.

> 

> Giving the different kernel configs and the different userspace

> initramfs setup it is hard to get an uniform value for all distributions,

> but we can have an interface/kconfig-option for them to provide a value like this patch

> is doing. And it could be improved like Kairui said about some known

> kernel added extra values later, probably some more improvements if

> doable.

> 

> Thanks

> Dave

> 


Hi.

Are we going to move forward with implementing this for X86 and Arm ?

If other platform maintainers want to include this CONFIG option in 
their configuration settings they have a starting point.

Thank you,

John.

( I am not currently on many of the included dist lists  in this email, 
so hopefully key contributors are included in this exchange )
Dave Young Jan. 22, 2021, 1:22 a.m. UTC | #11
Hi John,

On 01/21/21 at 09:32am, john.p.donnelly@oracle.com wrote:
> On 11/22/20 9:47 PM, Dave Young wrote:

> > Hi Guilherme,

> > On 11/22/20 at 12:32pm, Guilherme Piccoli wrote:

> > > Hi Dave and Kairui, thanks for your responses! OK, if that makes sense

> > > to you I'm fine with it. I'd just recommend to test recent kernels in

> > > multiple distros with the minimum "range" to see if 64M is enough for

> > > crashkernel, maybe we'd need to bump that.

> > 

> > Giving the different kernel configs and the different userspace

> > initramfs setup it is hard to get an uniform value for all distributions,

> > but we can have an interface/kconfig-option for them to provide a value like this patch

> > is doing. And it could be improved like Kairui said about some known

> > kernel added extra values later, probably some more improvements if

> > doable.

> > 

> > Thanks

> > Dave

> > 

> 

> Hi.

> 

> Are we going to move forward with implementing this for X86 and Arm ?

> 

> If other platform maintainers want to include this CONFIG option in their

> configuration settings they have a starting point.


I would expect this become arch independent.

Saeed, Kairui, would any of you like to update the patch?

> 

> Thank you,

> 

> John.

> 

> ( I am not currently on many of the included dist lists  in this email, so

> hopefully key contributors are included in this exchange )

> 


Thanks
Dave
Dave Young Jan. 22, 2021, 3:12 a.m. UTC | #12
On 01/22/21 at 09:22am, Dave Young wrote:
> Hi John,

> 

> On 01/21/21 at 09:32am, john.p.donnelly@oracle.com wrote:

> > On 11/22/20 9:47 PM, Dave Young wrote:

> > > Hi Guilherme,

> > > On 11/22/20 at 12:32pm, Guilherme Piccoli wrote:

> > > > Hi Dave and Kairui, thanks for your responses! OK, if that makes sense

> > > > to you I'm fine with it. I'd just recommend to test recent kernels in

> > > > multiple distros with the minimum "range" to see if 64M is enough for

> > > > crashkernel, maybe we'd need to bump that.

> > > 

> > > Giving the different kernel configs and the different userspace

> > > initramfs setup it is hard to get an uniform value for all distributions,

> > > but we can have an interface/kconfig-option for them to provide a value like this patch

> > > is doing. And it could be improved like Kairui said about some known

> > > kernel added extra values later, probably some more improvements if

> > > doable.

> > > 

> > > Thanks

> > > Dave

> > > 

> > 

> > Hi.

> > 

> > Are we going to move forward with implementing this for X86 and Arm ?

> > 

> > If other platform maintainers want to include this CONFIG option in their

> > configuration settings they have a starting point.

> 

> I would expect this become arch independent.


Clarify a bit, it can be a general config option under arch/Kconfig and
just put the code in general arch independent part.

> 

> Saeed, Kairui, would any of you like to update the patch?

> 

> > 

> > Thank you,

> > 

> > John.

> > 

> > ( I am not currently on many of the included dist lists  in this email, so

> > hopefully key contributors are included in this exchange )

> > 

> 

> Thanks

> Dave
Dave Young Jan. 23, 2021, 3:51 a.m. UTC | #13
Hi Saeed,
On 01/22/21 at 05:14pm, Saeed Mirzamohammadi wrote:
> Hi,

> 

> > On Jan 21, 2021, at 7:12 PM, Dave Young <dyoung@redhat.com> wrote:

> > 

> > On 01/22/21 at 09:22am, Dave Young wrote:

> >> Hi John,

> >> 

> >> On 01/21/21 at 09:32am, john.p.donnelly@oracle.com wrote:

> >>> On 11/22/20 9:47 PM, Dave Young wrote:

> >>>> Hi Guilherme,

> >>>> On 11/22/20 at 12:32pm, Guilherme Piccoli wrote:

> >>>>> Hi Dave and Kairui, thanks for your responses! OK, if that makes sense

> >>>>> to you I'm fine with it. I'd just recommend to test recent kernels in

> >>>>> multiple distros with the minimum "range" to see if 64M is enough for

> >>>>> crashkernel, maybe we'd need to bump that.

> >>>> 

> >>>> Giving the different kernel configs and the different userspace

> >>>> initramfs setup it is hard to get an uniform value for all distributions,

> >>>> but we can have an interface/kconfig-option for them to provide a value like this patch

> >>>> is doing. And it could be improved like Kairui said about some known

> >>>> kernel added extra values later, probably some more improvements if

> >>>> doable.

> >>>> 

> >>>> Thanks

> >>>> Dave

> >>>> 

> >>> 

> >>> Hi.

> >>> 

> >>> Are we going to move forward with implementing this for X86 and Arm ?

> >>> 

> >>> If other platform maintainers want to include this CONFIG option in their

> >>> configuration settings they have a starting point.

> >> 

> >> I would expect this become arch independent.

> > 

> > Clarify a bit, it can be a general config option under arch/Kconfig and

> > just put the code in general arch independent part.

> 

> Does this mean that we need to add the option to def_configs in all archs as well?

> 


I think we do not need to add defconfig, something like this will just work?

BTW, it should depend on CRASH_CORE instead of CRASH_DUMP, the logic of
parsing crashkernel is in kernel/crash_core.c

diff --git a/arch/Kconfig b/arch/Kconfig
index af14a567b493..fa6efeb52dc5 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -14,6 +14,11 @@ menu "General architecture-dependent options"
 config CRASH_CORE
 	bool
 
+config CRASH_AUTO_STR
+	depends on CRASH_CORE
+	string "Memory reserved for crash kernel"
+	default "1G-:128M"
+	... help text [snip] ...
+
 config KEXEC_CORE
 	select CRASH_CORE
 	bool

[...]

> Thanks,

> Saeed

> 

> > 

> >> 

> >> Saeed, Kairui, would any of you like to update the patch?

> >> 

> >>> 

> >>> Thank you,

> >>> 

> >>> John.

> >>> 

> >>> ( I am not currently on many of the included dist lists  in this email, so

> >>> hopefully key contributors are included in this exchange )

> >>> 

> >> 

> >> Thanks

> >> Dave

> 


Thanks
Dave
Dave Young Jan. 23, 2021, 3:57 a.m. UTC | #14
On 01/23/21 at 11:51am, Dave Young wrote:
> Hi Saeed,

> On 01/22/21 at 05:14pm, Saeed Mirzamohammadi wrote:

> > Hi,

> > 

> > > On Jan 21, 2021, at 7:12 PM, Dave Young <dyoung@redhat.com> wrote:

> > > 

> > > On 01/22/21 at 09:22am, Dave Young wrote:

> > >> Hi John,

> > >> 

> > >> On 01/21/21 at 09:32am, john.p.donnelly@oracle.com wrote:

> > >>> On 11/22/20 9:47 PM, Dave Young wrote:

> > >>>> Hi Guilherme,

> > >>>> On 11/22/20 at 12:32pm, Guilherme Piccoli wrote:

> > >>>>> Hi Dave and Kairui, thanks for your responses! OK, if that makes sense

> > >>>>> to you I'm fine with it. I'd just recommend to test recent kernels in

> > >>>>> multiple distros with the minimum "range" to see if 64M is enough for

> > >>>>> crashkernel, maybe we'd need to bump that.

> > >>>> 

> > >>>> Giving the different kernel configs and the different userspace

> > >>>> initramfs setup it is hard to get an uniform value for all distributions,

> > >>>> but we can have an interface/kconfig-option for them to provide a value like this patch

> > >>>> is doing. And it could be improved like Kairui said about some known

> > >>>> kernel added extra values later, probably some more improvements if

> > >>>> doable.

> > >>>> 

> > >>>> Thanks

> > >>>> Dave

> > >>>> 

> > >>> 

> > >>> Hi.

> > >>> 

> > >>> Are we going to move forward with implementing this for X86 and Arm ?

> > >>> 

> > >>> If other platform maintainers want to include this CONFIG option in their

> > >>> configuration settings they have a starting point.

> > >> 

> > >> I would expect this become arch independent.

> > > 

> > > Clarify a bit, it can be a general config option under arch/Kconfig and

> > > just put the code in general arch independent part.

> > 

> > Does this mean that we need to add the option to def_configs in all archs as well?

> > 

> 

> I think we do not need to add defconfig, something like this will just work?

> 

> BTW, it should depend on CRASH_CORE instead of CRASH_DUMP, the logic of

> parsing crashkernel is in kernel/crash_core.c

> 

> diff --git a/arch/Kconfig b/arch/Kconfig

> index af14a567b493..fa6efeb52dc5 100644

> --- a/arch/Kconfig

> +++ b/arch/Kconfig

> @@ -14,6 +14,11 @@ menu "General architecture-dependent options"

>  config CRASH_CORE

>  	bool

>  

> +config CRASH_AUTO_STR

> +	depends on CRASH_CORE

> +	string "Memory reserved for crash kernel"

> +	default "1G-:128M"


People do not want to see the default value if they do not need kdump 
so it would be better to add another kconfig option as a switch which is
set default as off in bool state.

> +	... help text [snip] ...

> +

>  config KEXEC_CORE

>  	select CRASH_CORE

>  	bool

> 

> [...]

> 

> > Thanks,

> > Saeed

> > 

> > > 

> > >> 

> > >> Saeed, Kairui, would any of you like to update the patch?

> > >> 

> > >>> 

> > >>> Thank you,

> > >>> 

> > >>> John.

> > >>> 

> > >>> ( I am not currently on many of the included dist lists  in this email, so

> > >>> hopefully key contributors are included in this exchange )

> > >>> 

> > >> 

> > >> Thanks

> > >> Dave

> > 

> 

> Thanks

> Dave
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
index 75a9dd98e76e..f95a2af64f59 100644
--- a/Documentation/admin-guide/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -285,7 +285,12 @@  This would mean:
     2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
     3) if the RAM size is larger than 2G, then reserve 128M
 
+Or you can use crashkernel=auto if you have enough memory. The threshold
+is 1G on x86_64 and arm64. If your system memory is less than the threshold,
+crashkernel=auto will not reserve memory. The size changes according to
+the system memory size like below:
 
+    x86_64/arm64: 1G-64G:128M,64G-1T:256M,1T-:512M
 
 Boot into System Kernel
 =======================
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1515f6f153a0..d359dcffa80e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1124,7 +1124,7 @@  comment "Support for PE file signature verification disabled"
 	depends on KEXEC_SIG
 	depends on !EFI || !SIGNED_PE_FILE_VERIFICATION
 
-config CRASH_DUMP
+menuconfig CRASH_DUMP
 	bool "Build kdump crash kernel"
 	help
 	  Generate crash dump after being started by kexec. This should
@@ -1135,6 +1135,30 @@  config CRASH_DUMP
 
 	  For more details see Documentation/admin-guide/kdump/kdump.rst
 
+if CRASH_DUMP
+
+config CRASH_AUTO_STR
+        string "Memory reserved for crash kernel"
+	depends on CRASH_DUMP
+        default "1G-64G:128M,64G-1T:256M,1T-:512M"
+	help
+	  This configures the reserved memory dependent
+	  on the value of System RAM. The syntax is:
+	  crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+	              range=start-[end]
+
+	  For example:
+	      crashkernel=512M-2G:64M,2G-:128M
+
+	  This would mean:
+
+	      1) if the RAM is smaller than 512M, then don't reserve anything
+	         (this is the "rescue" case)
+	      2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
+	      3) if the RAM size is larger than 2G, then reserve 128M
+
+endif # CRASH_DUMP
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 5cfe3cf6f2ac..899ef3b6a78f 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -69,6 +69,7 @@  CONFIG_SECCOMP=y
 CONFIG_KEXEC=y
 CONFIG_KEXEC_FILE=y
 CONFIG_CRASH_DUMP=y
+# CONFIG_CRASH_AUTO_STR is not set
 CONFIG_XEN=y
 CONFIG_COMPAT=y
 CONFIG_RANDOMIZE_BASE=y
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f6946b81f74a..bacd17312bb1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2035,7 +2035,7 @@  config KEXEC_BZIMAGE_VERIFY_SIG
 	help
 	  Enable bzImage signature verification support.
 
-config CRASH_DUMP
+menuconfig CRASH_DUMP
 	bool "kernel crash dumps"
 	depends on X86_64 || (X86_32 && HIGHMEM)
 	help
@@ -2049,6 +2049,30 @@  config CRASH_DUMP
 	  (CONFIG_RELOCATABLE=y).
 	  For more details see Documentation/admin-guide/kdump/kdump.rst
 
+if CRASH_DUMP
+
+config CRASH_AUTO_STR
+        string "Memory reserved for crash kernel" if X86_64
+	depends on CRASH_DUMP
+        default "1G-64G:128M,64G-1T:256M,1T-:512M"
+	help
+	  This configures the reserved memory dependent
+	  on the value of System RAM. The syntax is:
+	  crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+	              range=start-[end]
+
+	  For example:
+	      crashkernel=512M-2G:64M,2G-:128M
+
+	  This would mean:
+
+	      1) if the RAM is smaller than 512M, then don't reserve anything
+	         (this is the "rescue" case)
+	      2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
+	      3) if the RAM size is larger than 2G, then reserve 128M
+
+endif # CRASH_DUMP
+
 config KEXEC_JUMP
 	bool "kexec jump"
 	depends on KEXEC && HIBERNATION
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9936528e1939..7a87fbecf40b 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -33,6 +33,7 @@  CONFIG_EFI_MIXED=y
 CONFIG_HZ_1000=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+# CONFIG_CRASH_AUTO_STR is not set
 CONFIG_HIBERNATION=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_TRACE_RTC=y
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 106e4500fd53..a44cd9cc12c4 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -7,6 +7,7 @@ 
 #include <linux/crash_core.h>
 #include <linux/utsname.h>
 #include <linux/vmalloc.h>
+#include <linux/kexec.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -41,6 +42,15 @@  static int __init parse_crashkernel_mem(char *cmdline,
 					unsigned long long *crash_base)
 {
 	char *cur = cmdline, *tmp;
+	unsigned long long total_mem = system_ram;
+
+	/*
+	 * Firmware sometimes reserves some memory regions for it's own use.
+	 * so we get less than actual system memory size.
+	 * Workaround this by round up the total size to 128M which is
+	 * enough for most test cases.
+	 */
+	total_mem = roundup(total_mem, SZ_128M);
 
 	/* for each entry of the comma-separated list */
 	do {
@@ -85,13 +95,13 @@  static int __init parse_crashkernel_mem(char *cmdline,
 			return -EINVAL;
 		}
 		cur = tmp;
-		if (size >= system_ram) {
+		if (size >= total_mem) {
 			pr_warn("crashkernel: invalid size\n");
 			return -EINVAL;
 		}
 
 		/* match ? */
-		if (system_ram >= start && system_ram < end) {
+		if (total_mem >= start && total_mem < end) {
 			*crash_size = size;
 			break;
 		}
@@ -250,6 +260,12 @@  static int __init __parse_crashkernel(char *cmdline,
 	if (suffix)
 		return parse_crashkernel_suffix(ck_cmdline, crash_size,
 				suffix);
+#ifdef CONFIG_CRASH_AUTO_STR
+	if (strncmp(ck_cmdline, "auto", 4) == 0) {
+		ck_cmdline = CONFIG_CRASH_AUTO_STR;
+		pr_info("Using crashkernel=auto, the size chosen is a best effort estimation.\n");
+	}
+#endif
 	/*
 	 * if the commandline contains a ':', then that's the extended
 	 * syntax -- if not, it must be the classic syntax