diff mbox series

[RFC,1/5] tty/sysrq: Make sysrq handler NMI aware

Message ID 1595333413-30052-2-git-send-email-sumit.garg@linaro.org
State New
Headers show
Series Introduce NMI aware serial drivers | expand

Commit Message

Sumit Garg July 21, 2020, 12:10 p.m. UTC
In a future patch we will add support to the serial core to make it
possible to trigger a magic sysrq from an NMI context. Prepare for this
by marking some sysrq actions as NMI safe. Safe actions will be allowed
to run from NMI context whilst that cannot run from an NMI will be queued
as irq_work for later processing.

A particular sysrq handler is only marked as NMI safe in case the handler
isn't contending for any synchronization primitives as in NMI context
they are expected to cause deadlocks. Note that the debug sysrq do not
contend for any synchronization primitives. It does call kgdb_breakpoint()
to provoke a trap but that trap handler should be NMI safe on
architectures that implement an NMI.

Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

---
 drivers/tty/sysrq.c       | 33 ++++++++++++++++++++++++++++++++-
 include/linux/sysrq.h     |  1 +
 kernel/debug/debug_core.c |  1 +
 3 files changed, 34 insertions(+), 1 deletion(-)

-- 
2.7.4

Comments

Doug Anderson Aug. 12, 2020, 11:59 p.m. UTC | #1
Hi,

On Tue, Jul 21, 2020 at 5:10 AM Sumit Garg <sumit.garg@linaro.org> wrote:
>

> In a future patch we will add support to the serial core to make it

> possible to trigger a magic sysrq from an NMI context. Prepare for this

> by marking some sysrq actions as NMI safe. Safe actions will be allowed

> to run from NMI context whilst that cannot run from an NMI will be queued

> as irq_work for later processing.

>

> A particular sysrq handler is only marked as NMI safe in case the handler

> isn't contending for any synchronization primitives as in NMI context

> they are expected to cause deadlocks. Note that the debug sysrq do not

> contend for any synchronization primitives. It does call kgdb_breakpoint()

> to provoke a trap but that trap handler should be NMI safe on

> architectures that implement an NMI.

>

> Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> ---

>  drivers/tty/sysrq.c       | 33 ++++++++++++++++++++++++++++++++-

>  include/linux/sysrq.h     |  1 +

>  kernel/debug/debug_core.c |  1 +

>  3 files changed, 34 insertions(+), 1 deletion(-)

>

> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c

> index 7c95afa9..8017e33 100644

> --- a/drivers/tty/sysrq.c

> +++ b/drivers/tty/sysrq.c

> @@ -50,6 +50,8 @@

>  #include <linux/syscalls.h>

>  #include <linux/of.h>

>  #include <linux/rcupdate.h>

> +#include <linux/irq_work.h>

> +#include <linux/kfifo.h>

>

>  #include <asm/ptrace.h>

>  #include <asm/irq_regs.h>

> @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = {

>         .help_msg       = "loglevel(0-9)",

>         .action_msg     = "Changing Loglevel",

>         .enable_mask    = SYSRQ_ENABLE_LOG,

> +       .nmi_safe       = true,

>  };

>

>  #ifdef CONFIG_VT

> @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = {

>         .help_msg       = "crash(c)",

>         .action_msg     = "Trigger a crash",

>         .enable_mask    = SYSRQ_ENABLE_DUMP,

> +       .nmi_safe       = true,

>  };

>

>  static void sysrq_handle_reboot(int key)

> @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = {

>         .help_msg       = "reboot(b)",

>         .action_msg     = "Resetting",

>         .enable_mask    = SYSRQ_ENABLE_BOOT,

> +       .nmi_safe       = true,

>  };

>

>  const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;

> @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = {

>         .handler        = sysrq_handle_showlocks,

>         .help_msg       = "show-all-locks(d)",

>         .action_msg     = "Show Locks Held",

> +       .nmi_safe       = true,

>  };

>  #else

>  #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)

> @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {

>         .help_msg       = "show-registers(p)",

>         .action_msg     = "Show Regs",

>         .enable_mask    = SYSRQ_ENABLE_DUMP,

> +       .nmi_safe       = true,

>  };

>

>  static void sysrq_handle_showstate(int key)

> @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = {

>         .help_msg       = "dump-ftrace-buffer(z)",

>         .action_msg     = "Dump ftrace buffer",

>         .enable_mask    = SYSRQ_ENABLE_DUMP,

> +       .nmi_safe       = true,

>  };

>  #else

>  #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)

> @@ -538,6 +546,23 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)

>                  sysrq_key_table[i] = op_p;

>  }

>

> +#define SYSRQ_NMI_FIFO_SIZE    64

> +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);


A 64-entry FIFO seems excessive. Quite honestly even a FIFO seems a
bit excessive and it feels like if two sysrqs were received in super
quick succession that it would be OK to just process the first one.  I
guess if it simplifies the processing to have a FIFO then it shouldn't
hurt, but no need for 64 entries.


> +static void sysrq_do_nmi_work(struct irq_work *work)

> +{

> +       const struct sysrq_key_op *op_p;

> +       int key;

> +

> +       while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {

> +               op_p = __sysrq_get_key_op(key);

> +               if (op_p)

> +                       op_p->handler(key);

> +       }


Do you need to manage "suppress_printk" in this function?  Do you need
to call rcu_sysrq_start() and rcu_read_lock()?

If so, how do you prevent racing between the mucking we're doing with
these things and the mucking that the NMI does with them?


> +}

> +

> +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);

> +

>  void __handle_sysrq(int key, bool check_mask)

>  {

>         const struct sysrq_key_op *op_p;

> @@ -568,7 +593,13 @@ void __handle_sysrq(int key, bool check_mask)

>                 if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {

>                         pr_info("%s\n", op_p->action_msg);

>                         console_loglevel = orig_log_level;

> -                       op_p->handler(key);

> +

> +                       if (in_nmi() && !op_p->nmi_safe) {

> +                               kfifo_in(&sysrq_nmi_fifo, &key, 1);


Rather than kfifo_in() and kfifo_out(), I think you can use
kfifo_put() and kfifo_get().  As I understand it those just get/put
one element which is what you want.


> +                               irq_work_queue(&sysrq_nmi_work);


Wishful thinking, but (as far as I can tell) irq_work_queue() only
queues work on the CPU running the NMI.  I don't have lots of NMI
experience, but any chance there is a variant that will queue work on
any CPU?  Then sysrq handlers that aren't NMI aware will be more
likely to work.




> +                       } else {

> +                               op_p->handler(key);

> +                       }

>                 } else {

>                         pr_info("This sysrq operation is disabled.\n");

>                         console_loglevel = orig_log_level;

> diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h

> index 3a582ec..630b5b9 100644

> --- a/include/linux/sysrq.h

> +++ b/include/linux/sysrq.h

> @@ -34,6 +34,7 @@ struct sysrq_key_op {

>         const char * const help_msg;

>         const char * const action_msg;

>         const int enable_mask;

> +       const bool nmi_safe;

>  };

>

>  #ifdef CONFIG_MAGIC_SYSRQ

> diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c

> index 9e59347..2b51173 100644

> --- a/kernel/debug/debug_core.c

> +++ b/kernel/debug/debug_core.c

> @@ -943,6 +943,7 @@ static const struct sysrq_key_op sysrq_dbg_op = {

>         .handler        = sysrq_handle_dbg,

>         .help_msg       = "debug(g)",

>         .action_msg     = "DEBUG",

> +       .nmi_safe       = true,

>  };

>  #endif

>

> --

> 2.7.4

>
Sumit Garg Aug. 14, 2020, 7:24 a.m. UTC | #2
+ Peter (author of irq_work.c)

On Thu, 13 Aug 2020 at 05:30, Doug Anderson <dianders@chromium.org> wrote:
>

> Hi,

>

> On Tue, Jul 21, 2020 at 5:10 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> >

> > In a future patch we will add support to the serial core to make it

> > possible to trigger a magic sysrq from an NMI context. Prepare for this

> > by marking some sysrq actions as NMI safe. Safe actions will be allowed

> > to run from NMI context whilst that cannot run from an NMI will be queued

> > as irq_work for later processing.

> >

> > A particular sysrq handler is only marked as NMI safe in case the handler

> > isn't contending for any synchronization primitives as in NMI context

> > they are expected to cause deadlocks. Note that the debug sysrq do not

> > contend for any synchronization primitives. It does call kgdb_breakpoint()

> > to provoke a trap but that trap handler should be NMI safe on

> > architectures that implement an NMI.

> >

> > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> > ---

> >  drivers/tty/sysrq.c       | 33 ++++++++++++++++++++++++++++++++-

> >  include/linux/sysrq.h     |  1 +

> >  kernel/debug/debug_core.c |  1 +

> >  3 files changed, 34 insertions(+), 1 deletion(-)

> >

> > diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c

> > index 7c95afa9..8017e33 100644

> > --- a/drivers/tty/sysrq.c

> > +++ b/drivers/tty/sysrq.c

> > @@ -50,6 +50,8 @@

> >  #include <linux/syscalls.h>

> >  #include <linux/of.h>

> >  #include <linux/rcupdate.h>

> > +#include <linux/irq_work.h>

> > +#include <linux/kfifo.h>

> >

> >  #include <asm/ptrace.h>

> >  #include <asm/irq_regs.h>

> > @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = {

> >         .help_msg       = "loglevel(0-9)",

> >         .action_msg     = "Changing Loglevel",

> >         .enable_mask    = SYSRQ_ENABLE_LOG,

> > +       .nmi_safe       = true,

> >  };

> >

> >  #ifdef CONFIG_VT

> > @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = {

> >         .help_msg       = "crash(c)",

> >         .action_msg     = "Trigger a crash",

> >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > +       .nmi_safe       = true,

> >  };

> >

> >  static void sysrq_handle_reboot(int key)

> > @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = {

> >         .help_msg       = "reboot(b)",

> >         .action_msg     = "Resetting",

> >         .enable_mask    = SYSRQ_ENABLE_BOOT,

> > +       .nmi_safe       = true,

> >  };

> >

> >  const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;

> > @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = {

> >         .handler        = sysrq_handle_showlocks,

> >         .help_msg       = "show-all-locks(d)",

> >         .action_msg     = "Show Locks Held",

> > +       .nmi_safe       = true,

> >  };

> >  #else

> >  #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)

> > @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {

> >         .help_msg       = "show-registers(p)",

> >         .action_msg     = "Show Regs",

> >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > +       .nmi_safe       = true,

> >  };

> >

> >  static void sysrq_handle_showstate(int key)

> > @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = {

> >         .help_msg       = "dump-ftrace-buffer(z)",

> >         .action_msg     = "Dump ftrace buffer",

> >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > +       .nmi_safe       = true,

> >  };

> >  #else

> >  #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)

> > @@ -538,6 +546,23 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)

> >                  sysrq_key_table[i] = op_p;

> >  }

> >

> > +#define SYSRQ_NMI_FIFO_SIZE    64

> > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);

>

> A 64-entry FIFO seems excessive. Quite honestly even a FIFO seems a

> bit excessive and it feels like if two sysrqs were received in super

> quick succession that it would be OK to just process the first one.  I

> guess if it simplifies the processing to have a FIFO then it shouldn't

> hurt, but no need for 64 entries.

>


Okay, would a 2-entry FIFO work here? As here we need a FIFO to pass
on the key parameter.

>

> > +static void sysrq_do_nmi_work(struct irq_work *work)

> > +{

> > +       const struct sysrq_key_op *op_p;

> > +       int key;

> > +

> > +       while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {

> > +               op_p = __sysrq_get_key_op(key);

> > +               if (op_p)

> > +                       op_p->handler(key);

> > +       }

>

> Do you need to manage "suppress_printk" in this function?  Do you need

> to call rcu_sysrq_start() and rcu_read_lock()?


Ah I missed those. Will add them here instead.

>

> If so, how do you prevent racing between the mucking we're doing with

> these things and the mucking that the NMI does with them?


IIUC, here you meant to highlight the race while scheduled sysrq is
executing in IRQ context and we receive a new sysrq in NMI context,
correct? If yes, this seems to be a trickier situation. I think the
appropriate way to handle it would be to deny any further sysrq
handling until the prior sysrq handling is complete, your views?

>

>

> > +}

> > +

> > +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);

> > +

> >  void __handle_sysrq(int key, bool check_mask)

> >  {

> >         const struct sysrq_key_op *op_p;

> > @@ -568,7 +593,13 @@ void __handle_sysrq(int key, bool check_mask)

> >                 if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {

> >                         pr_info("%s\n", op_p->action_msg);

> >                         console_loglevel = orig_log_level;

> > -                       op_p->handler(key);

> > +

> > +                       if (in_nmi() && !op_p->nmi_safe) {

> > +                               kfifo_in(&sysrq_nmi_fifo, &key, 1);

>

> Rather than kfifo_in() and kfifo_out(), I think you can use

> kfifo_put() and kfifo_get().  As I understand it those just get/put

> one element which is what you want.


Okay, will use kfifo_put() and kfifo_get() here instead.

>

>

> > +                               irq_work_queue(&sysrq_nmi_work);

>

> Wishful thinking, but (as far as I can tell) irq_work_queue() only

> queues work on the CPU running the NMI.  I don't have lots of NMI

> experience, but any chance there is a variant that will queue work on

> any CPU?  Then sysrq handlers that aren't NMI aware will be more

> likely to work.

>


Unfortunately, queuing work on other CPUs isn't safe in NMI context,
see this warning [1]. The comment mentions:

/* Arch remote IPI send/receive backend aren't NMI safe */

Peter,

Can you throw some light here as to why it isn't considered NMI-safe
to send remote IPI in NMI context? Is it an arch specific limitation?

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/irq_work.c#n103

-Sumit

>

>

>

> > +                       } else {

> > +                               op_p->handler(key);

> > +                       }

> >                 } else {

> >                         pr_info("This sysrq operation is disabled.\n");

> >                         console_loglevel = orig_log_level;

> > diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h

> > index 3a582ec..630b5b9 100644

> > --- a/include/linux/sysrq.h

> > +++ b/include/linux/sysrq.h

> > @@ -34,6 +34,7 @@ struct sysrq_key_op {

> >         const char * const help_msg;

> >         const char * const action_msg;

> >         const int enable_mask;

> > +       const bool nmi_safe;

> >  };

> >

> >  #ifdef CONFIG_MAGIC_SYSRQ

> > diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c

> > index 9e59347..2b51173 100644

> > --- a/kernel/debug/debug_core.c

> > +++ b/kernel/debug/debug_core.c

> > @@ -943,6 +943,7 @@ static const struct sysrq_key_op sysrq_dbg_op = {

> >         .handler        = sysrq_handle_dbg,

> >         .help_msg       = "debug(g)",

> >         .action_msg     = "DEBUG",

> > +       .nmi_safe       = true,

> >  };

> >  #endif

> >

> > --

> > 2.7.4

> >
Peter Zijlstra Aug. 14, 2020, 2:34 p.m. UTC | #3
On Fri, Aug 14, 2020 at 12:54:35PM +0530, Sumit Garg wrote:
> On Thu, 13 Aug 2020 at 05:30, Doug Anderson <dianders@chromium.org> wrote:

> > On Tue, Jul 21, 2020 at 5:10 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > Wishful thinking, but (as far as I can tell) irq_work_queue() only

> > queues work on the CPU running the NMI.  I don't have lots of NMI

> > experience, but any chance there is a variant that will queue work on

> > any CPU?  Then sysrq handlers that aren't NMI aware will be more

> > likely to work.

> >

> 

> Unfortunately, queuing work on other CPUs isn't safe in NMI context,

> see this warning [1]. The comment mentions:

> 

> /* Arch remote IPI send/receive backend aren't NMI safe */

> 

> Peter,

> 

> Can you throw some light here as to why it isn't considered NMI-safe

> to send remote IPI in NMI context? Is it an arch specific limitation?


Yeah, remote irq_work uses __smp_call_single_queue() /
send_call_function_single_ipi() which isn't safe to be used from NMI
context in general.

arch_irq_work_raise() is very carefully implemented on a number of
platforms to be able to (self) IPI from NMI context.
Doug Anderson Aug. 14, 2020, 2:57 p.m. UTC | #4
Hi,

On Fri, Aug 14, 2020 at 12:24 AM Sumit Garg <sumit.garg@linaro.org> wrote:
>

> + Peter (author of irq_work.c)

>

> On Thu, 13 Aug 2020 at 05:30, Doug Anderson <dianders@chromium.org> wrote:

> >

> > Hi,

> >

> > On Tue, Jul 21, 2020 at 5:10 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > >

> > > In a future patch we will add support to the serial core to make it

> > > possible to trigger a magic sysrq from an NMI context. Prepare for this

> > > by marking some sysrq actions as NMI safe. Safe actions will be allowed

> > > to run from NMI context whilst that cannot run from an NMI will be queued

> > > as irq_work for later processing.

> > >

> > > A particular sysrq handler is only marked as NMI safe in case the handler

> > > isn't contending for any synchronization primitives as in NMI context

> > > they are expected to cause deadlocks. Note that the debug sysrq do not

> > > contend for any synchronization primitives. It does call kgdb_breakpoint()

> > > to provoke a trap but that trap handler should be NMI safe on

> > > architectures that implement an NMI.

> > >

> > > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> > > ---

> > >  drivers/tty/sysrq.c       | 33 ++++++++++++++++++++++++++++++++-

> > >  include/linux/sysrq.h     |  1 +

> > >  kernel/debug/debug_core.c |  1 +

> > >  3 files changed, 34 insertions(+), 1 deletion(-)

> > >

> > > diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c

> > > index 7c95afa9..8017e33 100644

> > > --- a/drivers/tty/sysrq.c

> > > +++ b/drivers/tty/sysrq.c

> > > @@ -50,6 +50,8 @@

> > >  #include <linux/syscalls.h>

> > >  #include <linux/of.h>

> > >  #include <linux/rcupdate.h>

> > > +#include <linux/irq_work.h>

> > > +#include <linux/kfifo.h>

> > >

> > >  #include <asm/ptrace.h>

> > >  #include <asm/irq_regs.h>

> > > @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = {

> > >         .help_msg       = "loglevel(0-9)",

> > >         .action_msg     = "Changing Loglevel",

> > >         .enable_mask    = SYSRQ_ENABLE_LOG,

> > > +       .nmi_safe       = true,

> > >  };

> > >

> > >  #ifdef CONFIG_VT

> > > @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = {

> > >         .help_msg       = "crash(c)",

> > >         .action_msg     = "Trigger a crash",

> > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > +       .nmi_safe       = true,

> > >  };

> > >

> > >  static void sysrq_handle_reboot(int key)

> > > @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = {

> > >         .help_msg       = "reboot(b)",

> > >         .action_msg     = "Resetting",

> > >         .enable_mask    = SYSRQ_ENABLE_BOOT,

> > > +       .nmi_safe       = true,

> > >  };

> > >

> > >  const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;

> > > @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = {

> > >         .handler        = sysrq_handle_showlocks,

> > >         .help_msg       = "show-all-locks(d)",

> > >         .action_msg     = "Show Locks Held",

> > > +       .nmi_safe       = true,

> > >  };

> > >  #else

> > >  #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)

> > > @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {

> > >         .help_msg       = "show-registers(p)",

> > >         .action_msg     = "Show Regs",

> > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > +       .nmi_safe       = true,

> > >  };

> > >

> > >  static void sysrq_handle_showstate(int key)

> > > @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = {

> > >         .help_msg       = "dump-ftrace-buffer(z)",

> > >         .action_msg     = "Dump ftrace buffer",

> > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > +       .nmi_safe       = true,

> > >  };

> > >  #else

> > >  #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)

> > > @@ -538,6 +546,23 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)

> > >                  sysrq_key_table[i] = op_p;

> > >  }

> > >

> > > +#define SYSRQ_NMI_FIFO_SIZE    64

> > > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);

> >

> > A 64-entry FIFO seems excessive. Quite honestly even a FIFO seems a

> > bit excessive and it feels like if two sysrqs were received in super

> > quick succession that it would be OK to just process the first one.  I

> > guess if it simplifies the processing to have a FIFO then it shouldn't

> > hurt, but no need for 64 entries.

> >

>

> Okay, would a 2-entry FIFO work here? As here we need a FIFO to pass

> on the key parameter.


...or even a 1-entry FIFO if that makes sense?


> > > +static void sysrq_do_nmi_work(struct irq_work *work)

> > > +{

> > > +       const struct sysrq_key_op *op_p;

> > > +       int key;

> > > +

> > > +       while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {

> > > +               op_p = __sysrq_get_key_op(key);

> > > +               if (op_p)

> > > +                       op_p->handler(key);

> > > +       }

> >

> > Do you need to manage "suppress_printk" in this function?  Do you need

> > to call rcu_sysrq_start() and rcu_read_lock()?

>

> Ah I missed those. Will add them here instead.

>

> >

> > If so, how do you prevent racing between the mucking we're doing with

> > these things and the mucking that the NMI does with them?

>

> IIUC, here you meant to highlight the race while scheduled sysrq is

> executing in IRQ context and we receive a new sysrq in NMI context,

> correct? If yes, this seems to be a trickier situation. I think the

> appropriate way to handle it would be to deny any further sysrq

> handling until the prior sysrq handling is complete, your views?


The problem is that in some cases you're running NMIs directly at FIQ
time and other cases you're running them at IRQ time.  So you
definitely can't just move it to NMI.

Skipping looking for other SYSRQs until the old one is complete sounds
good to me.  Again my ignorance will make me sound like a fool,
probably, but can you use the kfifo as a form of mutual exclusion?  If
you have a 1-entry kfifo, maybe:

1. First try to add to the "FIFO".  If it fails (out of space) then a
sysrq is in progress.  Ignore this one.
2. Decide if you're NMI-safe or not.
3. If NMI safe, modify "suppress_printk", call rcu functions, then
call the handler.  Restore suppress_printk and then dequeue from FIFO.
4. If not-NMI safe, the irq worker would "peek" into the FIFO, do its
work (wrapped with "suppress_printk" and the like), and not dequeue
until it's done.

In the above you'd use the FIFO as a locking mechanism.  I don't know
if that's a valid use of it or if there is a better NMI-safe mechanism
for this.  I think the kfifo docs talk about only one reader and one
writer and here we have two readers, so maybe it's illegal.  It also
seems weird to have a 1-entry "FIFO" and feels like there's probably a
better data structure for this.
Sumit Garg Aug. 17, 2020, 2:08 p.m. UTC | #5
On Fri, 14 Aug 2020 at 20:27, Doug Anderson <dianders@chromium.org> wrote:
>

> Hi,

>

> On Fri, Aug 14, 2020 at 12:24 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> >

> > + Peter (author of irq_work.c)

> >

> > On Thu, 13 Aug 2020 at 05:30, Doug Anderson <dianders@chromium.org> wrote:

> > >

> > > Hi,

> > >

> > > On Tue, Jul 21, 2020 at 5:10 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > > >

> > > > In a future patch we will add support to the serial core to make it

> > > > possible to trigger a magic sysrq from an NMI context. Prepare for this

> > > > by marking some sysrq actions as NMI safe. Safe actions will be allowed

> > > > to run from NMI context whilst that cannot run from an NMI will be queued

> > > > as irq_work for later processing.

> > > >

> > > > A particular sysrq handler is only marked as NMI safe in case the handler

> > > > isn't contending for any synchronization primitives as in NMI context

> > > > they are expected to cause deadlocks. Note that the debug sysrq do not

> > > > contend for any synchronization primitives. It does call kgdb_breakpoint()

> > > > to provoke a trap but that trap handler should be NMI safe on

> > > > architectures that implement an NMI.

> > > >

> > > > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> > > > ---

> > > >  drivers/tty/sysrq.c       | 33 ++++++++++++++++++++++++++++++++-

> > > >  include/linux/sysrq.h     |  1 +

> > > >  kernel/debug/debug_core.c |  1 +

> > > >  3 files changed, 34 insertions(+), 1 deletion(-)

> > > >

> > > > diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c

> > > > index 7c95afa9..8017e33 100644

> > > > --- a/drivers/tty/sysrq.c

> > > > +++ b/drivers/tty/sysrq.c

> > > > @@ -50,6 +50,8 @@

> > > >  #include <linux/syscalls.h>

> > > >  #include <linux/of.h>

> > > >  #include <linux/rcupdate.h>

> > > > +#include <linux/irq_work.h>

> > > > +#include <linux/kfifo.h>

> > > >

> > > >  #include <asm/ptrace.h>

> > > >  #include <asm/irq_regs.h>

> > > > @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = {

> > > >         .help_msg       = "loglevel(0-9)",

> > > >         .action_msg     = "Changing Loglevel",

> > > >         .enable_mask    = SYSRQ_ENABLE_LOG,

> > > > +       .nmi_safe       = true,

> > > >  };

> > > >

> > > >  #ifdef CONFIG_VT

> > > > @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = {

> > > >         .help_msg       = "crash(c)",

> > > >         .action_msg     = "Trigger a crash",

> > > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > > +       .nmi_safe       = true,

> > > >  };

> > > >

> > > >  static void sysrq_handle_reboot(int key)

> > > > @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = {

> > > >         .help_msg       = "reboot(b)",

> > > >         .action_msg     = "Resetting",

> > > >         .enable_mask    = SYSRQ_ENABLE_BOOT,

> > > > +       .nmi_safe       = true,

> > > >  };

> > > >

> > > >  const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;

> > > > @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = {

> > > >         .handler        = sysrq_handle_showlocks,

> > > >         .help_msg       = "show-all-locks(d)",

> > > >         .action_msg     = "Show Locks Held",

> > > > +       .nmi_safe       = true,

> > > >  };

> > > >  #else

> > > >  #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)

> > > > @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {

> > > >         .help_msg       = "show-registers(p)",

> > > >         .action_msg     = "Show Regs",

> > > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > > +       .nmi_safe       = true,

> > > >  };

> > > >

> > > >  static void sysrq_handle_showstate(int key)

> > > > @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = {

> > > >         .help_msg       = "dump-ftrace-buffer(z)",

> > > >         .action_msg     = "Dump ftrace buffer",

> > > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > > +       .nmi_safe       = true,

> > > >  };

> > > >  #else

> > > >  #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)

> > > > @@ -538,6 +546,23 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)

> > > >                  sysrq_key_table[i] = op_p;

> > > >  }

> > > >

> > > > +#define SYSRQ_NMI_FIFO_SIZE    64

> > > > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);

> > >

> > > A 64-entry FIFO seems excessive. Quite honestly even a FIFO seems a

> > > bit excessive and it feels like if two sysrqs were received in super

> > > quick succession that it would be OK to just process the first one.  I

> > > guess if it simplifies the processing to have a FIFO then it shouldn't

> > > hurt, but no need for 64 entries.

> > >

> >

> > Okay, would a 2-entry FIFO work here? As here we need a FIFO to pass

> > on the key parameter.

>

> ...or even a 1-entry FIFO if that makes sense?

>


Yes it would make sense but unfortunately not supported by kfifo
(size: power of 2).

>

> > > > +static void sysrq_do_nmi_work(struct irq_work *work)

> > > > +{

> > > > +       const struct sysrq_key_op *op_p;

> > > > +       int key;

> > > > +

> > > > +       while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {

> > > > +               op_p = __sysrq_get_key_op(key);

> > > > +               if (op_p)

> > > > +                       op_p->handler(key);

> > > > +       }

> > >

> > > Do you need to manage "suppress_printk" in this function?  Do you need

> > > to call rcu_sysrq_start() and rcu_read_lock()?

> >

> > Ah I missed those. Will add them here instead.

> >

> > >

> > > If so, how do you prevent racing between the mucking we're doing with

> > > these things and the mucking that the NMI does with them?

> >

> > IIUC, here you meant to highlight the race while scheduled sysrq is

> > executing in IRQ context and we receive a new sysrq in NMI context,

> > correct? If yes, this seems to be a trickier situation. I think the

> > appropriate way to handle it would be to deny any further sysrq

> > handling until the prior sysrq handling is complete, your views?

>

> The problem is that in some cases you're running NMIs directly at FIQ

> time and other cases you're running them at IRQ time.  So you

> definitely can't just move it to NMI.

>

> Skipping looking for other SYSRQs until the old one is complete sounds

> good to me.  Again my ignorance will make me sound like a fool,

> probably, but can you use the kfifo as a form of mutual exclusion?  If

> you have a 1-entry kfifo, maybe:

>

> 1. First try to add to the "FIFO".  If it fails (out of space) then a

> sysrq is in progress.  Ignore this one.

> 2. Decide if you're NMI-safe or not.

> 3. If NMI safe, modify "suppress_printk", call rcu functions, then

> call the handler.  Restore suppress_printk and then dequeue from FIFO.

> 4. If not-NMI safe, the irq worker would "peek" into the FIFO, do its

> work (wrapped with "suppress_printk" and the like), and not dequeue

> until it's done.

>

> In the above you'd use the FIFO as a locking mechanism.  I don't know

> if that's a valid use of it or if there is a better NMI-safe mechanism

> for this.  I think the kfifo docs talk about only one reader and one

> writer and here we have two readers, so maybe it's illegal.  It also

> seems weird to have a 1-entry "FIFO" and feels like there's probably a

> better data structure for this.


Thanks for your suggestions. Have a look at below implementation, I
have used 2-entry fifo but only single entry used for locking
mechanism:

@@ -538,6 +546,39 @@ static void __sysrq_put_key_op(int key, const
struct sysrq_key_op *op_p)
                 sysrq_key_table[i] = op_p;
 }

+#define SYSRQ_NMI_FIFO_SIZE    2
+static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);
+
+static void sysrq_do_nmi_work(struct irq_work *work)
+{
+       const struct sysrq_key_op *op_p;
+       int orig_suppress_printk;
+       int key;
+
+       orig_suppress_printk = suppress_printk;
+       suppress_printk = 0;
+
+       rcu_sysrq_start();
+       rcu_read_lock();
+
+       if (kfifo_peek(&sysrq_nmi_fifo, &key)) {
+               op_p = __sysrq_get_key_op(key);
+               if (op_p)
+                       op_p->handler(key);
+       }
+
+       rcu_read_unlock();
+       rcu_sysrq_end();
+
+       suppress_printk = orig_suppress_printk;
+
+       /* Pop contents from fifo if any */
+       while (kfifo_get(&sysrq_nmi_fifo, &key))
+               ;
+}
+
+static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);
+
 void __handle_sysrq(int key, bool check_mask)
 {
        const struct sysrq_key_op *op_p;
+}
+
+static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);
+
 void __handle_sysrq(int key, bool check_mask)
 {
        const struct sysrq_key_op *op_p;
@@ -545,6 +586,10 @@ void __handle_sysrq(int key, bool check_mask)
        int orig_suppress_printk;
        int i;

+       /* Skip sysrq handling if one already in progress */
+       if (!kfifo_is_empty(&sysrq_nmi_fifo))
+               return;
+
        orig_suppress_printk = suppress_printk;
        suppress_printk = 0;

@@ -568,7 +613,13 @@ void __handle_sysrq(int key, bool check_mask)
                if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {
                        pr_info("%s\n", op_p->action_msg);
                        console_loglevel = orig_log_level;
-                       op_p->handler(key);
+
+                       if (in_nmi() && !op_p->nmi_safe) {
+                               kfifo_put(&sysrq_nmi_fifo, key);
+                               irq_work_queue(&sysrq_nmi_work);
+                       } else {
+                               op_p->handler(key);
+                       }
                } else {
                        pr_info("This sysrq operation is disabled.\n");
                        console_loglevel = orig_log_level;

-Sumit
Doug Anderson Aug. 17, 2020, 5:19 p.m. UTC | #6
Hi,

On Mon, Aug 17, 2020 at 7:08 AM Sumit Garg <sumit.garg@linaro.org> wrote:
>

> On Fri, 14 Aug 2020 at 20:27, Doug Anderson <dianders@chromium.org> wrote:

> >

> > Hi,

> >

> > On Fri, Aug 14, 2020 at 12:24 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > >

> > > + Peter (author of irq_work.c)

> > >

> > > On Thu, 13 Aug 2020 at 05:30, Doug Anderson <dianders@chromium.org> wrote:

> > > >

> > > > Hi,

> > > >

> > > > On Tue, Jul 21, 2020 at 5:10 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > > > >

> > > > > In a future patch we will add support to the serial core to make it

> > > > > possible to trigger a magic sysrq from an NMI context. Prepare for this

> > > > > by marking some sysrq actions as NMI safe. Safe actions will be allowed

> > > > > to run from NMI context whilst that cannot run from an NMI will be queued

> > > > > as irq_work for later processing.

> > > > >

> > > > > A particular sysrq handler is only marked as NMI safe in case the handler

> > > > > isn't contending for any synchronization primitives as in NMI context

> > > > > they are expected to cause deadlocks. Note that the debug sysrq do not

> > > > > contend for any synchronization primitives. It does call kgdb_breakpoint()

> > > > > to provoke a trap but that trap handler should be NMI safe on

> > > > > architectures that implement an NMI.

> > > > >

> > > > > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> > > > > ---

> > > > >  drivers/tty/sysrq.c       | 33 ++++++++++++++++++++++++++++++++-

> > > > >  include/linux/sysrq.h     |  1 +

> > > > >  kernel/debug/debug_core.c |  1 +

> > > > >  3 files changed, 34 insertions(+), 1 deletion(-)

> > > > >

> > > > > diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c

> > > > > index 7c95afa9..8017e33 100644

> > > > > --- a/drivers/tty/sysrq.c

> > > > > +++ b/drivers/tty/sysrq.c

> > > > > @@ -50,6 +50,8 @@

> > > > >  #include <linux/syscalls.h>

> > > > >  #include <linux/of.h>

> > > > >  #include <linux/rcupdate.h>

> > > > > +#include <linux/irq_work.h>

> > > > > +#include <linux/kfifo.h>

> > > > >

> > > > >  #include <asm/ptrace.h>

> > > > >  #include <asm/irq_regs.h>

> > > > > @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = {

> > > > >         .help_msg       = "loglevel(0-9)",

> > > > >         .action_msg     = "Changing Loglevel",

> > > > >         .enable_mask    = SYSRQ_ENABLE_LOG,

> > > > > +       .nmi_safe       = true,

> > > > >  };

> > > > >

> > > > >  #ifdef CONFIG_VT

> > > > > @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = {

> > > > >         .help_msg       = "crash(c)",

> > > > >         .action_msg     = "Trigger a crash",

> > > > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > > > +       .nmi_safe       = true,

> > > > >  };

> > > > >

> > > > >  static void sysrq_handle_reboot(int key)

> > > > > @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = {

> > > > >         .help_msg       = "reboot(b)",

> > > > >         .action_msg     = "Resetting",

> > > > >         .enable_mask    = SYSRQ_ENABLE_BOOT,

> > > > > +       .nmi_safe       = true,

> > > > >  };

> > > > >

> > > > >  const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;

> > > > > @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = {

> > > > >         .handler        = sysrq_handle_showlocks,

> > > > >         .help_msg       = "show-all-locks(d)",

> > > > >         .action_msg     = "Show Locks Held",

> > > > > +       .nmi_safe       = true,

> > > > >  };

> > > > >  #else

> > > > >  #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)

> > > > > @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {

> > > > >         .help_msg       = "show-registers(p)",

> > > > >         .action_msg     = "Show Regs",

> > > > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > > > +       .nmi_safe       = true,

> > > > >  };

> > > > >

> > > > >  static void sysrq_handle_showstate(int key)

> > > > > @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = {

> > > > >         .help_msg       = "dump-ftrace-buffer(z)",

> > > > >         .action_msg     = "Dump ftrace buffer",

> > > > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > > > +       .nmi_safe       = true,

> > > > >  };

> > > > >  #else

> > > > >  #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)

> > > > > @@ -538,6 +546,23 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)

> > > > >                  sysrq_key_table[i] = op_p;

> > > > >  }

> > > > >

> > > > > +#define SYSRQ_NMI_FIFO_SIZE    64

> > > > > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);

> > > >

> > > > A 64-entry FIFO seems excessive. Quite honestly even a FIFO seems a

> > > > bit excessive and it feels like if two sysrqs were received in super

> > > > quick succession that it would be OK to just process the first one.  I

> > > > guess if it simplifies the processing to have a FIFO then it shouldn't

> > > > hurt, but no need for 64 entries.

> > > >

> > >

> > > Okay, would a 2-entry FIFO work here? As here we need a FIFO to pass

> > > on the key parameter.

> >

> > ...or even a 1-entry FIFO if that makes sense?

> >

>

> Yes it would make sense but unfortunately not supported by kfifo

> (size: power of 2).


Typically 1 is considered to be a power of 2 since 2^0 = 1.

...ah, but it appears that size < 2 is not allowed.  Oh well.


> > > > > +static void sysrq_do_nmi_work(struct irq_work *work)

> > > > > +{

> > > > > +       const struct sysrq_key_op *op_p;

> > > > > +       int key;

> > > > > +

> > > > > +       while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {

> > > > > +               op_p = __sysrq_get_key_op(key);

> > > > > +               if (op_p)

> > > > > +                       op_p->handler(key);

> > > > > +       }

> > > >

> > > > Do you need to manage "suppress_printk" in this function?  Do you need

> > > > to call rcu_sysrq_start() and rcu_read_lock()?

> > >

> > > Ah I missed those. Will add them here instead.

> > >

> > > >

> > > > If so, how do you prevent racing between the mucking we're doing with

> > > > these things and the mucking that the NMI does with them?

> > >

> > > IIUC, here you meant to highlight the race while scheduled sysrq is

> > > executing in IRQ context and we receive a new sysrq in NMI context,

> > > correct? If yes, this seems to be a trickier situation. I think the

> > > appropriate way to handle it would be to deny any further sysrq

> > > handling until the prior sysrq handling is complete, your views?

> >

> > The problem is that in some cases you're running NMIs directly at FIQ

> > time and other cases you're running them at IRQ time.  So you

> > definitely can't just move it to NMI.

> >

> > Skipping looking for other SYSRQs until the old one is complete sounds

> > good to me.  Again my ignorance will make me sound like a fool,

> > probably, but can you use the kfifo as a form of mutual exclusion?  If

> > you have a 1-entry kfifo, maybe:

> >

> > 1. First try to add to the "FIFO".  If it fails (out of space) then a

> > sysrq is in progress.  Ignore this one.

> > 2. Decide if you're NMI-safe or not.

> > 3. If NMI safe, modify "suppress_printk", call rcu functions, then

> > call the handler.  Restore suppress_printk and then dequeue from FIFO.

> > 4. If not-NMI safe, the irq worker would "peek" into the FIFO, do its

> > work (wrapped with "suppress_printk" and the like), and not dequeue

> > until it's done.

> >

> > In the above you'd use the FIFO as a locking mechanism.  I don't know

> > if that's a valid use of it or if there is a better NMI-safe mechanism

> > for this.  I think the kfifo docs talk about only one reader and one

> > writer and here we have two readers, so maybe it's illegal.  It also

> > seems weird to have a 1-entry "FIFO" and feels like there's probably a

> > better data structure for this.

>

> Thanks for your suggestions. Have a look at below implementation, I

> have used 2-entry fifo but only single entry used for locking

> mechanism:

>

> @@ -538,6 +546,39 @@ static void __sysrq_put_key_op(int key, const

> struct sysrq_key_op *op_p)

>                  sysrq_key_table[i] = op_p;

>  }

>

> +#define SYSRQ_NMI_FIFO_SIZE    2

> +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);

> +

> +static void sysrq_do_nmi_work(struct irq_work *work)

> +{

> +       const struct sysrq_key_op *op_p;

> +       int orig_suppress_printk;

> +       int key;

> +

> +       orig_suppress_printk = suppress_printk;

> +       suppress_printk = 0;

> +

> +       rcu_sysrq_start();

> +       rcu_read_lock();

> +

> +       if (kfifo_peek(&sysrq_nmi_fifo, &key)) {

> +               op_p = __sysrq_get_key_op(key);

> +               if (op_p)

> +                       op_p->handler(key);

> +       }

> +

> +       rcu_read_unlock();

> +       rcu_sysrq_end();

> +

> +       suppress_printk = orig_suppress_printk;

> +

> +       /* Pop contents from fifo if any */

> +       while (kfifo_get(&sysrq_nmi_fifo, &key))

> +               ;


I think you can use kfifo_reset_out().


> +}

> +

> +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);

> +

>  void __handle_sysrq(int key, bool check_mask)

>  {

>         const struct sysrq_key_op *op_p;

> +}

> +

> +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);

> +

>  void __handle_sysrq(int key, bool check_mask)

>  {

>         const struct sysrq_key_op *op_p;

> @@ -545,6 +586,10 @@ void __handle_sysrq(int key, bool check_mask)

>         int orig_suppress_printk;

>         int i;

>

> +       /* Skip sysrq handling if one already in progress */

> +       if (!kfifo_is_empty(&sysrq_nmi_fifo))

> +               return;


This _seems_ OK to me since I'd imagine kfifo_is_empty() is as safe
for the writer to do as kfifo_is_full() is and kfifo_is_full() is part
of kfifo_put().

I guess there's no better synchronism mechanism that we can use?


> +

>         orig_suppress_printk = suppress_printk;

>         suppress_printk = 0;

>

> @@ -568,7 +613,13 @@ void __handle_sysrq(int key, bool check_mask)

>                 if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {

>                         pr_info("%s\n", op_p->action_msg);

>                         console_loglevel = orig_log_level;

> -                       op_p->handler(key);

> +

> +                       if (in_nmi() && !op_p->nmi_safe) {

> +                               kfifo_put(&sysrq_nmi_fifo, key);

> +                               irq_work_queue(&sysrq_nmi_work);

> +                       } else {

> +                               op_p->handler(key);

> +                       }

>                 } else {

>                         pr_info("This sysrq operation is disabled.\n");

>                         console_loglevel = orig_log_level;

>

> -Sumit
Sumit Garg Aug. 18, 2020, 1:30 p.m. UTC | #7
On Mon, 17 Aug 2020 at 22:49, Doug Anderson <dianders@chromium.org> wrote:
>

> Hi,

>

> On Mon, Aug 17, 2020 at 7:08 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> >

> > On Fri, 14 Aug 2020 at 20:27, Doug Anderson <dianders@chromium.org> wrote:

> > >

> > > Hi,

> > >

> > > On Fri, Aug 14, 2020 at 12:24 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > > >

> > > > + Peter (author of irq_work.c)

> > > >

> > > > On Thu, 13 Aug 2020 at 05:30, Doug Anderson <dianders@chromium.org> wrote:

> > > > >

> > > > > Hi,

> > > > >

> > > > > On Tue, Jul 21, 2020 at 5:10 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > > > > >

> > > > > > In a future patch we will add support to the serial core to make it

> > > > > > possible to trigger a magic sysrq from an NMI context. Prepare for this

> > > > > > by marking some sysrq actions as NMI safe. Safe actions will be allowed

> > > > > > to run from NMI context whilst that cannot run from an NMI will be queued

> > > > > > as irq_work for later processing.

> > > > > >

> > > > > > A particular sysrq handler is only marked as NMI safe in case the handler

> > > > > > isn't contending for any synchronization primitives as in NMI context

> > > > > > they are expected to cause deadlocks. Note that the debug sysrq do not

> > > > > > contend for any synchronization primitives. It does call kgdb_breakpoint()

> > > > > > to provoke a trap but that trap handler should be NMI safe on

> > > > > > architectures that implement an NMI.

> > > > > >

> > > > > > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> > > > > > ---

> > > > > >  drivers/tty/sysrq.c       | 33 ++++++++++++++++++++++++++++++++-

> > > > > >  include/linux/sysrq.h     |  1 +

> > > > > >  kernel/debug/debug_core.c |  1 +

> > > > > >  3 files changed, 34 insertions(+), 1 deletion(-)

> > > > > >

> > > > > > diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c

> > > > > > index 7c95afa9..8017e33 100644

> > > > > > --- a/drivers/tty/sysrq.c

> > > > > > +++ b/drivers/tty/sysrq.c

> > > > > > @@ -50,6 +50,8 @@

> > > > > >  #include <linux/syscalls.h>

> > > > > >  #include <linux/of.h>

> > > > > >  #include <linux/rcupdate.h>

> > > > > > +#include <linux/irq_work.h>

> > > > > > +#include <linux/kfifo.h>

> > > > > >

> > > > > >  #include <asm/ptrace.h>

> > > > > >  #include <asm/irq_regs.h>

> > > > > > @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = {

> > > > > >         .help_msg       = "loglevel(0-9)",

> > > > > >         .action_msg     = "Changing Loglevel",

> > > > > >         .enable_mask    = SYSRQ_ENABLE_LOG,

> > > > > > +       .nmi_safe       = true,

> > > > > >  };

> > > > > >

> > > > > >  #ifdef CONFIG_VT

> > > > > > @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = {

> > > > > >         .help_msg       = "crash(c)",

> > > > > >         .action_msg     = "Trigger a crash",

> > > > > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > > > > +       .nmi_safe       = true,

> > > > > >  };

> > > > > >

> > > > > >  static void sysrq_handle_reboot(int key)

> > > > > > @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = {

> > > > > >         .help_msg       = "reboot(b)",

> > > > > >         .action_msg     = "Resetting",

> > > > > >         .enable_mask    = SYSRQ_ENABLE_BOOT,

> > > > > > +       .nmi_safe       = true,

> > > > > >  };

> > > > > >

> > > > > >  const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;

> > > > > > @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = {

> > > > > >         .handler        = sysrq_handle_showlocks,

> > > > > >         .help_msg       = "show-all-locks(d)",

> > > > > >         .action_msg     = "Show Locks Held",

> > > > > > +       .nmi_safe       = true,

> > > > > >  };

> > > > > >  #else

> > > > > >  #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)

> > > > > > @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {

> > > > > >         .help_msg       = "show-registers(p)",

> > > > > >         .action_msg     = "Show Regs",

> > > > > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > > > > +       .nmi_safe       = true,

> > > > > >  };

> > > > > >

> > > > > >  static void sysrq_handle_showstate(int key)

> > > > > > @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = {

> > > > > >         .help_msg       = "dump-ftrace-buffer(z)",

> > > > > >         .action_msg     = "Dump ftrace buffer",

> > > > > >         .enable_mask    = SYSRQ_ENABLE_DUMP,

> > > > > > +       .nmi_safe       = true,

> > > > > >  };

> > > > > >  #else

> > > > > >  #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)

> > > > > > @@ -538,6 +546,23 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)

> > > > > >                  sysrq_key_table[i] = op_p;

> > > > > >  }

> > > > > >

> > > > > > +#define SYSRQ_NMI_FIFO_SIZE    64

> > > > > > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);

> > > > >

> > > > > A 64-entry FIFO seems excessive. Quite honestly even a FIFO seems a

> > > > > bit excessive and it feels like if two sysrqs were received in super

> > > > > quick succession that it would be OK to just process the first one.  I

> > > > > guess if it simplifies the processing to have a FIFO then it shouldn't

> > > > > hurt, but no need for 64 entries.

> > > > >

> > > >

> > > > Okay, would a 2-entry FIFO work here? As here we need a FIFO to pass

> > > > on the key parameter.

> > >

> > > ...or even a 1-entry FIFO if that makes sense?

> > >

> >

> > Yes it would make sense but unfortunately not supported by kfifo

> > (size: power of 2).

>

> Typically 1 is considered to be a power of 2 since 2^0 = 1.

>

> ...ah, but it appears that size < 2 is not allowed.  Oh well.

>

>

> > > > > > +static void sysrq_do_nmi_work(struct irq_work *work)

> > > > > > +{

> > > > > > +       const struct sysrq_key_op *op_p;

> > > > > > +       int key;

> > > > > > +

> > > > > > +       while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {

> > > > > > +               op_p = __sysrq_get_key_op(key);

> > > > > > +               if (op_p)

> > > > > > +                       op_p->handler(key);

> > > > > > +       }

> > > > >

> > > > > Do you need to manage "suppress_printk" in this function?  Do you need

> > > > > to call rcu_sysrq_start() and rcu_read_lock()?

> > > >

> > > > Ah I missed those. Will add them here instead.

> > > >

> > > > >

> > > > > If so, how do you prevent racing between the mucking we're doing with

> > > > > these things and the mucking that the NMI does with them?

> > > >

> > > > IIUC, here you meant to highlight the race while scheduled sysrq is

> > > > executing in IRQ context and we receive a new sysrq in NMI context,

> > > > correct? If yes, this seems to be a trickier situation. I think the

> > > > appropriate way to handle it would be to deny any further sysrq

> > > > handling until the prior sysrq handling is complete, your views?

> > >

> > > The problem is that in some cases you're running NMIs directly at FIQ

> > > time and other cases you're running them at IRQ time.  So you

> > > definitely can't just move it to NMI.

> > >

> > > Skipping looking for other SYSRQs until the old one is complete sounds

> > > good to me.  Again my ignorance will make me sound like a fool,

> > > probably, but can you use the kfifo as a form of mutual exclusion?  If

> > > you have a 1-entry kfifo, maybe:

> > >

> > > 1. First try to add to the "FIFO".  If it fails (out of space) then a

> > > sysrq is in progress.  Ignore this one.

> > > 2. Decide if you're NMI-safe or not.

> > > 3. If NMI safe, modify "suppress_printk", call rcu functions, then

> > > call the handler.  Restore suppress_printk and then dequeue from FIFO.

> > > 4. If not-NMI safe, the irq worker would "peek" into the FIFO, do its

> > > work (wrapped with "suppress_printk" and the like), and not dequeue

> > > until it's done.

> > >

> > > In the above you'd use the FIFO as a locking mechanism.  I don't know

> > > if that's a valid use of it or if there is a better NMI-safe mechanism

> > > for this.  I think the kfifo docs talk about only one reader and one

> > > writer and here we have two readers, so maybe it's illegal.  It also

> > > seems weird to have a 1-entry "FIFO" and feels like there's probably a

> > > better data structure for this.

> >

> > Thanks for your suggestions. Have a look at below implementation, I

> > have used 2-entry fifo but only single entry used for locking

> > mechanism:

> >

> > @@ -538,6 +546,39 @@ static void __sysrq_put_key_op(int key, const

> > struct sysrq_key_op *op_p)

> >                  sysrq_key_table[i] = op_p;

> >  }

> >

> > +#define SYSRQ_NMI_FIFO_SIZE    2

> > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);

> > +

> > +static void sysrq_do_nmi_work(struct irq_work *work)

> > +{

> > +       const struct sysrq_key_op *op_p;

> > +       int orig_suppress_printk;

> > +       int key;

> > +

> > +       orig_suppress_printk = suppress_printk;

> > +       suppress_printk = 0;

> > +

> > +       rcu_sysrq_start();

> > +       rcu_read_lock();

> > +

> > +       if (kfifo_peek(&sysrq_nmi_fifo, &key)) {

> > +               op_p = __sysrq_get_key_op(key);

> > +               if (op_p)

> > +                       op_p->handler(key);

> > +       }

> > +

> > +       rcu_read_unlock();

> > +       rcu_sysrq_end();

> > +

> > +       suppress_printk = orig_suppress_printk;

> > +

> > +       /* Pop contents from fifo if any */

> > +       while (kfifo_get(&sysrq_nmi_fifo, &key))

> > +               ;

>

> I think you can use kfifo_reset_out().

>


Okay, it sounds safe as well when used concurrently with
kfifo_is_empty(). Will use it instead.

>

> > +}

> > +

> > +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);

> > +

> >  void __handle_sysrq(int key, bool check_mask)

> >  {

> >         const struct sysrq_key_op *op_p;

> > +}

> > +

> > +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);

> > +

> >  void __handle_sysrq(int key, bool check_mask)

> >  {

> >         const struct sysrq_key_op *op_p;

> > @@ -545,6 +586,10 @@ void __handle_sysrq(int key, bool check_mask)

> >         int orig_suppress_printk;

> >         int i;

> >

> > +       /* Skip sysrq handling if one already in progress */

> > +       if (!kfifo_is_empty(&sysrq_nmi_fifo))

> > +               return;

>

> This _seems_ OK to me since I'd imagine kfifo_is_empty() is as safe

> for the writer to do as kfifo_is_full() is and kfifo_is_full() is part

> of kfifo_put().

>

> I guess there's no better synchronism mechanism that we can use?

>


Yeah, unless someone else has a better idea.

-Sumit

>

> > +

> >         orig_suppress_printk = suppress_printk;

> >         suppress_printk = 0;

> >

> > @@ -568,7 +613,13 @@ void __handle_sysrq(int key, bool check_mask)

> >                 if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {

> >                         pr_info("%s\n", op_p->action_msg);

> >                         console_loglevel = orig_log_level;

> > -                       op_p->handler(key);

> > +

> > +                       if (in_nmi() && !op_p->nmi_safe) {

> > +                               kfifo_put(&sysrq_nmi_fifo, key);

> > +                               irq_work_queue(&sysrq_nmi_work);

> > +                       } else {

> > +                               op_p->handler(key);

> > +                       }

> >                 } else {

> >                         pr_info("This sysrq operation is disabled.\n");

> >                         console_loglevel = orig_log_level;

> >

> > -Sumit
diff mbox series

Patch

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 7c95afa9..8017e33 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -50,6 +50,8 @@ 
 #include <linux/syscalls.h>
 #include <linux/of.h>
 #include <linux/rcupdate.h>
+#include <linux/irq_work.h>
+#include <linux/kfifo.h>
 
 #include <asm/ptrace.h>
 #include <asm/irq_regs.h>
@@ -111,6 +113,7 @@  static const struct sysrq_key_op sysrq_loglevel_op = {
 	.help_msg	= "loglevel(0-9)",
 	.action_msg	= "Changing Loglevel",
 	.enable_mask	= SYSRQ_ENABLE_LOG,
+	.nmi_safe	= true,
 };
 
 #ifdef CONFIG_VT
@@ -157,6 +160,7 @@  static const struct sysrq_key_op sysrq_crash_op = {
 	.help_msg	= "crash(c)",
 	.action_msg	= "Trigger a crash",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
+	.nmi_safe	= true,
 };
 
 static void sysrq_handle_reboot(int key)
@@ -170,6 +174,7 @@  static const struct sysrq_key_op sysrq_reboot_op = {
 	.help_msg	= "reboot(b)",
 	.action_msg	= "Resetting",
 	.enable_mask	= SYSRQ_ENABLE_BOOT,
+	.nmi_safe	= true,
 };
 
 const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;
@@ -217,6 +222,7 @@  static const struct sysrq_key_op sysrq_showlocks_op = {
 	.handler	= sysrq_handle_showlocks,
 	.help_msg	= "show-all-locks(d)",
 	.action_msg	= "Show Locks Held",
+	.nmi_safe	= true,
 };
 #else
 #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)
@@ -289,6 +295,7 @@  static const struct sysrq_key_op sysrq_showregs_op = {
 	.help_msg	= "show-registers(p)",
 	.action_msg	= "Show Regs",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
+	.nmi_safe	= true,
 };
 
 static void sysrq_handle_showstate(int key)
@@ -326,6 +333,7 @@  static const struct sysrq_key_op sysrq_ftrace_dump_op = {
 	.help_msg	= "dump-ftrace-buffer(z)",
 	.action_msg	= "Dump ftrace buffer",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
+	.nmi_safe	= true,
 };
 #else
 #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)
@@ -538,6 +546,23 @@  static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)
                 sysrq_key_table[i] = op_p;
 }
 
+#define SYSRQ_NMI_FIFO_SIZE	64
+static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);
+
+static void sysrq_do_nmi_work(struct irq_work *work)
+{
+	const struct sysrq_key_op *op_p;
+	int key;
+
+	while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {
+		op_p = __sysrq_get_key_op(key);
+		if (op_p)
+			op_p->handler(key);
+	}
+}
+
+static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);
+
 void __handle_sysrq(int key, bool check_mask)
 {
 	const struct sysrq_key_op *op_p;
@@ -568,7 +593,13 @@  void __handle_sysrq(int key, bool check_mask)
 		if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {
 			pr_info("%s\n", op_p->action_msg);
 			console_loglevel = orig_log_level;
-			op_p->handler(key);
+
+			if (in_nmi() && !op_p->nmi_safe) {
+				kfifo_in(&sysrq_nmi_fifo, &key, 1);
+				irq_work_queue(&sysrq_nmi_work);
+			} else {
+				op_p->handler(key);
+			}
 		} else {
 			pr_info("This sysrq operation is disabled.\n");
 			console_loglevel = orig_log_level;
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 3a582ec..630b5b9 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -34,6 +34,7 @@  struct sysrq_key_op {
 	const char * const help_msg;
 	const char * const action_msg;
 	const int enable_mask;
+	const bool nmi_safe;
 };
 
 #ifdef CONFIG_MAGIC_SYSRQ
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 9e59347..2b51173 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -943,6 +943,7 @@  static const struct sysrq_key_op sysrq_dbg_op = {
 	.handler	= sysrq_handle_dbg,
 	.help_msg	= "debug(g)",
 	.action_msg	= "DEBUG",
+	.nmi_safe	= true,
 };
 #endif