Message ID | 20201016035109.3952356-1-josephjang@google.com |
---|---|
State | New |
Headers | show |
Series | power: suspend: Add suspend timeout handler | expand |
On Fri, Oct 16, 2020 at 11:51:09AM +0800, Joseph Jang wrote: > From: josephjang <josephjang@google.com> Please use your name as spelled out like you did above in the email header. > > Add suspend timeout handler to prevent device stuck during suspend/ > resume process. Suspend timeout handler will dump disk sleep task > at first round timeout and trigger kernel panic at second round timeout. > The default timer for each round is 30 seconds. > > Note: Can use following command to simulate suspend hang for testing. > adb shell echo 1 > /sys/power/pm_hang > adb shell echo mem > /sys/power/state > Signed-off-by: josephjang <josephjang@google.com> Need a blank line before the signed-off-by: and again, spell your name the same way. > --- > include/linux/console.h | 1 + > kernel/power/Kconfig | 9 +++ > kernel/power/main.c | 66 ++++++++++++++++ > kernel/power/suspend.c | 162 ++++++++++++++++++++++++++++++++++++++++ > kernel/printk/printk.c | 5 ++ > 5 files changed, 243 insertions(+) > > diff --git a/include/linux/console.h b/include/linux/console.h > index 0670d3491e0e..ac468c602c0b 100644 > --- a/include/linux/console.h > +++ b/include/linux/console.h > @@ -192,6 +192,7 @@ static inline void console_sysfs_notify(void) > { } > #endif > extern bool console_suspend_enabled; > +extern int is_console_suspended(void); For global functions, how about: bool console_is_suspended(void); ? > > /* Suspend and resume console messages over PM events */ > extern void suspend_console(void); > diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig > index a7320f07689d..52b7a181b6d8 100644 > --- a/kernel/power/Kconfig > +++ b/kernel/power/Kconfig > @@ -207,6 +207,15 @@ config PM_SLEEP_DEBUG > def_bool y > depends on PM_DEBUG && PM_SLEEP > > +config PM_SLEEP_MONITOR > + bool "Linux kernel suspend/resume process monitor" > + depends on PM_SLEEP > + help > + This option will enable suspend/resume monitor to prevent device > + stuck during suspend/resume process. 
Suspend timeout handler will > + dump disk sleep task at first round timeout and trigger kernel panic > + at second round timeout. The default timer for each round is 30 seconds. Ouch, are you sure you want to panic? > + > config DPM_WATCHDOG > bool "Device suspend/resume watchdog" > depends on PM_DEBUG && PSTORE && EXPERT > diff --git a/kernel/power/main.c b/kernel/power/main.c > index 40f86ec4ab30..f25b8a47583e 100644 > --- a/kernel/power/main.c > +++ b/kernel/power/main.c > @@ -575,6 +575,69 @@ void __pm_pr_dbg(bool defer, const char *fmt, ...) > static inline void pm_print_times_init(void) {} > #endif /* CONFIG_PM_SLEEP_DEBUG */ > > +#ifdef CONFIG_PM_SLEEP_MONITOR > +/* If set, devices will stuck at suspend for verification */ > +static bool pm_hang_enabled; > + > +static int pm_notify_test(struct notifier_block *nb, > + unsigned long mode, void *_unused) > +{ > + pr_info("Jump into infinite loop now\n"); Why do you have debugging code still enabled? > + > + /* Suspend thread stuck at a loop forever */ > + for (;;) > + ; > + Don't busy spin, that will burn power. > + pr_info("Fail to stuck at loop\n"); And how can this happen? 
> + > + return 0; > +} > + > +static struct notifier_block pm_notify_nb = { > + .notifier_call = pm_notify_test, > +}; > + > +static ssize_t pm_hang_show(struct kobject *kobj, struct kobj_attribute *attr, > + char *buf) > +{ > + return snprintf(buf, 10, "%d\n", pm_hang_enabled); > +} > + > +static ssize_t pm_hang_store(struct kobject *kobj, struct kobj_attribute *attr, > + const char *buf, size_t n) > +{ > + unsigned long val; > + int result; > + > + if (kstrtoul(buf, 10, &val)) > + return -EINVAL; > + > + if (val > 1) > + return -EINVAL; > + > + pm_hang_enabled = !!val; > + > + if (pm_hang_enabled == true) { > + > + result = register_pm_notifier(&pm_notify_nb); > + if (result) > + pr_warn("Can not register suspend notifier, return %d\n", > + result); > + > + } else { > + > + result = unregister_pm_notifier(&pm_notify_nb); > + if (result) > + pr_warn("Can not unregister suspend notifier, return %d\n", > + result); > + } > + > + return n; > +} > + > +power_attr(pm_hang); > +#endif > + > struct kobject *power_kobj; > > /** > @@ -909,6 +972,9 @@ static struct attribute * g[] = { > &pm_wakeup_irq_attr.attr, > &pm_debug_messages_attr.attr, > #endif > +#ifdef CONFIG_PM_SLEEP_MONITOR > + &pm_hang_attr.attr, You added a sysfs file, but no Documentation/ABI/ update? That's not ok. > +#endif > #endif > #ifdef CONFIG_FREEZER > &pm_freeze_timeout_attr.attr, > diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c > index 8b1bb5ee7e5d..6f2679cfd9d1 100644 > --- a/kernel/power/suspend.c > +++ b/kernel/power/suspend.c > @@ -30,6 +30,12 @@ > #include <trace/events/power.h> > #include <linux/compiler.h> > #include <linux/moduleparam.h> > +#ifdef CONFIG_PM_SLEEP_MONITOR > +#include <linux/sched/debug.h> > +#include <linux/kthread.h> > +#include <linux/sched.h> > +#include <uapi/linux/sched/types.h> > +#endif Don't #ifdef include files. And why the uapi file? 
> > #include "power.h" > > @@ -61,6 +67,133 @@ static DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head); > enum s2idle_states __read_mostly s2idle_state; > static DEFINE_RAW_SPINLOCK(s2idle_lock); > > +#ifdef CONFIG_PM_SLEEP_MONITOR > +/* Suspend monitor thread toggle reason */ > +enum toggle_reason { > + TOGGLE_NONE, > + TOGGLE_START, > + TOGGLE_STOP, > +}; > + > +#define SUSPEND_TIMER_TIMEOUT_MS 30000 > +static struct task_struct *ksuspend_mon_tsk; > +static DECLARE_WAIT_QUEUE_HEAD(power_suspend_waitqueue); > +static enum toggle_reason suspend_mon_toggle; > +static DEFINE_MUTEX(suspend_mon_lock); > + > +static void start_suspend_mon(void) > +{ > + mutex_lock(&suspend_mon_lock); > + suspend_mon_toggle = TOGGLE_START; > + mutex_unlock(&suspend_mon_lock); Why do you need a lock for a single integer? > + wake_up(&power_suspend_waitqueue); > +} > + > +static void stop_suspend_mon(void) > +{ > + mutex_lock(&suspend_mon_lock); > + suspend_mon_toggle = TOGGLE_STOP; > + mutex_unlock(&suspend_mon_lock); > + wake_up(&power_suspend_waitqueue); > +} > + > +static void suspend_timeout(int timeout_count) > +{ > + char *null_pointer = NULL; > + > + pr_info("Suspend monitor timeout (timer is %d seconds)\n", > + (SUSPEND_TIMER_TIMEOUT_MS/1000)); > + > + show_state_filter(TASK_UNINTERRUPTIBLE); > + > + if (timeout_count < 2) > + return; > + > + if (is_console_suspended()) > + resume_console(); > + > + pr_info("Trigger a panic\n"); Again, debugging code? > + > + /* Trigger a NULL pointer dereference */ > + *null_pointer = 'a'; Are you sure this will work on all platforms? We do have a panic function if you really want to do that. > + > + /* Should not reach here */ > + pr_err("Trigger panic failed!\n"); > +} > + > +static int suspend_monitor_kthread(void *arg) > +{ > + long err; > + struct sched_param param = {.sched_priority > + = MAX_RT_PRIO-1}; Ick, no, call the scheduler functions properly, don't do this "by hand" ever. 
> + static int timeout_count; > + static long timeout; > + > + pr_info("Init ksuspend_mon thread\n"); Again, debugging code :( > + > + sched_setscheduler(current, SCHED_FIFO, &param); > + > + timeout_count = 0; > + timeout = MAX_SCHEDULE_TIMEOUT; > + > + do { > + /* Wait suspend timer timeout */ > + err = wait_event_interruptible_timeout( > + power_suspend_waitqueue, > + (suspend_mon_toggle != TOGGLE_NONE), > + timeout); > + > + mutex_lock(&suspend_mon_lock); > + /* suspend monitor state change */ > + if (suspend_mon_toggle != TOGGLE_NONE) { > + if (suspend_mon_toggle == TOGGLE_START) { > + timeout = msecs_to_jiffies( > + SUSPEND_TIMER_TIMEOUT_MS); > + pr_info("Start suspend monitor\n"); > + } else if (suspend_mon_toggle == TOGGLE_STOP) { > + timeout = MAX_SCHEDULE_TIMEOUT; > + timeout_count = 0; > + pr_info("Stop suspend monitor\n"); > + } > + suspend_mon_toggle = TOGGLE_NONE; > + mutex_unlock(&suspend_mon_lock); > + continue; > + } > + mutex_unlock(&suspend_mon_lock); > + > + /* suspend monitor event handler */ > + if (err == 0) { > + timeout_count++; > + suspend_timeout(timeout_count); > + } else if (err == -ERESTARTSYS) { > + pr_info("Exit ksuspend_mon!"); > + break; > + } > + } while (1); > + > + return 0; > +} > + > +static void init_suspend_monitor_thread(void) > +{ > + int ret; > + > + ksuspend_mon_tsk = kthread_create(suspend_monitor_kthread, > + NULL, "ksuspend_mon"); > + if (IS_ERR(ksuspend_mon_tsk)) { > + ret = PTR_ERR(ksuspend_mon_tsk); > + ksuspend_mon_tsk = NULL; > + pr_err("Create suspend_monitor_kthread failed! ret = %d\n", > + ret); > + return; > + } > + > + suspend_mon_toggle = TOGGLE_NONE; > + wake_up_process(ksuspend_mon_tsk); > + > +} > +#endif > + > /** > * pm_suspend_default_s2idle - Check if suspend-to-idle is the default suspend.
> * > @@ -89,6 +222,10 @@ static void s2idle_enter(void) > { > trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, true); > > +#ifdef CONFIG_PM_SLEEP_MONITOR > + stop_suspend_mon(); > +#endif Do not put #ifdef in .c files, that's not the proper kernel coding style. Especially for single function calls. I've stopped here... greg k-h
On Fri 2020-10-16 11:51:09, Joseph Jang wrote: > From: josephjang <josephjang@google.com> > > Add suspend timeout handler to prevent device stuck during suspend/ > resume process. Suspend timeout handler will dump disk sleep task > at first round timeout and trigger kernel panic at second round timeout. > The default timer for each round is 30 seconds. A better solution would be to resume instead of panic(). > Note: Can use following command to simulate suspend hang for testing. > adb shell echo 1 > /sys/power/pm_hang This looks dangerous. It adds a simple way to panic() the system. First, it should get enabled separately. e.g. CONFIG_TEST_PM_SLEEP_MONITOR. Second, I would add it as a module that might get loaded and unloaded. > diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c > index 8b1bb5ee7e5d..6f2679cfd9d1 100644 > --- a/kernel/power/suspend.c > +++ b/kernel/power/suspend.c > +static int suspend_monitor_kthread(void *arg) > +{ > + long err; > + struct sched_param param = {.sched_priority > + = MAX_RT_PRIO-1}; > + static int timeout_count; > + static long timeout; > + > + pr_info("Init ksuspend_mon thread\n"); > + > + sched_setscheduler(current, SCHED_FIFO, &param); > + > + timeout_count = 0; > + timeout = MAX_SCHEDULE_TIMEOUT; > + > + do { > + /* Wait suspend timer timeout */ > + err = wait_event_interruptible_timeout( > + power_suspend_waitqueue, > + (suspend_mon_toggle != TOGGLE_NONE), > + timeout); > + > + mutex_lock(&suspend_mon_lock); > + /* suspend monitor state change */ > + if (suspend_mon_toggle != TOGGLE_NONE) { > + if (suspend_mon_toggle == TOGGLE_START) { > + timeout = msecs_to_jiffies( > + SUSPEND_TIMER_TIMEOUT_MS); > + pr_info("Start suspend monitor\n"); > + } else if (suspend_mon_toggle == TOGGLE_STOP) { > + timeout = MAX_SCHEDULE_TIMEOUT; > + timeout_count = 0; > + pr_info("Stop suspend monitor\n"); > + } > + suspend_mon_toggle = TOGGLE_NONE; > + mutex_unlock(&suspend_mon_lock); > + continue; > + } > + mutex_unlock(&suspend_mon_lock); >
+ > + /* suspend monitor event handler */ > + if (err == 0) { > + timeout_count++; > + suspend_timeout(timeout_count); > + } else if (err == -ERESTARTSYS) { > + pr_info("Exit ksuspend_mon!"); > + break; > + } > + } while (1); > + > + return 0; > +} Using kthread looks like an overkill to me. I wonder how this actually works when the kthreads get freezed. It might be enough to implement just a timer callback. Start the timer in start_suspend_mon() and delete it in stop_suspend_mon(). Or do I miss anything? Anyway, the kthread implementation looks a but hairy. If you really need to use kthread, I suggest to use kthread_worker API. You would need to run an init work to setup the RT scheduling. Then you could just call kthread_queue_delayed_work(() and kthread_cancel_delayed_work_sync() to start and stop the monitor. > @@ -114,6 +251,10 @@ static void s2idle_enter(void) > s2idle_state = S2IDLE_STATE_NONE; > raw_spin_unlock_irq(&s2idle_lock); > > +#ifdef CONFIG_PM_SLEEP_MONITOR > + start_suspend_mon(); > +#endif It is better to solve this by defining start_suspend_mon() as empty function when the config option is disabled. For example, see how vgacon_text_force() is defined in console.h. Best Regards, Petr
On Fri, Oct 16, 2020 at 04:58:38PM +0800, Joseph Jang wrote:
> Thank you, Greg, for your prompt reply.
<snip>
You just sent html email, which got rejected by all of the mailing
lists :(
Please fix your email client to be sane and resend.
thanks,
greg k-h
Thank you, Greg, for your prompt reply.
Let me try to explain in detail below. Sorry, I forgot to switch to
plain-text mode in Gmail.
On Fri, Oct 16, 2020 at 11:51:09AM +0800, Joseph Jang wrote:
> From: josephjang <josephjang@google.com>
Please use your name as spelled out like you did above in the email
header.
Sure, I will update the patch again as follows.
Thanks Petr promptly response. On Fri 2020-10-16 11:51:09, Joseph Jang wrote: > From: josephjang <josephjang@google.com> > > Add suspend timeout handler to prevent device stuck during suspend/ > resume process. Suspend timeout handler will dump disk sleep task > at first round timeout and trigger kernel panic at second round timeout. > The default timer for each round is 30 seconds. A better solution would be to resume instead of panic(). [Joseph] suspend_timeout() will trigger kernel panic() only when suspend thread stuck (deadlock/hang) for 2*30 seconds. At that moment, I don't know how to resume the suspend thread. So I just could trigger panic to reboot system. If you have better suggestions, I am willing to study it. > Note: Can use following command to simulate suspend hang for testing. > adb shell echo 1 > /sys/power/pm_hang This looks dangerous. It adds a simple way to panic() the system. First, it should get enabled separately. e.g. CONFIG_TEST_PM_SLEEP_MONITOR. Second, I would add it as a module that might get loaded and unloaded. [Joseph] Agree to enable new compile flag for test module. I think it is better to create separate patch for the new test module right? > diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c > index 8b1bb5ee7e5d..6f2679cfd9d1 100644 > --- a/kernel/power/suspend.c > +++ b/kernel/power/suspend.c Using kthread looks like an overkill to me. I wonder how this actually works when the kthreads get freezed. It might be enough to implement just a timer callback. Start the timer in start_suspend_mon() and delete it in stop_suspend_mon(). Or do I miss anything? Anyway, the kthread implementation looks a but hairy. If you really need to use kthread, I suggest to use kthread_worker API. You would need to run an init work to setup the RT scheduling. Then you could just call kthread_queue_delayed_work(() and kthread_cancel_delayed_work_sync() to start and stop the monitor. 
[Joseph] Actually, I had ever think we just need to use add_timer()/del_timer_sync() for start_suspend_mon()/stop_suspend_mon() before. But I am not sure if add_timer() may cause any performance impact in suspend thread or not. So I try to create a suspend monitor kthread and just flip the flag in suspend thread. Thank you, Joseph. > @@ -114,6 +251,10 @@ static void s2idle_enter(void) > s2idle_state = S2IDLE_STATE_NONE; > raw_spin_unlock_irq(&s2idle_lock); > > +#ifdef CONFIG_PM_SLEEP_MONITOR > + start_suspend_mon(); > +#endif It is better to solve this by defining start_suspend_mon() as empty function when the config option is disabled. For example, see how vgacon_text_force() is defined in console.h. [Joseph] Thank you for good suggestions. May I know if I could use IS_ENABLED() ? if (IS_ENABLED(CONFIG_PM_SLEEP_MONITOR)) start_suspend_mon(); Best Regards, Petr Thank you, Joseph. Petr Mladek <pmladek@suse.com> 於 2020年10月16日 週五 下午5:01寫道: > > On Fri 2020-10-16 11:51:09, Joseph Jang wrote: > > From: josephjang <josephjang@google.com> > > > > Add suspend timeout handler to prevent device stuck during suspend/ > > resume process. Suspend timeout handler will dump disk sleep task > > at first round timeout and trigger kernel panic at second round timeout. > > The default timer for each round is 30 seconds. > > A better solution would be to resume instead of panic(). > > > Note: Can use following command to simulate suspend hang for testing. > > adb shell echo 1 > /sys/power/pm_hang > > This looks dangerous. It adds a simple way to panic() the system. > > First, it should get enabled separately. e.g. > CONFIG_TEST_PM_SLEEP_MONITOR. > > Second, I would add it as a module that might get loaded > and unloaded. 
> > > diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c > > index 8b1bb5ee7e5d..6f2679cfd9d1 100644 > > --- a/kernel/power/suspend.c > > +++ b/kernel/power/suspend.c > > +static int suspend_monitor_kthread(void *arg) > > +{ > > + long err; > > + struct sched_param param = {.sched_priority > > + = MAX_RT_PRIO-1}; > > + static int timeout_count; > > + static long timeout; > > + > > + pr_info("Init ksuspend_mon thread\n"); > > + > > + sched_setscheduler(current, SCHED_FIFO, &param); > > + > > + timeout_count = 0; > > + timeout = MAX_SCHEDULE_TIMEOUT; > > + > > + do { > > + /* Wait suspend timer timeout */ > > + err = wait_event_interruptible_timeout( > > + power_suspend_waitqueue, > > + (suspend_mon_toggle != TOGGLE_NONE), > > + timeout); > > + > > + mutex_lock(&suspend_mon_lock); > > + /* suspend monitor state change */ > > + if (suspend_mon_toggle != TOGGLE_NONE) { > > + if (suspend_mon_toggle == TOGGLE_START) { > > + timeout = msecs_to_jiffies( > > + SUSPEND_TIMER_TIMEOUT_MS); > > + pr_info("Start suspend monitor\n"); > > + } else if (suspend_mon_toggle == TOGGLE_STOP) { > > + timeout = MAX_SCHEDULE_TIMEOUT; > > + timeout_count = 0; > > + pr_info("Stop suspend monitor\n"); > > + } > > + suspend_mon_toggle = TOGGLE_NONE; > > + mutex_unlock(&suspend_mon_lock); > > + continue; > > + } > > + mutex_unlock(&suspend_mon_lock); > > + > > + /* suspend monitor event handler */ > > + if (err == 0) { > > + timeout_count++; > > + suspend_timeout(timeout_count); > > + } else if (err == -ERESTARTSYS) { > > + pr_info("Exit ksuspend_mon!"); > > + break; > > + } > > + } while (1); > > + > > + return 0; > > +} > > Using kthread looks like an overkill to me. I wonder how this actually > works when the kthreads get freezed. It might be enough to implement > just a timer callback. Start the timer in start_suspend_mon() and > delete it in stop_suspend_mon(). Or do I miss anything? > > Anyway, the kthread implementation looks a but hairy.
If you really > need to use kthread, I suggest to use kthread_worker API. You would > need to run an init work to setup the RT scheduling. Then you > could just call kthread_queue_delayed_work(() > and kthread_cancel_delayed_work_sync() to start and stop > the monitor. > > > > @@ -114,6 +251,10 @@ static void s2idle_enter(void) > > s2idle_state = S2IDLE_STATE_NONE; > > raw_spin_unlock_irq(&s2idle_lock); > > > > +#ifdef CONFIG_PM_SLEEP_MONITOR > > + start_suspend_mon(); > > +#endif > > It is better to solve this by defining start_suspend_mon() as empty > function when the config option is disabled. For example, see > how vgacon_text_force() is defined in console.h. > > Best Regards, > Petr -- Embedded Software engineer
On Fri, Oct 16, 2020 at 11:01 AM Petr Mladek <pmladek@suse.com> wrote: > > On Fri 2020-10-16 11:51:09, Joseph Jang wrote: > > From: josephjang <josephjang@google.com> > > > > Add suspend timeout handler to prevent device stuck during suspend/ > > resume process. Suspend timeout handler will dump disk sleep task > > at first round timeout and trigger kernel panic at second round timeout. > > The default timer for each round is 30 seconds. > > A better solution would be to resume instead of panic(). Well, abort the suspend if it happens during suspend or continue if it happens during resume. But we have a suspend watchdog already, don't we?
On Fri, Oct 16, 2020 at 5:51 AM Joseph Jang <josephjang@google.com> wrote: > > From: josephjang <josephjang@google.com> > > Add suspend timeout handler to prevent device stuck during suspend/ > resume process. Suspend timeout handler will dump disk sleep task > at first round timeout and trigger kernel panic at second round timeout. > The default timer for each round is 30 seconds. > > Note: Can use following command to simulate suspend hang for testing. > adb shell echo 1 > /sys/power/pm_hang > adb shell echo mem > /sys/power/state > Signed-off-by: josephjang <josephjang@google.com> > --- > include/linux/console.h | 1 + > kernel/power/Kconfig | 9 +++ > kernel/power/main.c | 66 ++++++++++++++++ > kernel/power/suspend.c | 162 ++++++++++++++++++++++++++++++++++++++++ > kernel/printk/printk.c | 5 ++ > 5 files changed, 243 insertions(+) > > diff --git a/include/linux/console.h b/include/linux/console.h > index 0670d3491e0e..ac468c602c0b 100644 > --- a/include/linux/console.h > +++ b/include/linux/console.h > @@ -192,6 +192,7 @@ static inline void console_sysfs_notify(void) > { } > #endif > extern bool console_suspend_enabled; > +extern int is_console_suspended(void); > > /* Suspend and resume console messages over PM events */ > extern void suspend_console(void); > diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig > index a7320f07689d..52b7a181b6d8 100644 > --- a/kernel/power/Kconfig > +++ b/kernel/power/Kconfig > @@ -207,6 +207,15 @@ config PM_SLEEP_DEBUG > def_bool y > depends on PM_DEBUG && PM_SLEEP > > +config PM_SLEEP_MONITOR > + bool "Linux kernel suspend/resume process monitor" > + depends on PM_SLEEP > + help > + This option will enable suspend/resume monitor to prevent device > + stuck during suspend/resume process. Suspend timeout handler will > + dump disk sleep task at first round timeout and trigger kernel panic > + at second round timeout. The default timer for each round is 30 seconds. 
> + The facility associated with the Kconfig entry right below is supposed to do exactly the same thing. What's the reason to add another one? What is missing? > config DPM_WATCHDOG > bool "Device suspend/resume watchdog" > depends on PM_DEBUG && PSTORE && EXPERT
diff --git a/include/linux/console.h b/include/linux/console.h index 0670d3491e0e..ac468c602c0b 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -192,6 +192,7 @@ static inline void console_sysfs_notify(void) { } #endif extern bool console_suspend_enabled; +extern int is_console_suspended(void); /* Suspend and resume console messages over PM events */ extern void suspend_console(void); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index a7320f07689d..52b7a181b6d8 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -207,6 +207,15 @@ config PM_SLEEP_DEBUG def_bool y depends on PM_DEBUG && PM_SLEEP +config PM_SLEEP_MONITOR + bool "Linux kernel suspend/resume process monitor" + depends on PM_SLEEP + help + This option will enable suspend/resume monitor to prevent device + stuck during suspend/resume process. Suspend timeout handler will + dump disk sleep task at first round timeout and trigger kernel panic + at second round timeout. The default timer for each round is 30 seconds. + config DPM_WATCHDOG bool "Device suspend/resume watchdog" depends on PM_DEBUG && PSTORE && EXPERT diff --git a/kernel/power/main.c b/kernel/power/main.c index 40f86ec4ab30..f25b8a47583e 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -575,6 +575,69 @@ void __pm_pr_dbg(bool defer, const char *fmt, ...) 
static inline void pm_print_times_init(void) {} #endif /* CONFIG_PM_SLEEP_DEBUG */ +#ifdef CONFIG_PM_SLEEP_MONITOR +/* If set, devices will stuck at suspend for verification */ +static bool pm_hang_enabled; + +static int pm_notify_test(struct notifier_block *nb, + unsigned long mode, void *_unused) +{ + pr_info("Jump into infinite loop now\n"); + + /* Suspend thread stuck at a loop forever */ + for (;;) + ; + + pr_info("Fail to stuck at loop\n"); + + return 0; +} + +static struct notifier_block pm_notify_nb = { + .notifier_call = pm_notify_test, +}; + +static ssize_t pm_hang_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return snprintf(buf, 10, "%d\n", pm_hang_enabled); +} + +static ssize_t pm_hang_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + int result; + + if (kstrtoul(buf, 10, &val)) + return -EINVAL; + + if (val > 1) + return -EINVAL; + + pm_hang_enabled = !!val; + + if (pm_hang_enabled == true) { + + result = register_pm_notifier(&pm_notify_nb); + if (result) + pr_warn("Can not register suspend notifier, return %d\n", + result); + + } else { + + result = unregister_pm_notifier(&pm_notify_nb); + if (result) + pr_warn("Can not unregister suspend notifier, return %d\n", + result); + } + + return n; +} + +power_attr(pm_hang); +#endif + struct kobject *power_kobj; /** @@ -909,6 +972,9 @@ static struct attribute * g[] = { &pm_wakeup_irq_attr.attr, &pm_debug_messages_attr.attr, #endif +#ifdef CONFIG_PM_SLEEP_MONITOR + &pm_hang_attr.attr, +#endif #endif #ifdef CONFIG_FREEZER &pm_freeze_timeout_attr.attr, diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8b1bb5ee7e5d..6f2679cfd9d1 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -30,6 +30,12 @@ #include <trace/events/power.h> #include <linux/compiler.h> #include <linux/moduleparam.h> +#ifdef CONFIG_PM_SLEEP_MONITOR +#include <linux/sched/debug.h> +#include <linux/kthread.h> +#include 
<linux/sched.h> +#include <uapi/linux/sched/types.h> +#endif #include "power.h" @@ -61,6 +67,133 @@ static DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head); enum s2idle_states __read_mostly s2idle_state; static DEFINE_RAW_SPINLOCK(s2idle_lock); +#ifdef CONFIG_PM_SLEEP_MONITOR +/* Suspend monitor thread toggle reason */ +enum toggle_reason { + TOGGLE_NONE, + TOGGLE_START, + TOGGLE_STOP, +}; + +#define SUSPEND_TIMER_TIMEOUT_MS 30000 +static struct task_struct *ksuspend_mon_tsk; +static DECLARE_WAIT_QUEUE_HEAD(power_suspend_waitqueue); +static enum toggle_reason suspend_mon_toggle; +static DEFINE_MUTEX(suspend_mon_lock); + +static void start_suspend_mon(void) +{ + mutex_lock(&suspend_mon_lock); + suspend_mon_toggle = TOGGLE_START; + mutex_unlock(&suspend_mon_lock); + wake_up(&power_suspend_waitqueue); +} + +static void stop_suspend_mon(void) +{ + mutex_lock(&suspend_mon_lock); + suspend_mon_toggle = TOGGLE_STOP; + mutex_unlock(&suspend_mon_lock); + wake_up(&power_suspend_waitqueue); +} + +static void suspend_timeout(int timeout_count) +{ + char *null_pointer = NULL; + + pr_info("Suspend monitor timeout (timer is %d seconds)\n", + (SUSPEND_TIMER_TIMEOUT_MS/1000)); + + show_state_filter(TASK_UNINTERRUPTIBLE); + + if (timeout_count < 2) + return; + + if (is_console_suspended()) + resume_console(); + + pr_info("Trigger a panic\n"); + + /* Trigger a NULL pointer dereference */ + *null_pointer = 'a'; + + /* Should not reach here */ + pr_err("Trigger panic failed!\n"); +} + +static int suspend_monitor_kthread(void *arg) +{ + long err; + struct sched_param param = {.sched_priority + = MAX_RT_PRIO-1}; + static int timeout_count; + static long timeout; + + pr_info("Init ksuspend_mon thread\n"); + + sched_setscheduler(current, SCHED_FIFO, &param); + + timeout_count = 0; + timeout = MAX_SCHEDULE_TIMEOUT; + + do { + /* Wait suspend timer timeout */ + err = wait_event_interruptible_timeout( + power_suspend_waitqueue, + (suspend_mon_toggle != TOGGLE_NONE), + timeout); + +
mutex_lock(&suspend_mon_lock); + /* suspend monitor state change */ + if (suspend_mon_toggle != TOGGLE_NONE) { + if (suspend_mon_toggle == TOGGLE_START) { + timeout = msecs_to_jiffies( + SUSPEND_TIMER_TIMEOUT_MS); + pr_info("Start suspend monitor\n"); + } else if (suspend_mon_toggle == TOGGLE_STOP) { + timeout = MAX_SCHEDULE_TIMEOUT; + timeout_count = 0; + pr_info("Stop suspend monitor\n"); + } + suspend_mon_toggle = TOGGLE_NONE; + mutex_unlock(&suspend_mon_lock); + continue; + } + mutex_unlock(&suspend_mon_lock); + + /* suspend monitor event handler */ + if (err == 0) { + timeout_count++; + suspend_timeout(timeout_count); + } else if (err == -ERESTARTSYS) { + pr_info("Exit ksuspend_mon!"); + break; + } + } while (1); + + return 0; +} + +static void init_suspend_monitor_thread(void) +{ + int ret; + + ksuspend_mon_tsk = kthread_create(suspend_monitor_kthread, + NULL, "ksuspend_mon"); + if (IS_ERR(ksuspend_mon_tsk)) { + ret = PTR_ERR(ksuspend_mon_tsk); + ksuspend_mon_tsk = NULL; + pr_err("Create suspend_monitor_kthread failed! ret = %d\n", + ret); + return; + } + + suspend_mon_toggle = TOGGLE_NONE; + wake_up_process(ksuspend_mon_tsk); + +} +#endif + /** * pm_suspend_default_s2idle - Check if suspend-to-idle is the default suspend. * @@ -89,6 +222,10 @@ static void s2idle_enter(void) { trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, true); +#ifdef CONFIG_PM_SLEEP_MONITOR + stop_suspend_mon(); +#endif + raw_spin_lock_irq(&s2idle_lock); if (pm_wakeup_pending()) goto out; @@ -114,6 +251,10 @@ static void s2idle_enter(void) s2idle_state = S2IDLE_STATE_NONE; raw_spin_unlock_irq(&s2idle_lock); +#ifdef CONFIG_PM_SLEEP_MONITOR + start_suspend_mon(); +#endif + trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, false); } @@ -179,6 +320,9 @@ void __init pm_states_init(void) * initialize mem_sleep_states[] accordingly here. 
*/ mem_sleep_states[PM_SUSPEND_TO_IDLE] = mem_sleep_labels[PM_SUSPEND_TO_IDLE]; +#ifdef CONFIG_PM_SLEEP_MONITOR + init_suspend_monitor_thread(); +#endif } static int __init mem_sleep_default_setup(char *str) @@ -426,6 +570,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) if (error || suspend_test(TEST_CPUS)) goto Enable_cpus; +#ifdef CONFIG_PM_SLEEP_MONITOR + stop_suspend_mon(); +#endif + arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -451,6 +599,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); +#ifdef CONFIG_PM_SLEEP_MONITOR + start_suspend_mon(); +#endif + Enable_cpus: suspend_enable_secondary_cpus(); @@ -610,6 +762,11 @@ int pm_suspend(suspend_state_t state) return -EINVAL; pr_info("suspend entry (%s)\n", mem_sleep_labels[state]); + +#ifdef CONFIG_PM_SLEEP_MONITOR + start_suspend_mon(); +#endif + error = enter_state(state); if (error) { suspend_stats.fail++; @@ -617,6 +774,11 @@ int pm_suspend(suspend_state_t state) } else { suspend_stats.success++; } + +#ifdef CONFIG_PM_SLEEP_MONITOR + stop_suspend_mon(); +#endif + pr_info("suspend exit\n"); return error; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9b75f6bfc333..58db8762eeda 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2259,6 +2259,11 @@ module_param_named(console_suspend, console_suspend_enabled, MODULE_PARM_DESC(console_suspend, "suspend console during suspend" " and hibernate operations"); +int is_console_suspended(void) +{ + return console_suspended; +} + /** * suspend_console - suspend the console subsystem *