Message ID | 20210601071644.6055-1-krzysztof.kozlowski@canonical.com
State | New
Series | [v3,v5.12,1/3] x86/kvm: Teardown PV features on boot CPU as well
On 01/06/21 09:16, Krzysztof Kozlowski wrote:
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
>
> commit c02027b5742b5aa804ef08a4a9db433295533046 upstream.
>
> Currently, we disable kvmclock from the machine_shutdown() hook, and this
> only happens for the boot CPU. We need to disable it for all CPUs to
> guard against memory corruption, e.g. on restore from hibernation.
>
> Note, writing '0' to the kvmclock MSR doesn't clear the memory location, it
> just prevents the hypervisor from updating the location, so for the short
> while after the write, and while the CPU is still alive, the clock remains
> usable and correct, so we don't need to switch to some other clocksource.
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> Message-Id: <20210414123544.1060604-4-vkuznets@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> ---
>  arch/x86/include/asm/kvm_para.h | 4 ++--
>  arch/x86/kernel/kvm.c           | 1 +
>  arch/x86/kernel/kvmclock.c      | 5 +----
>  3 files changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index 338119852512..9c56e0defd45 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -7,8 +7,6 @@
>  #include <linux/interrupt.h>
>  #include <uapi/asm/kvm_para.h>
>
> -extern void kvmclock_init(void);
> -
>  #ifdef CONFIG_KVM_GUEST
>  bool kvm_check_and_clear_guest_paused(void);
>  #else
> @@ -86,6 +84,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
>  }
>
>  #ifdef CONFIG_KVM_GUEST
> +void kvmclock_init(void);
> +void kvmclock_disable(void);
>  bool kvm_para_available(void);
>  unsigned int kvm_arch_para_features(void);
>  unsigned int kvm_arch_para_hints(void);
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 5ae57be19187..2ebb93c34bc5 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -468,6 +468,7 @@ static void kvm_guest_cpu_offline(void)
>  		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
>  	kvm_pv_disable_apf();
>  	apf_task_wake_all();
> +	kvmclock_disable();
>  }
>
>  static int kvm_cpu_online(unsigned int cpu)
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index 1fc0962c89c0..cf869de98eec 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -220,11 +220,9 @@ static void kvm_crash_shutdown(struct pt_regs *regs)
>  }
>  #endif
>
> -static void kvm_shutdown(void)
> +void kvmclock_disable(void)
>  {
>  	native_write_msr(msr_kvm_system_time, 0, 0);
> -	kvm_disable_steal_time();
> -	native_machine_shutdown();
>  }
>
>  static void __init kvmclock_init_mem(void)
> @@ -351,7 +349,6 @@ void __init kvmclock_init(void)
>  #endif
>  	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
>  	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
> -	machine_ops.shutdown = kvm_shutdown;
>  #ifdef CONFIG_KEXEC_CORE
>  	machine_ops.crash_shutdown = kvm_crash_shutdown;
>  #endif

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
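For context on the MSR write that the patch centralizes in kvmclock_disable(): a guest enables kvmclock by writing the physical address of a per-CPU pvclock_vcpu_time_info page, with bit 0 set as the enable flag, to the system-time MSR, and disables it by writing 0, which only stops further hypervisor updates to that page. Below is a minimal sketch of that protocol, not the patch's code; the example_* helper names are hypothetical, while MSR_KVM_SYSTEM_TIME_NEW, wrmsrl() and slow_virt_to_phys() are existing kernel-internal identifiers (kernel build context assumed):

```c
#include <linux/kernel.h>
#include <asm/kvm_para.h>       /* MSR_KVM_SYSTEM_TIME_NEW */
#include <asm/msr.h>            /* wrmsrl() */
#include <asm/pvclock.h>        /* struct pvclock_vcpu_time_info */
#include <asm/pgtable_types.h>  /* slow_virt_to_phys() */

/* Hypothetical helper: tell the hypervisor where this CPU's clock page is. */
static void example_kvmclock_register(struct pvclock_vcpu_time_info *ti)
{
        /* Bit 0 of the written value is the "enabled" flag. */
        u64 pa = slow_virt_to_phys(ti) | 0x01ULL;

        wrmsrl(MSR_KVM_SYSTEM_TIME_NEW, pa);    /* hypervisor starts updating *ti */
}

/* Hypothetical helper: stop further hypervisor writes to the clock page. */
static void example_kvmclock_unregister(void)
{
        /*
         * Writing 0 does not clear or unmap the page; it only prevents the
         * hypervisor from touching it again, so the last published time data
         * stays readable while the CPU is still alive.
         */
        wrmsrl(MSR_KVM_SYSTEM_TIME_NEW, 0ULL);
}
```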
On Tue, Jun 01, 2021 at 09:16:42AM +0200, Krzysztof Kozlowski wrote:
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
>
> commit 8b79feffeca28c5459458fe78676b081e87c93a4 upstream.
>
> Various PV features (Async PF, PV EOI, steal time) work through memory
> shared with the hypervisor, and when we restore from hibernation we must
> properly tear down all these features to make sure the hypervisor doesn't
> write to stale locations after we jump to the previously hibernated kernel
> (which can try to place anything there). For secondary CPUs the job is
> already done by kvm_cpu_down_prepare(); register syscore ops to do
> the same for the boot CPU.
>
> Krzysztof:
> This fixes memory corruption visible after the second resume from
> hibernation:
>
> BUG: Bad page state in process dbus-daemon pfn:18b01
> page:ffffea000062c040 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 compound_mapcount: -30591
> flags: 0xfffffc0078141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
> raw: 000fffffc0078141 dead0000000002d0 dead000000000100 0000000000000000
> raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
> page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set
> bad because of flags: 0x78141(locked|error|workingset|writeback|head|mappedtodisk|reclaim)
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> [krzysztof: Extend the commit message, adjust for v5.10 context]
> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> ---
>  arch/x86/kernel/kvm.c | 57 +++++++++++++++++++++++++++++++------------
>  1 file changed, 41 insertions(+), 16 deletions(-)

All now queued up, thanks.

greg k-h
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 78bb0fae3982..5ae57be19187 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,6 +26,7 @@
 #include <linux/kprobes.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
+#include <linux/syscore_ops.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -460,6 +461,25 @@ static bool pv_tlb_flush_supported(void)
 
 static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
 
+static void kvm_guest_cpu_offline(void)
+{
+	kvm_disable_steal_time();
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
+	apf_task_wake_all();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kvm_guest_cpu_init();
+	local_irq_restore(flags);
+	return 0;
+}
+
 #ifdef CONFIG_SMP
 
 static bool pv_ipi_supported(void)
@@ -587,31 +607,34 @@ static void __init kvm_smp_prepare_boot_cpu(void)
 	kvm_spinlock_init();
 }
 
-static void kvm_guest_cpu_offline(void)
+static int kvm_cpu_down_prepare(unsigned int cpu)
 {
-	kvm_disable_steal_time();
-	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-	kvm_pv_disable_apf();
-	apf_task_wake_all();
-}
+	unsigned long flags;
 
-static int kvm_cpu_online(unsigned int cpu)
-{
-	local_irq_disable();
-	kvm_guest_cpu_init();
-	local_irq_enable();
+	local_irq_save(flags);
+	kvm_guest_cpu_offline();
+	local_irq_restore(flags);
 	return 0;
 }
 
-static int kvm_cpu_down_prepare(unsigned int cpu)
+#endif
+
+static int kvm_suspend(void)
 {
-	local_irq_disable();
 	kvm_guest_cpu_offline();
-	local_irq_enable();
+
 	return 0;
 }
-#endif
+
+static void kvm_resume(void)
+{
+	kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+	.suspend	= kvm_suspend,
+	.resume		= kvm_resume,
+};
 
 static void kvm_flush_tlb_others(const struct cpumask *cpumask,
 			const struct flush_tlb_info *info)
@@ -681,6 +704,8 @@ static void __init kvm_guest_init(void)
 	kvm_guest_cpu_init();
 #endif
 
+	register_syscore_ops(&kvm_syscore_ops);
+
 	/*
 	 * Hard lockup detection is enabled by default. Disable it, as guests
 	 * can get false positives too easily, for example if the host is
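As background on the mechanism the patch relies on: syscore_ops callbacks run very late during suspend/hibernation, after all non-boot CPUs have been taken offline (where kvm_cpu_down_prepare() already performs the teardown), on the remaining boot CPU with interrupts disabled, and .resume runs correspondingly early on wake-up. Below is a generic, self-contained sketch of registering such hooks; the example_* handler names are illustrative, not the patch's kvm_suspend()/kvm_resume():

```c
#include <linux/init.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/syscore_ops.h>

/* Runs on the boot CPU, IRQs off, after all non-boot CPUs are offline. */
static int example_syscore_suspend(void)
{
	pr_info("tearing down per-boot-CPU state before suspend/hibernation\n");
	return 0;	/* a non-zero return aborts the suspend */
}

/* Runs early on resume, still on the boot CPU with IRQs off. */
static void example_syscore_resume(void)
{
	pr_info("re-initializing per-boot-CPU state after resume\n");
}

static struct syscore_ops example_syscore_ops = {
	.suspend	= example_syscore_suspend,
	.resume		= example_syscore_resume,
};

static int __init example_init(void)
{
	register_syscore_ops(&example_syscore_ops);
	return 0;
}

static void __exit example_exit(void)
{
	unregister_syscore_ops(&example_syscore_ops);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
```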