Message ID: 1370963819-26165-1-git-send-email-andre.przywara@linaro.org
State: New
On Tue, Jun 11, 2013 at 05:16:59PM +0200, Andre Przywara wrote:
> For migration to work we need to save (and later restore) the state of
> each core's virtual generic timer.
> Since this is per VCPU, we can use the [gs]et_one_reg ioctl and export
> the three needed registers (control, counter, compare value).
> Though they live in cp15 space, we don't use the existing list, since
> they need special accessor functions and also the arch timer is
> optional.
[...]

This looks good to me!

Thanks,
-Christoffer
On 11/06/13 16:16, Andre Przywara wrote:
> For migration to work we need to save (and later restore) the state of
> each core's virtual generic timer.
[...]
> +int kvm_arm_copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
> +{
> +	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
> +		return -EFAULT;
> +	uindices++;
> +	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
> +		return -EFAULT;
> +	uindices++;
> +	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
> +		return -EFAULT;

So these macros are going to break arm64. Any chance you could introduce
them at the same time on both platforms? The rest of the work can be
delayed, but you shouldn't break arm64 (you'd expect me to say that,
wouldn't you? ;-).

Also, I'd like to keep userspace access out of the timer code itself.
Low-level code shouldn't have to know about that. Can you create proper
accessors instead, and move the whole userspace access to coproc.c?

> +	switch (reg->id) {
> +	case KVM_REG_ARM_TIMER_CTL:
> +		timer->cntv_ctl = val;
> +		break;
> +	case KVM_REG_ARM_TIMER_CNT:
> +		vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - val;

I just realized what bothers me here: you're computing cntvoff on a
per-VCPU basis, while this is a VM property. Which means that as you're
restoring vcpus, you'll be changing cntvoff - sounds like a bad idea to me.

The counter is really global. Do we have a way to handle VM-wide
registers? I think Christoffer was trying to do a similar thing with
the GIC...

[...]

This is otherwise moving in the right direction.

Thanks,

	M.
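For reference, the arithmetic behind the KVM_REG_ARM_TIMER_CNT accessors
being discussed: the architecture defines the guest-visible virtual
counter as the physical counter minus the offset, so the accessors save
the guest's view and restore it by recomputing the offset.

	CNTVCT = CNTPCT - CNTVOFF                    /* architected relation */

	save:    val     = kvm_phys_timer_read() - cntvoff;
	restore: cntvoff = kvm_phys_timer_read() - val;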
On Thu, Jun 20, 2013 at 11:10:48AM +0100, Marc Zyngier wrote:
> On 11/06/13 16:16, Andre Przywara wrote:
[...]
> So these macros are going to break arm64. Any chance you could introduce
> them at the same time on both platforms? The rest of the work can be
> delayed, but you shouldn't break arm64 (you'd expect me to say that,
> wouldn't you? ;-).
>
> Also, I'd like to keep userspace access out of the timer code itself.
> Low-level code shouldn't have to know about that. Can you create proper
> accessors instead, and move the whole userspace access to coproc.c?

I'm fine with this, coproc.c or guest.c - either way.

> I just realized what bothers me here: you're computing cntvoff on a
> per-VCPU basis, while this is a VM property. Which means that as you're
> restoring vcpus, you'll be changing cntvoff - sounds like a bad idea to me.
>
> The counter is really global. Do we have a way to handle VM-wide
> registers? I think Christoffer was trying to do a similar thing with
> the GIC...

We do have a way, but it requires user space to create a device and keep
track of the device fd just to set/get a single register, which seems
like overkill to me.

I suggest you do one of two things:
1. Whenever this value is written, make sure it's written across all
   vcpus, so guests always have a consistent view of time (be careful
   about synchronization here).
2. Move the cntvoff value to the vm struct instead, so there's only one
   offset and a consistent view of time. This may have an adverse
   effect on the world-switch code performance, but I suspect it would
   completely disappear in the noise.

I don't feel strongly about either approach.

-Christoffer
On 20/06/13 18:09, Christoffer Dall wrote:
> On Thu, Jun 20, 2013 at 11:10:48AM +0100, Marc Zyngier wrote:
>> On 11/06/13 16:16, Andre Przywara wrote:
[...]
>> The counter is really global. Do we have a way to handle VM-wide
>> registers? I think Christoffer was trying to do a similar thing with
>> the GIC...
>
> We do have a way, but it requires user space to create a device and keep
> track of the device fd just to set/get a single register, which seems
> like overkill to me.
>
> I suggest you do one of two things:
> 1. Whenever this value is written, make sure it's written across all
>    vcpus, so guests always have a consistent view of time (be careful
>    about synchronization here).
> 2. Move the cntvoff value to the vm struct instead, so there's only one
>    offset and a consistent view of time. This may have an adverse
>    effect on the world-switch code performance, but I suspect it would
>    completely disappear in the noise.
>
> I don't feel strongly about either approach.

So it turns out I've completely forgotten about that - cntvoff is
already per-VM (the indirection shows it). Doh.

So there is just one thing we absolutely need to make sure here: no vcpu
can run before they've all had their timer restored, and hence a stable
cntvoff. Otherwise two vcpus will have a different view of time.

Can we guarantee this?

	M.
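The indirection in question - a rough sketch of how the state is split
between per-VCPU and per-VM structures. Only the fields the patch's
accessors touch are shown; exact types in the tree may differ.

	/* Sketch: per-VCPU timer state, vcpu->arch.timer_cpu */
	struct arch_timer_cpu {
		u32	cntv_ctl;	/* CNTV_CTL: timer control */
		u64	cntv_cval;	/* CNTV_CVAL: compare value */
	};

	/* Sketch: per-VM timer state, vcpu->kvm->arch.timer */
	struct arch_timer_kvm {
		u64	cntvoff;	/* CNTVOFF: virtual counter offset,
					 * shared by all VCPUs of the VM */
	};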
[...]

> So it turns out I've completely forgotten about that - cntvoff is
> already per-VM (the indirection shows it). Doh.

right.... ahem.

> So there is just one thing we absolutely need to make sure here: no vcpu
> can run before they've all had their timer restored, and hence a stable
> cntvoff. Otherwise two vcpus will have a different view of time.
>
> Can we guarantee this?

Do we need to? User space is free to modify time and all sorts of other
registers at any point during VM execution - it will just break the
guest that it's running.

I think the key here is that we expect the VM to be stopped for all
save/restore operations (we can enforce it if we want to, which I am
going to for the VGIC state, because we don't want to interfere with
consistent state being written to the hardware).

-Christoffer
On 20/06/13 19:32, Christoffer Dall wrote:
>> So there is just one thing we absolutely need to make sure here: no vcpu
>> can run before they've all had their timer restored, and hence a stable
>> cntvoff. Otherwise two vcpus will have a different view of time.
>>
>> Can we guarantee this?
>
> Do we need to? User space is free to modify time and all sorts of other
> registers at any point during VM execution - it will just break the
> guest that it's running.
>
> I think the key here is that we expect the VM to be stopped for all
> save/restore operations (we can enforce it if we want to, which I am
> going to for the VGIC state, because we don't want to interfere with
> consistent state being written to the hardware).

If that's what we expect, then it's fine by me.

	M.
On 20 June 2013 19:32, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> Marc wrote:
>> So there is just one thing we absolutely need to make sure here: no vcpu
>> can run before they've all had their timer restored, and hence a stable
>> cntvoff. Otherwise two vcpus will have a different view of time.
>>
>> Can we guarantee this?
>
> Do we need to? User space is free to modify time and all sorts of other
> registers at any point during VM execution - it will just break the
> guest that it's running.

Note that QEMU will stop all CPUs before doing a migration or
similar operation. However there is a monitor command to query
the current CPU registers etc which won't try to stop the VM
first. So we might try to read vcpu registers (though I hope we
don't allow writing them).

-- PMM
On Thu, Jun 20, 2013 at 08:29:30PM +0100, Peter Maydell wrote:
> On 20 June 2013 19:32, Christoffer Dall <christoffer.dall@linaro.org> wrote:
[...]
> Note that QEMU will stop all CPUs before doing a migration or
> similar operation. However there is a monitor command to query
> the current CPU registers etc which won't try to stop the VM
> first. So we might try to read vcpu registers (though I hope we
> don't allow writing them).

Sounds like we need to add a -EBUSY return on SET_ONE_REG if the VM is
running.

-Christoffer
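A minimal sketch of such a guard, assuming some way to ask whether any
VCPU of the VM has started running; kvm_vm_is_running() below is purely
hypothetical and does not exist in the tree.

	int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
	{
		/* Hypothetical guard: refuse writes while the guest may be
		 * running, so a VM-wide value like cntvoff can't shift
		 * under live VCPUs. */
		if (kvm_vm_is_running(vcpu->kvm))
			return -EBUSY;

		/* ... existing dispatch to core/timer/coproc handlers ... */
	}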
On 20.06.2013, at 22:37, Christoffer Dall wrote:
> On Thu, Jun 20, 2013 at 08:29:30PM +0100, Peter Maydell wrote:
>> Note that QEMU will stop all CPUs before doing a migration or
>> similar operation. However there is a monitor command to query
>> the current CPU registers etc which won't try to stop the VM
>> first. So we might try to read vcpu registers (though I hope we
>> don't allow writing them).
>
> Sounds like we need to add a -EBUSY return on SET_ONE_REG if the VM is
> running.

The ONE_REG API should already be protected here, as it does
vcpu_load() in kvm_vcpu_ioctl(). So a separate thread can't possibly
do ONE_REG accesses while another thread has the same vcpu running.

Alex
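The bracket Alex refers to, heavily simplified from the generic ioctl
path in virt/kvm/kvm_main.c of the era:

	static long kvm_vcpu_ioctl(struct file *filp,
				   unsigned int ioctl, unsigned long arg)
	{
		struct kvm_vcpu *vcpu = filp->private_data;
		long r;

		r = vcpu_load(vcpu);	/* takes vcpu->mutex, also held by KVM_RUN */
		if (r)
			return r;
		switch (ioctl) {
		/* ... KVM_GET_ONE_REG / KVM_SET_ONE_REG dispatched here ... */
		}
		vcpu_put(vcpu);		/* releases vcpu->mutex */
		return r;
	}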
On 20 June 2013 22:55, Alexander Graf <agraf@suse.de> wrote:
> The ONE_REG API should already be protected here, as it does
> vcpu_load() in kvm_vcpu_ioctl(). So a separate thread can't possibly
> do ONE_REG accesses while another thread has the same vcpu running.

Doesn't protect you against confusion due to another thread running
a different vcpu in the same vm, though.

thanks
-- PMM
On 20.06.2013, at 23:59, Peter Maydell wrote:
> On 20 June 2013 22:55, Alexander Graf <agraf@suse.de> wrote:
>> The ONE_REG API should already be protected here, as it does
>> vcpu_load() in kvm_vcpu_ioctl(). So a separate thread can't possibly
>> do ONE_REG accesses while another thread has the same vcpu running.
>
> Doesn't protect you against confusion due to another thread running
> a different vcpu in the same vm, though.

Ah, different ONE_REG API. Can't you just notify all vcpus to exit and
refresh their timers? That's what kvm_make_request() is there for, no?

Alex
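A sketch of the kvm_make_request() pattern Alex refers to;
KVM_REQ_TIMER_REFRESH is an illustrative request bit, not one that
exists in the tree.

	static void kvm_timer_refresh_all(struct kvm *kvm)
	{
		int i;
		struct kvm_vcpu *vcpu;

		/* Force every VCPU out of guest mode; each one picks up
		 * the new timer state when it next enters the guest. */
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_make_request(KVM_REQ_TIMER_REFRESH, vcpu);
			kvm_vcpu_kick(vcpu);
		}
	}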
On Fri, Jun 21, 2013 at 12:02:02AM +0200, Alexander Graf wrote:
> On 20.06.2013, at 23:59, Peter Maydell wrote:
>> Doesn't protect you against confusion due to another thread running
>> a different vcpu in the same vm, though.
>
> Ah, different ONE_REG API. Can't you just notify all vcpus to exit and
> refresh their timers? That's what kvm_make_request() is there for, no?

Yes you can, but I don't think it's worth the trouble to add the code
in the kernel to fix a case where user space does something completely
broken, which does not muck with the hardware or host state, but can
only break the guest.

I didn't realize that ONE_REG does vcpu_load() (or, I probably did
once, and forgot), so that means we're good.

Conclusion on this patch: address Marc's comment to move the user space
interface handling out of arch_timer.c and we should be good.

Thanks,
-Christoffer
Hi Marc,

>> +int kvm_arm_num_timer_regs(void)
>> +{
>> +	return 3;
>> +}
[...]
> So these macros are going to break arm64. Any chance you could introduce
> them at the same time on both platforms? The rest of the work can be
> delayed, but you shouldn't break arm64 (you'd expect me to say that,
> wouldn't you? ;-).

Is that just due to KVM_REG_ARM instead of KVM_REG_ARM64?
Or do you expect the numbering to be completely different since there is
no mrc/mcr anymore (IIRC)?
Is put_user an issue here (it should not be, right?)
Is there already a document describing arch timer access on AArch64?
If I am thinking in a totally wrong direction, please bear with me and
feel free to point me to the right one ;-)
/me is now looking at getting a cross compiler to see what you mean...

> Also, I'd like to keep userspace access out of the timer code itself.
> Low-level code shouldn't have to know about that. Can you create proper
> accessors instead, and move the whole userspace access to coproc.c?

IIRC Christoffer recommended keeping this code completely out of
coproc.c ;-) This also helps to keep coproc.c clean of ARCH_TIMER
ifdefs (which I completely forgot in this version, btw, but that's
already fixed).

> I just realized what bothers me here: you're computing cntvoff on a
> per-VCPU basis, while this is a VM property. Which means that as you're
> restoring vcpus, you'll be changing cntvoff - sounds like a bad idea to me.
>
> The counter is really global. Do we have a way to handle VM-wide
> registers? I think Christoffer was trying to do a similar thing with
> the GIC...

So the consensus of this discussion was just to block writing when the
VCPU is running, right? Or is there something else?

Regards,
Andre.
On 05/07/13 15:08, Andre Przywara wrote:
> Hi Marc,
>
>> So these macros are going to break arm64. Any chance you could introduce
>> them at the same time on both platforms? The rest of the work can be
>> delayed, but you shouldn't break arm64 (you'd expect me to say that,
>> wouldn't you? ;-).
>
> Is that just due to KVM_REG_ARM instead of KVM_REG_ARM64?
> Or do you expect the numbering to be completely different since there is
> no mrc/mcr anymore (IIRC)?

Both. The encoding is different (32bit is encoded CRn/CRm/Op1/Op2, and
64bit is Op0/Op1/CRn/CRm/Op2), and the KVM_REG_ARM64 is different too.

> Is put_user an issue here (it should not be, right?)

No, except for the reason outlined below.

> Is there already a document describing arch timer access on AArch64?

No, but it is strikingly similar to the AArch32 one. Have a look at
arch/arm64/include/asm/arch_timer.h and arch/arm64/kvm/hyp.S for
details.

> If I am thinking in a totally wrong direction, please bear with me and
> feel free to point me to the right one ;-)
> /me is now looking at getting a cross compiler to see what you mean...

That'd be a good thing! ;-)

>> Also, I'd like to keep userspace access out of the timer code itself.
>> Low-level code shouldn't have to know about that. Can you create proper
>> accessors instead, and move the whole userspace access to coproc.c?
>
> IIRC Christoffer recommended keeping this code completely out of
> coproc.c ;-)

I thought we agreed on moving the userspace access out of the timer
code. In a reply to the email you just quoted, Christoffer says: "I'm
fine with this, coproc.c or guest.c - either way.". The man has spoken!
;-)

>> The counter is really global. Do we have a way to handle VM-wide
>> registers? [...]
>
> So the consensus of this discussion was just to block writing when the
> VCPU is running, right? Or is there something else?

Yup. No update while vcpus are running.

	M.
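To make the encoding difference concrete, a rough sketch of what an
arm64 counterpart could look like. The SYSREG_*_SHIFT names are
placeholders, not a defined ABI; in the same scheme, CNTVCT_EL0 would
be Op0=3/Op1=3/CRn=14/CRm=0/Op2=2 and CNTV_CVAL_EL0
Op0=3/Op1=3/CRn=14/CRm=3/Op2=2.

	/* Sketch only: illustrative arm64 system-register encoding. */
	#define KVM_REG_ARM64_SYSREG	(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
					 (0x0013ULL << KVM_REG_ARM_COPROC_SHIFT))

	/* CNTV_CTL_EL0: Op0=3, Op1=3, CRn=14, CRm=3, Op2=1 */
	#define KVM_REG_ARM64_TIMER_CTL	(KVM_REG_ARM64_SYSREG | \
					 ( 3ULL << SYSREG_OP0_SHIFT) | \
					 ( 3ULL << SYSREG_OP1_SHIFT) | \
					 (14ULL << SYSREG_CRN_SHIFT) | \
					 ( 3ULL << SYSREG_CRM_SHIFT) | \
					 ( 1ULL << SYSREG_OP2_SHIFT))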
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 57cb786..1096e33 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -224,4 +224,9 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+int kvm_arm_num_timer_regs(void);
+int kvm_arm_copy_timer_indices(struct kvm_vcpu *, u64 __user *);
+int kvm_arm_timer_get_reg(struct kvm_vcpu *, const struct kvm_one_reg *);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, const struct kvm_one_reg *);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c1ee007..e3b0115 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -118,6 +118,22 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_32_CRN_MASK		0x0000000000007800
 #define KVM_REG_ARM_32_CRN_SHIFT	11
 
+#define KVM_REG_ARM_32_CP15	(KVM_REG_ARM | KVM_REG_SIZE_U32 | \
+				 (15ULL << KVM_REG_ARM_COPROC_SHIFT))
+#define KVM_REG_ARM_64_CP15	(KVM_REG_ARM | KVM_REG_SIZE_U64 | \
+				 (15ULL << KVM_REG_ARM_COPROC_SHIFT))
+#define KVM_REG_ARM_TIMER_CTL	(KVM_REG_ARM_32_CP15 | \
+				 ( 3ULL << KVM_REG_ARM_CRM_SHIFT) | \
+				 (14ULL << KVM_REG_ARM_32_CRN_SHIFT) | \
+				 ( 0ULL << KVM_REG_ARM_OPC1_SHIFT) | \
+				 ( 1ULL << KVM_REG_ARM_32_OPC2_SHIFT))
+#define KVM_REG_ARM_TIMER_CNT	(KVM_REG_ARM_64_CP15 | \
+				 (14ULL << KVM_REG_ARM_CRM_SHIFT) | \
+				 ( 1ULL << KVM_REG_ARM_OPC1_SHIFT))
+#define KVM_REG_ARM_TIMER_CVAL	(KVM_REG_ARM_64_CP15 | \
+				 (14ULL << KVM_REG_ARM_CRM_SHIFT) | \
+				 ( 3ULL << KVM_REG_ARM_OPC1_SHIFT))
+
 /* Normal registers are mapped as coprocessor 16. */
 #define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / 4)
diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c
index c55b608..8d709eb 100644
--- a/arch/arm/kvm/arch_timer.c
+++ b/arch/arm/kvm/arch_timer.c
@@ -18,6 +18,7 @@
 
 #include <linux/cpu.h>
 #include <linux/of_irq.h>
+#include <linux/uaccess.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
@@ -171,6 +172,70 @@ static void kvm_timer_init_interrupt(void *info)
 	enable_percpu_irq(timer_irq.irq, 0);
 }
 
+int kvm_arm_num_timer_regs(void)
+{
+	return 3;
+}
+
+int kvm_arm_copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return ret;
+
+	switch (reg->id) {
+	case KVM_REG_ARM_TIMER_CTL:
+		timer->cntv_ctl = val;
+		break;
+	case KVM_REG_ARM_TIMER_CNT:
+		vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - val;
+		break;
+	case KVM_REG_ARM_TIMER_CVAL:
+		timer->cntv_cval = val;
+		break;
+	}
+
+	return 0;
+}
+
+int kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	switch (reg->id) {
+	case KVM_REG_ARM_TIMER_CTL:
+		val = timer->cntv_ctl;
+		break;
+	case KVM_REG_ARM_TIMER_CNT:
+		val = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+		break;
+	case KVM_REG_ARM_TIMER_CVAL:
+		val = timer->cntv_cval;
+		break;
+	}
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
 
 static int kvm_timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *cpu)
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 152d036..a50ffb6 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -121,7 +121,8 @@ static unsigned long num_core_regs(void)
  */
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
-	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
+	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+		+ kvm_arm_num_timer_regs();
 }
 
 /**
@@ -133,6 +134,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
 	unsigned int i;
 	const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
+	int ret;
 
 	for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
 		if (put_user(core_reg | i, uindices))
@@ -140,9 +142,25 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		uindices++;
 	}
 
+	ret = kvm_arm_copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += kvm_arm_num_timer_regs();
+
 	return kvm_arm_copy_coproc_indices(vcpu, uindices);
 }
 
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	/* We currently use nothing arch-specific in upper 32 bits */
@@ -153,6 +171,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return get_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return kvm_arm_timer_get_reg(vcpu, reg);
+
 	return kvm_arm_coproc_get_reg(vcpu, reg);
 }
 
@@ -166,6 +187,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return set_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return kvm_arm_timer_set_reg(vcpu, reg);
+
 	return kvm_arm_coproc_set_reg(vcpu, reg);
 }
For migration to work we need to save (and later restore) the state of
each core's virtual generic timer.
Since this is per VCPU, we can use the [gs]et_one_reg ioctl and export
the three needed registers (control, counter, compare value).
Though they live in cp15 space, we don't use the existing list, since
they need special accessor functions and also the arch timer is
optional.

Changes from v1:
- move code out of coproc.c and into guest.c and arch_timer.c
- present the registers with their native CP15 addresses, but without
  using space in the VCPU's cp15 array
- do the user space copying in the accessor functions

Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
---
 arch/arm/include/asm/kvm_host.h |  5 ++++
 arch/arm/include/uapi/asm/kvm.h | 16 ++++++++++
 arch/arm/kvm/arch_timer.c       | 65 +++++++++++++++++++++++++++++++++++++++++
 arch/arm/kvm/guest.c            | 26 ++++++++++++++++-
 4 files changed, 111 insertions(+), 1 deletion(-)
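For completeness, a sketch of how user space would drive these
registers through the ONE_REG interface; vcpu_fd is assumed to be an
open VCPU file descriptor, and the VM is assumed to be stopped around
both calls.

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Save the guest's view of the virtual counter, then put it
	 * back after the VM state has been transferred. */
	static int migrate_virtual_counter(int vcpu_fd)
	{
		__u64 val;
		struct kvm_one_reg reg = {
			.id   = KVM_REG_ARM_TIMER_CNT,
			.addr = (__u64)(unsigned long)&val,
		};

		if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))	/* save */
			return -1;
		/* ... transfer VM state to the destination ... */
		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);	/* restore */
	}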