Message ID | 20220930210751.225873-3-scgl@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg | expand |
Hi Janis,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on f76349cf41451c5c42a99f18a9163377e4b364ff]
url: https://github.com/intel-lab-lkp/linux/commits/Janis-Schoetterl-Glausch/KVM-s390-Extend-MEM_OP-ioctl-by-storage-key-checked-cmpxchg/20221001-050945
base: f76349cf41451c5c42a99f18a9163377e4b364ff
config: s390-randconfig-m031-20220925
compiler: s390-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/b53d129604de03147fce1d353698f961b256f895
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Janis-Schoetterl-Glausch/KVM-s390-Extend-MEM_OP-ioctl-by-storage-key-checked-cmpxchg/20221001-050945
git checkout b53d129604de03147fce1d353698f961b256f895
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
arch/s390/include/asm/uaccess.h: Assembler messages:
>> arch/s390/include/asm/uaccess.h:430: Error: Unrecognized opcode: `xrk'
arch/s390/include/asm/uaccess.h:434: Error: Unrecognized opcode: `xrk'
vim +430 arch/s390/include/asm/uaccess.h
110a6dbb2eca6b Heiko Carstens 2020-09-14 394
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 395 static __always_inline int __cmpxchg_user_key_small(int size, u64 address,
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 396 unsigned __int128 *old_p,
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 397 unsigned __int128 new, u8 access_key)
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 398 {
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 399 u32 shift, mask, old_word, new_word, align_mask, tmp, diff;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 400 u64 aligned;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 401 int ret = -EFAULT;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 402
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 403 switch (size) {
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 404 case 2:
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 405 align_mask = 2;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 406 aligned = (address ^ (address & align_mask));
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 407 shift = (sizeof(u32) - (address & align_mask) - size) * 8;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 408 mask = 0xffff << shift;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 409 old_word = ((u16)*old_p) << shift;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 410 new_word = ((u16)new) << shift;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 411 break;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 412 case 1:
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 413 align_mask = 3;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 414 aligned = (address ^ (address & align_mask));
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 415 shift = (sizeof(u32) - (address & align_mask) - size) * 8;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 416 mask = 0xff << shift;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 417 old_word = ((u8)*old_p) << shift;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 418 new_word = ((u8)new) << shift;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 419 break;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 420 }
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 421 asm volatile(
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 422 "spka 0(%[access_key])\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 423 " sacf 256\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 424 "0: l %[tmp],%[aligned]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 425 "1: nr %[tmp],%[hole_mask]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 426 " or %[new_word],%[tmp]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 427 " or %[old_word],%[tmp]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 428 " lr %[tmp],%[old_word]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 429 "2: cs %[tmp],%[new_word],%[aligned]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 @430 "3: jnl 4f\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 431 " xrk %[diff],%[tmp],%[old_word]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 432 " nr %[diff],%[hole_mask]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 433 " xr %[new_word],%[diff]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 434 " xr %[old_word],%[diff]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 435 " xrk %[diff],%[tmp],%[old_word]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 436 " jz 2b\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 437 "4: ipm %[ret]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 438 " srl %[ret],28\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 439 "5: sacf 768\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 440 " spka %[default_key]\n"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 441 EX_TABLE(0b, 5b) EX_TABLE(1b, 5b)
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 442 EX_TABLE(2b, 5b) EX_TABLE(3b, 5b)
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 443 : [old_word] "+&d" (old_word),
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 444 [new_word] "+&d" (new_word),
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 445 [tmp] "=&d" (tmp),
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 446 [aligned] "+Q" (*(u32 *)aligned),
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 447 [diff] "=&d" (diff),
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 448 [ret] "+d" (ret)
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 449 : [access_key] "a" (access_key << 4),
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 450 [hole_mask] "d" (~mask),
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 451 [default_key] "J" (PAGE_DEFAULT_KEY)
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 452 : "cc"
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 453 );
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 454 *old_p = (tmp & mask) >> shift;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 455 return ret;
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 456 }
db6f2eb3910899 Janis Schoetterl-Glausch 2022-09-30 457
On 30/09/2022 23.07, Janis Schoetterl-Glausch wrote: > User space can use the MEM_OP ioctl to make storage key checked reads > and writes to the guest, however, it has no way of performing atomic, > key checked, accesses to the guest. > Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg > mode. For now, support this mode for absolute accesses only. > > This mode can be use, for example, to set the device-state-change > indicator and the adapter-local-summary indicator atomically. > > Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com> > --- ... > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index eed0315a77a6..b856705f3f6b 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -580,7 +580,9 @@ struct kvm_translation { > struct kvm_s390_mem_op { > /* in */ > __u64 gaddr; /* the guest address */ > + /* in & out */ > __u64 flags; /* flags */ > + /* in */ > __u32 size; /* amount of bytes */ > __u32 op; /* type of operation */ > __u64 buf; /* buffer in userspace */ > @@ -588,6 +590,8 @@ struct kvm_s390_mem_op { > struct { > __u8 ar; /* the access register number */ > __u8 key; /* access key, ignored if flag unset */ > + /* in & out */ > + __u64 old[2]; /* ignored if flag unset */ The alignment looks very unfortunate now ... could you please add a "__u8 pad[6]" or "__u8 pad[14]" in front of the new field? > }; > __u32 sida_offset; /* offset into the sida */ > __u8 reserved[32]; /* ignored */ > @@ -604,6 +608,7 @@ struct kvm_s390_mem_op { > #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) > #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) > #define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) > +#define KVM_S390_MEMOP_F_CMPXCHG (1ULL << 3) > > /* for KVM_INTERRUPT */ > struct kvm_interrupt { > diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h > index 9408d6cc8e2c..a1cb66ae0995 100644 > --- a/arch/s390/kvm/gaccess.h > +++ b/arch/s390/kvm/gaccess.h > @@ -206,6 +206,10 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, > int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, > void *data, unsigned long len, enum gacc_mode mode); > > +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, > + unsigned __int128 *old, > + unsigned __int128 new, u8 access_key); > + > /** > * write_guest_with_key - copy data from kernel space to guest space > * @vcpu: virtual cpu > diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c > index 0243b6e38d36..c0e490ecc372 100644 > --- a/arch/s390/kvm/gaccess.c > +++ b/arch/s390/kvm/gaccess.c > @@ -1161,6 +1161,62 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, > return rc; > } > > +/** > + * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address. > + * @kvm: Virtual machine instance. > + * @gpa: Absolute guest address of the location to be changed. > + * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a > + * non power of two will result in failure. > + * @old_p: Pointer to old value. If the location at @gpa contains this value, the > + * exchange will succeed. After calling cmpxchg_guest_abs_with_key() *@old > + * contains the value at @gpa before the attempt to exchange the value. > + * @new: The value to place at @gpa. > + * @access_key: The access key to use for the guest access. > + * > + * Atomically exchange the value at @gpa by @new, if it contains *@old. > + * Honors storage keys. > + * > + * Return: * 0: successful exchange > + * * 1: exchange unsuccessful > + * * a program interruption code indicating the reason cmpxchg could > + * not be attempted > + * * -EINVAL: address misaligned or len not power of two > + */ > +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, > + unsigned __int128 *old_p, unsigned __int128 new, > + u8 access_key) > +{ > + gfn_t gfn = gpa >> PAGE_SHIFT; > + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); > + bool writable; > + hva_t hva; > + int ret; > + > + if (!IS_ALIGNED(gpa, len)) > + return -EINVAL; > + > + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); > + if (kvm_is_error_hva(hva)) > + return PGM_ADDRESSING; > + /* > + * Check if it's a ro memslot, even tho that can't occur (they're unsupported). Not everybody is used to read such abbreviated English ... I'd recommend to spell it rather properly (ro ==> read-only, tho ==> though) > + * Don't try to actually handle that case. > + */ > + if (!writable) > + return -EOPNOTSUPP; > + > + hva += offset_in_page(gpa); > + ret = cmpxchg_user_key_size(len, (void __user *)hva, old_p, new, access_key); > + mark_page_dirty_in_slot(kvm, slot, gfn); > + /* > + * Assume that the fault is caused by key protection, the alternative > + * is that the user page is write protected. > + */ > + if (ret == -EFAULT) > + ret = PGM_PROTECTION; > + return ret; > +} > + > /** > * guest_translate_address_with_key - translate guest logical into guest absolute address > * @vcpu: virtual cpu > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > index b7ef0b71014d..d594d1318d2a 100644 > --- a/arch/s390/kvm/kvm-s390.c > +++ b/arch/s390/kvm/kvm-s390.c > @@ -576,7 +576,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_S390_VCPU_RESETS: > case KVM_CAP_SET_GUEST_DEBUG: > case KVM_CAP_S390_DIAG318: > - case KVM_CAP_S390_MEM_OP_EXTENSION: > r = 1; > break; > case KVM_CAP_SET_GUEST_DEBUG2: > @@ -590,6 +589,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_S390_MEM_OP: > r = MEM_OP_MAX_SIZE; > break; > + case KVM_CAP_S390_MEM_OP_EXTENSION: > + r = 0x3; Add a comment to explain that magic value 0x3 ? > + break; > case KVM_CAP_NR_VCPUS: > case KVM_CAP_MAX_VCPUS: > case KVM_CAP_MAX_VCPU_ID: Thomas
On 30/09/2022 23.07, Janis Schoetterl-Glausch wrote: > User space can use the MEM_OP ioctl to make storage key checked reads > and writes to the guest, however, it has no way of performing atomic, > key checked, accesses to the guest. > Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg > mode. For now, support this mode for absolute accesses only. > > This mode can be use, for example, to set the device-state-change > indicator and the adapter-local-summary indicator atomically. > > Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com> > --- > > > The return value of MEM_OP is: > 0 on success, > < 0 on generic error (e.g. -EFAULT or -ENOMEM), > > 0 if an exception occurred while walking the page tables > A cmpxchg failing because the old value doesn't match is neither an > error nor an exception, so the question is how best to signal that > condition. This is not strictly necessary since user space can compare > the value of old after the MEM_OP with the value it set. If they're > different the cmpxchg failed. It might be a better user interface if > there is an easier way to see if the cmpxchg failed. > This patch sets the cmpxchg flag bit to 0 on a successful cmpxchg. > This way you can compare against a constant instead of the old old > value. > This has the disadvantage of being a bit weird, other suggestions > welcome. This also breaks the old API of defining the ioctl as _IOW only ... with your change to the flags field, it effectively gets IOWR instead. Maybe it would be better to put all the new logic into a new struct and only pass a pointer to that struct in kvm_s390_mem_op, so that the ioctl stays IOW ? ... or maybe even introduce a completely new ioctl for this functionality instead? Thomas
On Wed, 2022-10-05 at 08:32 +0200, Thomas Huth wrote: > On 30/09/2022 23.07, Janis Schoetterl-Glausch wrote: > > User space can use the MEM_OP ioctl to make storage key checked reads > > and writes to the guest, however, it has no way of performing atomic, > > key checked, accesses to the guest. > > Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg > > mode. For now, support this mode for absolute accesses only. > > > > This mode can be use, for example, to set the device-state-change > > indicator and the adapter-local-summary indicator atomically. > > > > Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com> > > --- > > > > > > The return value of MEM_OP is: > > 0 on success, > > < 0 on generic error (e.g. -EFAULT or -ENOMEM), > > > 0 if an exception occurred while walking the page tables > > A cmpxchg failing because the old value doesn't match is neither an > > error nor an exception, so the question is how best to signal that > > condition. This is not strictly necessary since user space can compare > > the value of old after the MEM_OP with the value it set. If they're > > different the cmpxchg failed. It might be a better user interface if > > there is an easier way to see if the cmpxchg failed. > > This patch sets the cmpxchg flag bit to 0 on a successful cmpxchg. > > This way you can compare against a constant instead of the old old > > value. > > This has the disadvantage of being a bit weird, other suggestions > > welcome. > > This also breaks the old API of defining the ioctl as _IOW only ... with > your change to the flags field, it effectively gets IOWR instead. Oh, right. > > Maybe it would be better to put all the new logic into a new struct and only > pass a pointer to that struct in kvm_s390_mem_op, so that the ioctl stays > IOW ? ... or maybe even introduce a completely new ioctl for this > functionality instead? Hmmm, the latter seems a bit ugly since there is so much commonality with the existing memop. > > Thomas >
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eed0315a77a6..b856705f3f6b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -580,7 +580,9 @@ struct kvm_translation { struct kvm_s390_mem_op { /* in */ __u64 gaddr; /* the guest address */ + /* in & out */ __u64 flags; /* flags */ + /* in */ __u32 size; /* amount of bytes */ __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ @@ -588,6 +590,8 @@ struct kvm_s390_mem_op { struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ + /* in & out */ + __u64 old[2]; /* ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ -604,6 +608,7 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) #define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +#define KVM_S390_MEMOP_F_CMPXCHG (1ULL << 3) /* for KVM_INTERRUPT */ struct kvm_interrupt { diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 9408d6cc8e2c..a1cb66ae0995 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -206,6 +206,10 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode); +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, + unsigned __int128 *old, + unsigned __int128 new, u8 access_key); + /** * write_guest_with_key - copy data from kernel space to guest space * @vcpu: virtual cpu diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0243b6e38d36..c0e490ecc372 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1161,6 +1161,62 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, return rc; } +/** + * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address. + * @kvm: Virtual machine instance. + * @gpa: Absolute guest address of the location to be changed. + * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a + * non power of two will result in failure. + * @old_p: Pointer to old value. If the location at @gpa contains this value, the + * exchange will succeed. After calling cmpxchg_guest_abs_with_key() *@old + * contains the value at @gpa before the attempt to exchange the value. + * @new: The value to place at @gpa. + * @access_key: The access key to use for the guest access. + * + * Atomically exchange the value at @gpa by @new, if it contains *@old. + * Honors storage keys. + * + * Return: * 0: successful exchange + * * 1: exchange unsuccessful + * * a program interruption code indicating the reason cmpxchg could + * not be attempted + * * -EINVAL: address misaligned or len not power of two + */ +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, + unsigned __int128 *old_p, unsigned __int128 new, + u8 access_key) +{ + gfn_t gfn = gpa >> PAGE_SHIFT; + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + bool writable; + hva_t hva; + int ret; + + if (!IS_ALIGNED(gpa, len)) + return -EINVAL; + + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* + * Check if it's a ro memslot, even tho that can't occur (they're unsupported). + * Don't try to actually handle that case. + */ + if (!writable) + return -EOPNOTSUPP; + + hva += offset_in_page(gpa); + ret = cmpxchg_user_key_size(len, (void __user *)hva, old_p, new, access_key); + mark_page_dirty_in_slot(kvm, slot, gfn); + /* + * Assume that the fault is caused by key protection, the alternative + * is that the user page is write protected. + */ + if (ret == -EFAULT) + ret = PGM_PROTECTION; + return ret; +} + /** * guest_translate_address_with_key - translate guest logical into guest absolute address * @vcpu: virtual cpu diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b7ef0b71014d..d594d1318d2a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -576,7 +576,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: - case KVM_CAP_S390_MEM_OP_EXTENSION: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -590,6 +589,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break; + case KVM_CAP_S390_MEM_OP_EXTENSION: + r = 0x3; + break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: @@ -2711,15 +2713,22 @@ static bool access_key_invalid(u8 access_key) return access_key > 0xf; } -static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) +static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop, bool *modified) { void __user *uaddr = (void __user *)mop->buf; + unsigned __int128 old; + union { + unsigned __int128 quad; + char raw[sizeof(unsigned __int128)]; + } new = { .quad = 0 }; u64 supported_flags; void *tmpbuf = NULL; int r, srcu_idx; + *modified = false; supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION - | KVM_S390_MEMOP_F_CHECK_ONLY; + | KVM_S390_MEMOP_F_CHECK_ONLY + | KVM_S390_MEMOP_F_CMPXCHG; if (mop->flags & ~supported_flags || !mop->size) return -EINVAL; if (mop->size > MEM_OP_MAX_SIZE) @@ -2741,6 +2750,13 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) } else { mop->key = 0; } + if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + if (mop->size > sizeof(new)) + return -EINVAL; + if (copy_from_user(&new.raw[sizeof(new) - mop->size], uaddr, mop->size)) + return -EFAULT; + memcpy(&old, mop->old, sizeof(old)); + } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) @@ -2771,6 +2787,16 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) case KVM_S390_MEMOP_ABSOLUTE_WRITE: { if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); + } else if (mop->flags & KVM_S390_MEMOP_F_CMPXCHG) { + r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, + &old, new.quad, mop->key); + if (!r) { + mop->flags &= ~KVM_S390_MEMOP_F_CMPXCHG; + } else if (r == 1) { + memcpy(mop->old, &old, sizeof(old)); + r = 0; + } + *modified = true; } else { if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT; @@ -2918,11 +2944,23 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_S390_MEM_OP: { struct kvm_s390_mem_op mem_op; + bool modified; - if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) - r = kvm_s390_vm_mem_op(kvm, &mem_op); - else + r = copy_from_user(&mem_op, argp, sizeof(mem_op)); + if (r) { r = -EFAULT; + break; + } + r = kvm_s390_vm_mem_op(kvm, &mem_op, &modified); + if (r) + break; + if (modified) { + r = copy_to_user(argp, &mem_op, sizeof(mem_op)); + if (r) { + r = -EFAULT; + break; + } + } break; } case KVM_S390_ZPCI_OP: {