[v7,13/14] KVM: Enable and expose KVM_MEM_PRIVATE

Message ID 20220706082016.2603916-14-chao.p.peng@linux.intel.com
State New
Series KVM: mm: fd-based approach for supporting KVM guest private memory

Commit Message

Chao Peng July 6, 2022, 8:20 a.m. UTC
Register the private memslot with the fd-based memory backing store and
handle the memfile notifier callbacks to zap any existing mappings.

Currently the registration happens at memslot creation time, and the
initial support does not include page migration/swap.

KVM_MEM_PRIVATE is not exposed by default; architecture code can turn
it on by implementing kvm_arch_private_mem_supported().

A 'kvm' reference is added to the memslot structure since the
memfile_notifier callbacks can only obtain a memslot reference, while a
kvm reference is needed to do the zapping.

Co-developed-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
---
 include/linux/kvm_host.h |   1 +
 virt/kvm/kvm_main.c      | 117 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 109 insertions(+), 9 deletions(-)
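
For illustration, a hedged sketch of the per-architecture opt-in mentioned
above. This is hypothetical: the weak default in kvm_main.c simply returns
false, and the 'private_mem_enabled' field below is made up for the example.

bool kvm_arch_private_mem_supported(struct kvm *kvm)
{
	/* Hypothetical per-VM flag; real architecture code would check
	 * whatever condition actually makes private memory usable. */
	return kvm->arch.private_mem_enabled;
}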

Comments

Gupta, Pankaj July 19, 2022, 9:55 a.m. UTC | #1
> Register the private memslot with the fd-based memory backing store and
> handle the memfile notifier callbacks to zap any existing mappings.
> 
> Currently the registration happens at memslot creation time, and the
> initial support does not include page migration/swap.
> 
> KVM_MEM_PRIVATE is not exposed by default; architecture code can turn
> it on by implementing kvm_arch_private_mem_supported().
> 
> A 'kvm' reference is added to the memslot structure since the
> memfile_notifier callbacks can only obtain a memslot reference, while a
> kvm reference is needed to do the zapping.
> 
> Co-developed-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
>   include/linux/kvm_host.h |   1 +
>   virt/kvm/kvm_main.c      | 117 ++++++++++++++++++++++++++++++++++++---
>   2 files changed, 109 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 8f56426aa1e3..4e5a0db68799 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -584,6 +584,7 @@ struct kvm_memory_slot {
>   	struct file *private_file;
>   	loff_t private_offset;
>   	struct memfile_notifier notifier;
> +	struct kvm *kvm;
>   };
>   
>   static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index bb714c2a4b06..d6f7e074cab2 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -941,6 +941,63 @@ static int kvm_vm_ioctl_set_encrypted_region(struct kvm *kvm, unsigned int ioctl
>   
>   	return r;
>   }
> +
> +static void kvm_memfile_notifier_invalidate(struct memfile_notifier *notifier,
> +					    pgoff_t start, pgoff_t end)
> +{
> +	struct kvm_memory_slot *slot = container_of(notifier,
> +						    struct kvm_memory_slot,
> +						    notifier);
> +	unsigned long base_pgoff = slot->private_offset >> PAGE_SHIFT;
> +	gfn_t start_gfn = slot->base_gfn;
> +	gfn_t end_gfn = slot->base_gfn + slot->npages;
> +
> +
> +	if (start > base_pgoff)
> +		start_gfn = slot->base_gfn + start - base_pgoff;
> +
> +	if (end < base_pgoff + slot->npages)
> +		end_gfn = slot->base_gfn + end - base_pgoff;
> +
> +	if (start_gfn >= end_gfn)
> +		return;
> +
> +	kvm_zap_gfn_range(slot->kvm, start_gfn, end_gfn);
> +}
> +
> +static struct memfile_notifier_ops kvm_memfile_notifier_ops = {
> +	.invalidate = kvm_memfile_notifier_invalidate,
> +};
> +
> +#define KVM_MEMFILE_FLAGS (MEMFILE_F_USER_INACCESSIBLE | \
> +			   MEMFILE_F_UNMOVABLE | \
> +			   MEMFILE_F_UNRECLAIMABLE)
> +
> +static inline int kvm_private_mem_register(struct kvm_memory_slot *slot)
> +{
> +	slot->notifier.ops = &kvm_memfile_notifier_ops;
> +	return memfile_register_notifier(slot->private_file, KVM_MEMFILE_FLAGS,
> +					 &slot->notifier);
> +}
> +
> +static inline void kvm_private_mem_unregister(struct kvm_memory_slot *slot)
> +{
> +	memfile_unregister_notifier(&slot->notifier);
> +}
> +
> +#else /* !CONFIG_HAVE_KVM_PRIVATE_MEM */
> +
> +static inline int kvm_private_mem_register(struct kvm_memory_slot *slot)
> +{
> +	WARN_ON_ONCE(1);
> +	return -EOPNOTSUPP;
> +}
> +
> +static inline void kvm_private_mem_unregister(struct kvm_memory_slot *slot)
> +{
> +	WARN_ON_ONCE(1);
> +}
> +
>   #endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
>   
>   #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
> @@ -987,6 +1044,11 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
>   /* This does not remove the slot from struct kvm_memslots data structures */
>   static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
>   {
> +	if (slot->flags & KVM_MEM_PRIVATE) {
> +		kvm_private_mem_unregister(slot);
> +		fput(slot->private_file);
> +	}
> +
>   	kvm_destroy_dirty_bitmap(slot);
>   
>   	kvm_arch_free_memslot(kvm, slot);
> @@ -1548,10 +1610,16 @@ bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)
>   	return false;
>   }
>   
> -static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
> +static int check_memory_region_flags(struct kvm *kvm,
> +				     const struct kvm_user_mem_region *mem)
>   {
>   	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
>   
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +	if (kvm_arch_private_mem_supported(kvm))
> +		valid_flags |= KVM_MEM_PRIVATE;
> +#endif
> +
>   #ifdef __KVM_HAVE_READONLY_MEM
>   	valid_flags |= KVM_MEM_READONLY;
>   #endif
> @@ -1627,6 +1695,12 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
>   {
>   	int r;
>   
> +	if (change == KVM_MR_CREATE && new->flags & KVM_MEM_PRIVATE) {
> +		r = kvm_private_mem_register(new);
> +		if (r)
> +			return r;
> +	}
> +
>   	/*
>   	 * If dirty logging is disabled, nullify the bitmap; the old bitmap
>   	 * will be freed on "commit".  If logging is enabled in both old and
> @@ -1655,6 +1729,9 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
>   	if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap))
>   		kvm_destroy_dirty_bitmap(new);
>   
> +	if (r && change == KVM_MR_CREATE && new->flags & KVM_MEM_PRIVATE)
> +		kvm_private_mem_unregister(new);
> +
>   	return r;
>   }
>   
> @@ -1952,7 +2029,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   	int as_id, id;
>   	int r;
>   
> -	r = check_memory_region_flags(mem);
> +	r = check_memory_region_flags(kvm, mem);
>   	if (r)
>   		return r;
>   
> @@ -1971,6 +2048,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   	     !access_ok((void __user *)(unsigned long)mem->userspace_addr,
>   			mem->memory_size))
>   		return -EINVAL;
> +	if (mem->flags & KVM_MEM_PRIVATE &&
> +		(mem->private_offset & (PAGE_SIZE - 1) ||
> +		 mem->private_offset > U64_MAX - mem->memory_size))
> +		return -EINVAL;
>   	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
>   		return -EINVAL;
>   	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
> @@ -2009,6 +2090,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   		if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
>   			return -EINVAL;
>   	} else { /* Modify an existing slot. */
> +		/* Private memslots are immutable, they can only be deleted. */
> +		if (mem->flags & KVM_MEM_PRIVATE)
> +			return -EINVAL;
>   		if ((mem->userspace_addr != old->userspace_addr) ||
>   		    (npages != old->npages) ||
>   		    ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
> @@ -2037,10 +2121,27 @@ int __kvm_set_memory_region(struct kvm *kvm,
>   	new->npages = npages;
>   	new->flags = mem->flags;
>   	new->userspace_addr = mem->userspace_addr;
> +	if (mem->flags & KVM_MEM_PRIVATE) {
> +		new->private_file = fget(mem->private_fd);
> +		if (!new->private_file) {
> +			r = -EINVAL;
> +			goto out;
> +		}
> +		new->private_offset = mem->private_offset;
> +	}
> +
> +	new->kvm = kvm;
>   
>   	r = kvm_set_memslot(kvm, old, new, change);
>   	if (r)
> -		kfree(new);
> +		goto out;
> +
> +	return 0;
> +
> +out:
> +	if (new->private_file)
> +		fput(new->private_file);
> +	kfree(new);
>   	return r;
>   }
>   EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
> @@ -4712,12 +4813,10 @@ static long kvm_vm_ioctl(struct file *filp,
>   			(u32 __user *)(argp + offsetof(typeof(mem), flags))))
>   			goto out;
>   
> -		if (flags & KVM_MEM_PRIVATE) {
> -			r = -EINVAL;
> -			goto out;
> -		}
> -
> -		size = sizeof(struct kvm_userspace_memory_region);
> +		if (flags & KVM_MEM_PRIVATE)
> +			size = sizeof(struct kvm_userspace_memory_region_ext);

Not sure whether we should use kvm_userspace_memory_region_ext or
kvm_user_mem_region here, just for readability.

> +		else
> +			size = sizeof(struct kvm_userspace_memory_region);
>   
>   		if (copy_from_user(&mem, argp, size))
>   			goto out;
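
For context, a sketch of how userspace would exercise this path once
KVM_MEM_PRIVATE is accepted. The nested 'region' layout follows the
kvm_userspace_memory_region_ext aliasing discussed below; the fd is assumed
to come from the inaccessible memfd backing store introduced earlier in the
series (e.g. memfd_create() with MFD_INACCESSIBLE), and the concrete values
are made up.

struct kvm_userspace_memory_region_ext region_ext = {
	.region = {
		.slot = 0,
		.flags = KVM_MEM_PRIVATE,
		.guest_phys_addr = 0x100000000ULL,
		.memory_size = mem_size,
		.userspace_addr = (__u64)(unsigned long)shared_va,
	},
	.private_fd = private_fd,	/* inaccessible memfd */
	.private_offset = 0,
};

if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region_ext))
	err(1, "KVM_SET_USER_MEMORY_REGION");
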
Chao Peng July 19, 2022, 2:12 p.m. UTC | #2
On Tue, Jul 19, 2022 at 11:55:24AM +0200, Gupta, Pankaj wrote:

...

> > @@ -4712,12 +4813,10 @@ static long kvm_vm_ioctl(struct file *filp,
> >   			(u32 __user *)(argp + offsetof(typeof(mem), flags))))
> >   			goto out;
> > -		if (flags & KVM_MEM_PRIVATE) {
> > -			r = -EINVAL;
> > -			goto out;
> > -		}
> > -
> > -		size = sizeof(struct kvm_userspace_memory_region);
> > +		if (flags & KVM_MEM_PRIVATE)
> > +			size = sizeof(struct kvm_userspace_memory_region_ext);
> 
> Not sure whether we should use kvm_userspace_memory_region_ext or
> kvm_user_mem_region here, just for readability.

Somewhat, but mainly for code maintainability: kvm_user_mem_region is
designed to be an alias of kvm_userspace_memory_region_ext so that the
code can access the 'unpacked' fields using something like
'mem.userspace_addr' instead of 'mem.region.userspace_addr'.

Chao
> 
> > +		else
> > +			size = sizeof(struct kvm_userspace_memory_region);
> >   		if (copy_from_user(&mem, argp, size))
> >   			goto out;
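
To make the aliasing concrete, a sketch of the two declarations being
discussed. The unpacked-alias relationship is what the reply above
describes; the exact field order and padding shown here are assumptions.

struct kvm_userspace_memory_region_ext {
	struct kvm_userspace_memory_region region;
	__u64 private_offset;
	__u32 private_fd;
	__u32 pad1;
	__u64 pad2[14];	/* room for future extension */
};

/* kernel-internal alias with the same layout, fields unpacked */
struct kvm_user_mem_region {
	__u32 slot;
	__u32 flags;
	__u64 guest_phys_addr;
	__u64 memory_size;
	__u64 userspace_addr;
	__u64 private_offset;
	__u32 private_fd;
	__u32 pad1;
	__u64 pad2[14];
};

With this, kernel code can write mem.userspace_addr where userspace filled
in region.userspace_addr at the same byte offset.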

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8f56426aa1e3..4e5a0db68799 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -584,6 +584,7 @@  struct kvm_memory_slot {
 	struct file *private_file;
 	loff_t private_offset;
 	struct memfile_notifier notifier;
+	struct kvm *kvm;
 };
 
 static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bb714c2a4b06..d6f7e074cab2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -941,6 +941,63 @@  static int kvm_vm_ioctl_set_encrypted_region(struct kvm *kvm, unsigned int ioctl
 
 	return r;
 }
+
+static void kvm_memfile_notifier_invalidate(struct memfile_notifier *notifier,
+					    pgoff_t start, pgoff_t end)
+{
+	struct kvm_memory_slot *slot = container_of(notifier,
+						    struct kvm_memory_slot,
+						    notifier);
+	unsigned long base_pgoff = slot->private_offset >> PAGE_SHIFT;
+	gfn_t start_gfn = slot->base_gfn;
+	gfn_t end_gfn = slot->base_gfn + slot->npages;
+
+
+	if (start > base_pgoff)
+		start_gfn = slot->base_gfn + start - base_pgoff;
+
+	if (end < base_pgoff + slot->npages)
+		end_gfn = slot->base_gfn + end - base_pgoff;
+
+	if (start_gfn >= end_gfn)
+		return;
+
+	kvm_zap_gfn_range(slot->kvm, start_gfn, end_gfn);
+}
+
+static struct memfile_notifier_ops kvm_memfile_notifier_ops = {
+	.invalidate = kvm_memfile_notifier_invalidate,
+};
+
+#define KVM_MEMFILE_FLAGS (MEMFILE_F_USER_INACCESSIBLE | \
+			   MEMFILE_F_UNMOVABLE | \
+			   MEMFILE_F_UNRECLAIMABLE)
+
+static inline int kvm_private_mem_register(struct kvm_memory_slot *slot)
+{
+	slot->notifier.ops = &kvm_memfile_notifier_ops;
+	return memfile_register_notifier(slot->private_file, KVM_MEMFILE_FLAGS,
+					 &slot->notifier);
+}
+
+static inline void kvm_private_mem_unregister(struct kvm_memory_slot *slot)
+{
+	memfile_unregister_notifier(&slot->notifier);
+}
+
+#else /* !CONFIG_HAVE_KVM_PRIVATE_MEM */
+
+static inline int kvm_private_mem_register(struct kvm_memory_slot *slot)
+{
+	WARN_ON_ONCE(1);
+	return -EOPNOTSUPP;
+}
+
+static inline void kvm_private_mem_unregister(struct kvm_memory_slot *slot)
+{
+	WARN_ON_ONCE(1);
+}
+
 #endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
 
 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
@@ -987,6 +1044,11 @@  static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /* This does not remove the slot from struct kvm_memslots data structures */
 static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
+	if (slot->flags & KVM_MEM_PRIVATE) {
+		kvm_private_mem_unregister(slot);
+		fput(slot->private_file);
+	}
+
 	kvm_destroy_dirty_bitmap(slot);
 
 	kvm_arch_free_memslot(kvm, slot);
@@ -1548,10 +1610,16 @@  bool __weak kvm_arch_private_mem_supported(struct kvm *kvm)
 	return false;
 }
 
-static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
+static int check_memory_region_flags(struct kvm *kvm,
+				     const struct kvm_user_mem_region *mem)
 {
 	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
 
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+	if (kvm_arch_private_mem_supported(kvm))
+		valid_flags |= KVM_MEM_PRIVATE;
+#endif
+
 #ifdef __KVM_HAVE_READONLY_MEM
 	valid_flags |= KVM_MEM_READONLY;
 #endif
@@ -1627,6 +1695,12 @@  static int kvm_prepare_memory_region(struct kvm *kvm,
 {
 	int r;
 
+	if (change == KVM_MR_CREATE && new->flags & KVM_MEM_PRIVATE) {
+		r = kvm_private_mem_register(new);
+		if (r)
+			return r;
+	}
+
 	/*
 	 * If dirty logging is disabled, nullify the bitmap; the old bitmap
 	 * will be freed on "commit".  If logging is enabled in both old and
@@ -1655,6 +1729,9 @@  static int kvm_prepare_memory_region(struct kvm *kvm,
 	if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap))
 		kvm_destroy_dirty_bitmap(new);
 
+	if (r && change == KVM_MR_CREATE && new->flags & KVM_MEM_PRIVATE)
+		kvm_private_mem_unregister(new);
+
 	return r;
 }
 
@@ -1952,7 +2029,7 @@  int __kvm_set_memory_region(struct kvm *kvm,
 	int as_id, id;
 	int r;
 
-	r = check_memory_region_flags(mem);
+	r = check_memory_region_flags(kvm, mem);
 	if (r)
 		return r;
 
@@ -1971,6 +2048,10 @@  int __kvm_set_memory_region(struct kvm *kvm,
 	     !access_ok((void __user *)(unsigned long)mem->userspace_addr,
 			mem->memory_size))
 		return -EINVAL;
+	if (mem->flags & KVM_MEM_PRIVATE &&
+		(mem->private_offset & (PAGE_SIZE - 1) ||
+		 mem->private_offset > U64_MAX - mem->memory_size))
+		return -EINVAL;
 	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
 		return -EINVAL;
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
@@ -2009,6 +2090,9 @@  int __kvm_set_memory_region(struct kvm *kvm,
 		if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
 			return -EINVAL;
 	} else { /* Modify an existing slot. */
+		/* Private memslots are immutable, they can only be deleted. */
+		if (mem->flags & KVM_MEM_PRIVATE)
+			return -EINVAL;
 		if ((mem->userspace_addr != old->userspace_addr) ||
 		    (npages != old->npages) ||
 		    ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
@@ -2037,10 +2121,27 @@  int __kvm_set_memory_region(struct kvm *kvm,
 	new->npages = npages;
 	new->flags = mem->flags;
 	new->userspace_addr = mem->userspace_addr;
+	if (mem->flags & KVM_MEM_PRIVATE) {
+		new->private_file = fget(mem->private_fd);
+		if (!new->private_file) {
+			r = -EINVAL;
+			goto out;
+		}
+		new->private_offset = mem->private_offset;
+	}
+
+	new->kvm = kvm;
 
 	r = kvm_set_memslot(kvm, old, new, change);
 	if (r)
-		kfree(new);
+		goto out;
+
+	return 0;
+
+out:
+	if (new->private_file)
+		fput(new->private_file);
+	kfree(new);
 	return r;
 }
 EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
@@ -4712,12 +4813,10 @@  static long kvm_vm_ioctl(struct file *filp,
 			(u32 __user *)(argp + offsetof(typeof(mem), flags))))
 			goto out;
 
-		if (flags & KVM_MEM_PRIVATE) {
-			r = -EINVAL;
-			goto out;
-		}
-
-		size = sizeof(struct kvm_userspace_memory_region);
+		if (flags & KVM_MEM_PRIVATE)
+			size = sizeof(struct kvm_userspace_memory_region_ext);
+		else
+			size = sizeof(struct kvm_userspace_memory_region);
 
 		if (copy_from_user(&mem, argp, size))
 			goto out;
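
As a standalone check of the clamping arithmetic in
kvm_memfile_notifier_invalidate, here is a small sketch with made-up
numbers (not part of the patch):

#include <assert.h>
#include <stdint.h>

typedef uint64_t gfn_t;

int main(void)
{
	unsigned long base_pgoff = 2;		/* private_offset >> PAGE_SHIFT */
	gfn_t base_gfn = 0x100;
	unsigned long npages = 8;
	unsigned long start = 4, end = 20;	/* invalidated file range, in pages */
	gfn_t start_gfn = base_gfn;
	gfn_t end_gfn = base_gfn + npages;

	/* Same clamping as the callback: convert the file-page range to
	 * gfns and intersect it with the slot. */
	if (start > base_pgoff)
		start_gfn = base_gfn + start - base_pgoff;
	if (end < base_pgoff + npages)
		end_gfn = base_gfn + end - base_pgoff;

	/* The slot maps file pages [2, 10); [4, 20) overlaps it at [4, 10),
	 * i.e. gfns [0x102, 0x108), the range that would be zapped. */
	assert(start_gfn == 0x102 && end_gfn == 0x108);
	return 0;
}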