[RFC,30/39] KVM: guest_memfd: Handle folio preparation for guest_memfd mmap

Message ID 24cf7a9b1ee499c4ca4da76e9945429072014d1e.1726009989.git.ackerleytng@google.com
State New
Series 1G page support for guest_memfd

Commit Message

Ackerley Tng Sept. 10, 2024, 11:44 p.m. UTC
Since guest_memfd now supports mmap(), folios have to be prepared
before they are faulted into userspace.

When memory attributes are switched between shared and private, the
up-to-date flag is cleared on the affected folios so that they are
re-prepared before their next use.

Use the folio's up-to-date flag to indicate that it is ready for guest
usage; the same flag marks readiness for either shared or private use.

Signed-off-by: Ackerley Tng <ackerleytng@google.com>

---
 virt/kvm/guest_memfd.c | 131 ++++++++++++++++++++++++++++++++++++++++-
 virt/kvm/kvm_main.c    |   2 +
 virt/kvm/kvm_mm.h      |   7 +++
 3 files changed, 139 insertions(+), 1 deletion(-)

Patch

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 110c4bbb004b..fb292e542381 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -129,13 +129,29 @@  static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
 }
 
 /**
- * Use the uptodate flag to indicate that the folio is prepared for KVM's usage.
+ * Use the folio's up-to-date flag to indicate that this folio is prepared for
+ * usage by the guest.
+ *
+ * This flag can be used whether the folio is prepared for PRIVATE or SHARED
+ * usage.
  */
 static inline void kvm_gmem_mark_prepared(struct folio *folio)
 {
 	folio_mark_uptodate(folio);
 }
 
+/**
+ * Clear the folio's up-to-date flag to indicate that this folio is not yet
+ * prepared for usage by the guest.
+ *
+ * This applies whether the folio was prepared for PRIVATE or SHARED
+ * usage.
+ */
+static inline void kvm_gmem_clear_prepared(struct folio *folio)
+{
+	folio_clear_uptodate(folio);
+}
+
 /*
  * Process @folio, which contains @gfn, so that the guest can use it.
  * The folio must be locked and the gfn must be contained in @slot.
@@ -148,6 +164,12 @@  static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
 	pgoff_t index;
 	int r;
 
+	/*
+	 * Defensively zero folio to avoid leaking kernel memory in
+	 * uninitialized pages. This is important since pages can now be mapped
+	 * into userspace, where hardware (e.g. TDX) won't be clearing those
+	 * pages.
+	 */
 	if (folio_test_hugetlb(folio)) {
 		folio_zero_user(folio, folio->index << PAGE_SHIFT);
 	} else {
@@ -1017,6 +1039,7 @@  static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
 {
 	struct inode *inode;
 	struct folio *folio;
+	bool is_prepared;
 
 	inode = file_inode(vmf->vma->vm_file);
 	if (!kvm_gmem_is_faultable(inode, vmf->pgoff))
@@ -1026,6 +1049,31 @@  static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
 	if (!folio)
 		return VM_FAULT_SIGBUS;
 
+	is_prepared = folio_test_uptodate(folio);
+	if (!is_prepared) {
+		unsigned long nr_pages;
+		unsigned long i;
+
+		if (folio_test_hugetlb(folio)) {
+			folio_zero_user(folio, folio->index << PAGE_SHIFT);
+		} else {
+			/*
+			 * Defensively zero folio to avoid leaking kernel memory in
+			 * uninitialized pages. This is important since pages can now be
+			 * mapped into userspace, where hardware (e.g. TDX) won't be
+			 * clearing those pages.
+			 *
+			 * Will probably need a version of kvm_gmem_prepare_folio() to
+			 * prepare the page for SHARED use.
+			 */
+			nr_pages = folio_nr_pages(folio);
+			for (i = 0; i < nr_pages; i++)
+				clear_highpage(folio_page(folio, i));
+		}
+
+		kvm_gmem_mark_prepared(folio);
+	}
+
 	vmf->page = folio_file_page(folio, vmf->pgoff);
 	return VM_FAULT_LOCKED;
 }
@@ -1593,6 +1641,87 @@  long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_populate);
 
+static void kvm_gmem_clear_prepared_range(struct inode *inode, pgoff_t start,
+					  pgoff_t end)
+{
+	pgoff_t index;
+
+	filemap_invalidate_lock_shared(inode->i_mapping);
+
+	/* TODO: replace iteration with filemap_get_folios() for efficiency. */
+	for (index = start; index < end;) {
+		struct folio *folio;
+
+		/* Don't use kvm_gmem_get_folio to avoid allocating */
+		folio = filemap_lock_folio(inode->i_mapping, index);
+		if (IS_ERR(folio)) {
+			++index;
+			continue;
+		}
+
+		kvm_gmem_clear_prepared(folio);
+
+		index = folio_next_index(folio);
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+
+	filemap_invalidate_unlock_shared(inode->i_mapping);
+}
+
+/**
+ * Clear the prepared flag for all folios in gfn range [@start, @end) in memslot
+ * @slot.
+ */
+static void kvm_gmem_clear_prepared_slot(struct kvm_memory_slot *slot, gfn_t start,
+					 gfn_t end)
+{
+	pgoff_t start_offset;
+	pgoff_t end_offset;
+	struct file *file;
+
+	file = kvm_gmem_get_file(slot);
+	if (!file)
+		return;
+
+	start_offset = start - slot->base_gfn + slot->gmem.pgoff;
+	end_offset = end - slot->base_gfn + slot->gmem.pgoff;
+
+	kvm_gmem_clear_prepared_range(file_inode(file), start_offset, end_offset);
+
+	fput(file);
+}
+
+/**
+ * Clear the prepared flag for all folios for any slot in gfn range
+ * [@start, @end) in @kvm.
+ */
+void kvm_gmem_clear_prepared_vm(struct kvm *kvm, gfn_t start, gfn_t end)
+{
+	int i;
+
+	lockdep_assert_held(&kvm->slots_lock);
+
+	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
+		struct kvm_memslot_iter iter;
+		struct kvm_memslots *slots;
+
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot_in_gfn_range(&iter, slots, start, end) {
+			struct kvm_memory_slot *slot;
+			gfn_t gfn_start;
+			gfn_t gfn_end;
+
+			slot = iter.slot;
+			gfn_start = max(start, slot->base_gfn);
+			gfn_end = min(end, slot->base_gfn + slot->npages);
+
+			if (iter.slot->flags & KVM_MEM_GUEST_MEMFD)
+				kvm_gmem_clear_prepared_slot(iter.slot, gfn_start, gfn_end);
+		}
+	}
+}
+
 /**
  * Returns true if pages in range [@start, @end) in inode @inode have no
  * userspace mappings.
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1a7bbcc31b7e..255d27df7f5c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2565,6 +2565,8 @@  static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
 		KVM_BUG_ON(r, kvm);
 	}
 
+	kvm_gmem_clear_prepared_vm(kvm, start, end);
+
 	kvm_handle_gfn_range(kvm, &post_set_range);
 
 out_unlock:
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index d8ff2b380d0e..25fd0d9f66cc 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -43,6 +43,7 @@  int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
 void kvm_gmem_unbind(struct kvm_memory_slot *slot);
 int kvm_gmem_should_set_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
 				   unsigned long attrs);
+void kvm_gmem_clear_prepared_vm(struct kvm *kvm, gfn_t start, gfn_t end);
 #else
 static inline void kvm_gmem_init(struct module *module)
 {
@@ -68,6 +69,12 @@  static inline int kvm_gmem_should_set_attributes(struct kvm *kvm, gfn_t start,
 	return 0;
 }
 
+static inline void kvm_gmem_clear_prepared_vm(struct kvm *kvm,
+					       gfn_t start, gfn_t end)
+{
+	WARN_ON_ONCE(1);
+}
+
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
 #endif /* __KVM_MM_H__ */
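
For context, a sketch (not part of the patch; error handling omitted) of how a
VMM would trigger the new kvm_gmem_clear_prepared_vm() path: flipping a
range's memory attributes with KVM_SET_MEMORY_ATTRIBUTES clears the up-to-date
flag on the affected folios, so they are re-zeroed and re-prepared on their
next fault or guest use. The ioctl, struct kvm_memory_attributes and
KVM_MEMORY_ATTRIBUTE_PRIVATE are the existing KVM uAPI.

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int convert_to_private(int vm_fd, __u64 gpa, __u64 size)
{
	struct kvm_memory_attributes attrs = {
		.address = gpa,
		.size = size,
		.attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
	};

	/*
	 * kvm_vm_set_mem_attributes() (patched above) now calls
	 * kvm_gmem_clear_prepared_vm() for the affected gfn range before
	 * notifying the rest of KVM.
	 */
	return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
}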