
[v2,1/5] mm: userfaultfd: generic continue for non hugetlbfs

Message ID 20250402160721.97596-2-kalyazin@amazon.com

Commit Message

Nikita Kalyazin April 2, 2025, 4:07 p.m. UTC
Remove shmem-specific code from the UFFDIO_CONTINUE implementation for
non-huge pages by calling vm_ops->fault() instead.  A new VMF flag,
FAULT_FLAG_NO_USERFAULT_MINOR, is introduced to avoid a recursive call
to handle_userfault().
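
For illustration, a filesystem's fault handler would then honor the new
flag along these lines (sketch only; my_fault() and my_get_folio() are
hypothetical names, not part of this patch):

  static vm_fault_t my_fault(struct vm_fault *vmf)
  {
  	struct folio *folio = my_get_folio(vmf->vma->vm_file, vmf->pgoff);

  	/* Notify userspace of a minor fault unless the caller opted out. */
  	if (folio && userfaultfd_minor(vmf->vma) &&
  	    !(vmf->flags & FAULT_FLAG_NO_USERFAULT_MINOR))
  		return handle_userfault(vmf, VM_UFFD_MINOR);
  	...
  }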

Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
---
 include/linux/mm_types.h |  3 +++
 mm/hugetlb.c             |  2 +-
 mm/shmem.c               |  3 ++-
 mm/userfaultfd.c         | 25 ++++++++++++++++++-------
 4 files changed, 24 insertions(+), 9 deletions(-)
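
For context, the userspace side of the flow is unchanged: a minor fault
is still resolved with the existing UFFDIO_CONTINUE ioctl.  A minimal
sketch (assumes uffd was registered with UFFDIO_REGISTER_MODE_MINOR
over the mapping; error handling elided):

  #include <linux/userfaultfd.h>
  #include <sys/ioctl.h>

  /* Resolve a minor fault once the page cache is populated. */
  static int uffd_continue(int uffd, __u64 start, __u64 len)
  {
  	struct uffdio_continue cont = {
  		.range = { .start = start, .len = len },
  		.mode  = 0,	/* or UFFDIO_CONTINUE_MODE_DONTWAKE */
  	};

  	return ioctl(uffd, UFFDIO_CONTINUE, &cont);
  }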

Patch

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0234f14f2aa6..91a00f2cd565 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1429,6 +1429,8 @@ enum tlb_flush_reason {
  * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached.
  *                        We should only access orig_pte if this flag set.
  * @FAULT_FLAG_VMA_LOCK: The fault is handled under VMA lock.
+ * @FAULT_FLAG_NO_USERFAULT_MINOR: The fault handler must not call userfaultfd
+ *                                 minor handler.
  *
  * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
  * whether we would allow page faults to retry by specifying these two
@@ -1467,6 +1469,7 @@ enum fault_flag {
 	FAULT_FLAG_UNSHARE =		1 << 10,
 	FAULT_FLAG_ORIG_PTE_VALID =	1 << 11,
 	FAULT_FLAG_VMA_LOCK =		1 << 12,
+	FAULT_FLAG_NO_USERFAULT_MINOR = 1 << 13,
 };
 
 typedef unsigned int __bitwise zap_flags_t;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 97930d44d460..ba90d48144fc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6228,7 +6228,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 		}
 
 		/* Check for page in userfault range. */
-		if (userfaultfd_minor(vma)) {
+		if (userfaultfd_minor(vma) && !(vmf->flags & FAULT_FLAG_NO_USERFAULT_MINOR)) {
 			folio_unlock(folio);
 			folio_put(folio);
 			/* See comment in userfaultfd_missing() block above */
diff --git a/mm/shmem.c b/mm/shmem.c
index 1ede0800e846..5e1911e39dec 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2467,7 +2467,8 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	fault_mm = vma ? vma->vm_mm : NULL;
 
 	folio = filemap_get_entry(inode->i_mapping, index);
-	if (folio && vma && userfaultfd_minor(vma)) {
+	if (folio && vma && userfaultfd_minor(vma) &&
+	    !(vmf->flags & FAULT_FLAG_NO_USERFAULT_MINOR)) {
 		if (!xa_is_value(folio))
 			folio_put(folio);
 		*fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index d06453fa8aba..68a995216789 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -386,24 +386,35 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
 				     unsigned long dst_addr,
 				     uffd_flags_t flags)
 {
-	struct inode *inode = file_inode(dst_vma->vm_file);
-	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
 	struct folio *folio;
 	struct page *page;
 	int ret;
+	struct vm_fault vmf = {
+		.vma = dst_vma,
+		.address = dst_addr,
+		.flags = FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE |
+		    FAULT_FLAG_NO_USERFAULT_MINOR,
+		.pte = NULL,
+		.page = NULL,
+		.pgoff = linear_page_index(dst_vma, dst_addr),
+	};
+
+	if (!dst_vma->vm_ops || !dst_vma->vm_ops->fault)
+		return -EINVAL;
 
-	ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
-	/* Our caller expects us to return -EFAULT if we failed to find folio */
-	if (ret == -ENOENT)
+	ret = dst_vma->vm_ops->fault(&vmf);
+	if (ret & VM_FAULT_ERROR) {
 		ret = -EFAULT;
-	if (ret)
 		goto out;
+	}
+
+	page = vmf.page;
+	folio = page ? page_folio(page) : NULL;
 	if (!folio) {
 		ret = -EFAULT;
 		goto out;
 	}
 
-	page = folio_file_page(folio, pgoff);
 	if (PageHWPoison(page)) {
 		ret = -EIO;
 		goto out_release;