diff mbox series

[v2,05/17] mm/frame-vector: Use FOLL_LONGTERM

Message ID 20201009075934.3509076-6-daniel.vetter@ffwll.ch
State Superseded
Headers show
Series None | expand

Commit Message

Daniel Vetter Oct. 9, 2020, 7:59 a.m. UTC
This is used by media/videobuf2 for persistent DMA mappings, not just
for a single DMA operation that is then freed again, so it needs
FOLL_LONGTERM.

Unfortunately the current pin_user_pages_locked() doesn't support
FOLL_LONGTERM due to locking issues. Rework the code to pull the
pin_user_pages path (now pin_user_pages_fast()) out from the mmap_sem
critical section as suggested by Jason.

By relying entirely on the vma checks in pin_user_pages and follow_pfn
(for vm_flags and vma_is_fsdax) we can also streamline the code a lot.

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Pawel Osciak <pawel@osciak.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Tomasz Figa <tfiga@chromium.org>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-samsung-soc@vger.kernel.org
Cc: linux-media@vger.kernel.org
--
v2: Streamline the code and further simplify the loop checks (Jason)
---
 mm/frame_vector.c | 50 ++++++++++++++---------------------------------
 1 file changed, 15 insertions(+), 35 deletions(-)

Comments

John Hubbard Oct. 16, 2020, 7:54 a.m. UTC | #1
On 10/9/20 12:59 AM, Daniel Vetter wrote:
...
> @@ -48,40 +47,25 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
>
>   	start = untagged_addr(start);
>
> -	mmap_read_lock(mm);
> -	locked = 1;
> -	vma = find_vma_intersection(mm, start, start + 1);
> -	if (!vma) {
> -		ret = -EFAULT;
> -		goto out;
> -	}
> -
> -	/*
> -	 * While get_vaddr_frames() could be used for transient (kernel
> -	 * controlled lifetime) pinning of memory pages all current
> -	 * users establish long term (userspace controlled lifetime)
> -	 * page pinning. Treat get_vaddr_frames() like
> -	 * get_user_pages_longterm() and disallow it for filesystem-dax
> -	 * mappings.
> -	 */
> -	if (vma_is_fsdax(vma)) {
> -		ret = -EOPNOTSUPP;
> -		goto out;
> -	}
> -
> -	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
> +	ret = pin_user_pages_fast(start, nr_frames,
> +				  FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
> +				  (struct page **)(vec->ptrs));
> +	if (ret > 0) {


None of the callers that we have today will accept anything less than
ret == nr_frames. And the whole partially pinned region idea turns out
to be just not useful for almost everyone, from what I recall of the gup/pup
call sites. So I wonder if we should just have get_vaddr_frames do the
cleanup here and return -EFAULT, if ret != nr_frames ?

thanks,
-- 
John Hubbard
NVIDIA
diff mbox series

Patch

diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 10f82d5643b6..d44779e56313 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -38,7 +38,6 @@  int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 	struct vm_area_struct *vma;
 	int ret = 0;
 	int err;
-	int locked;
 
 	if (nr_frames == 0)
 		return 0;
@@ -48,40 +47,25 @@  int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 
 	start = untagged_addr(start);
 
-	mmap_read_lock(mm);
-	locked = 1;
-	vma = find_vma_intersection(mm, start, start + 1);
-	if (!vma) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	/*
-	 * While get_vaddr_frames() could be used for transient (kernel
-	 * controlled lifetime) pinning of memory pages all current
-	 * users establish long term (userspace controlled lifetime)
-	 * page pinning. Treat get_vaddr_frames() like
-	 * get_user_pages_longterm() and disallow it for filesystem-dax
-	 * mappings.
-	 */
-	if (vma_is_fsdax(vma)) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
+	ret = pin_user_pages_fast(start, nr_frames,
+				  FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
+				  (struct page **)(vec->ptrs));
+	if (ret > 0) {
 		vec->got_ref = true;
 		vec->is_pfns = false;
-		ret = pin_user_pages_locked(start, nr_frames,
-			gup_flags, (struct page **)(vec->ptrs), &locked);
-		goto out;
+		goto out_unlocked;
 	}
 
+	mmap_read_lock(mm);
 	vec->got_ref = false;
 	vec->is_pfns = true;
 	do {
 		unsigned long *nums = frame_vector_pfns(vec);
 
+		vma = find_vma_intersection(mm, start, start + 1);
+		if (!vma)
+			break;
+
 		while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) {
 			err = follow_pfn(vma, start, &nums[ret]);
 			if (err) {
@@ -92,17 +76,13 @@  int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 			start += PAGE_SIZE;
 			ret++;
 		}
-		/*
-		 * We stop if we have enough pages or if VMA doesn't completely
-		 * cover the tail page.
-		 */
-		if (ret >= nr_frames || start < vma->vm_end)
+		/* Bail out if VMA doesn't completely cover the tail page. */
+		if (start < vma->vm_end)
 			break;
-		vma = find_vma_intersection(mm, start, start + 1);
-	} while (vma && vma->vm_flags & (VM_IO | VM_PFNMAP));
+	} while (ret < nr_frames);
 out:
-	if (locked)
-		mmap_read_unlock(mm);
+	mmap_read_unlock(mm);
+out_unlocked:
 	if (!ret)
 		ret = -EFAULT;
 	if (ret > 0)