@@ -1,6 +1,6 @@
VERSION = 3
PATCHLEVEL = 2
-SUBLEVEL = 82
+SUBLEVEL = 83
EXTRAVERSION =
NAME = Saber-toothed Squirrel
@@ -1527,6 +1527,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
#define FOLL_MLOCK 0x40 /* mark page as mlocked */
#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
+#define FOLL_COW 0x4000 /* internal GUP flag */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
@@ -1427,6 +1427,24 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
}
EXPORT_SYMBOL_GPL(zap_vma_ptes);
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+ unsigned int flags)
+{
+ if (pte_write(pte))
+ return true;
+
+ /*
+ * Make sure that we are really following CoWed page. We do not really
+ * have to care about exclusiveness of the page because we only want
+ * to ensure that once COWed page hasn't disappeared in the meantime
+ * or it hasn't been merged to a KSM page.
+ */
+ if ((flags & FOLL_FORCE) && (flags & FOLL_COW))
+ return page && PageAnon(page) && !PageKsm(page);
+
+ return false;
+}
+
/**
* follow_page - look up a page descriptor from a user-virtual address
* @vma: vm_area_struct mapping @address
@@ -1509,10 +1527,13 @@ split_fallthrough:
pte = *ptep;
if (!pte_present(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !pte_write(pte))
- goto unlock;
page = vm_normal_page(vma, address, pte);
+ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, flags)) {
+ pte_unmap_unlock(ptep, ptl);
+ return NULL;
+ }
+
if (unlikely(!page)) {
if ((flags & FOLL_DUMP) ||
!is_zero_pfn(pte_pfn(pte)))
@@ -1555,7 +1576,7 @@ split_fallthrough:
unlock_page(page);
}
}
-unlock:
+
pte_unmap_unlock(ptep, ptl);
out:
return page;
@@ -1789,17 +1810,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
* The VM_FAULT_WRITE bit tells us that
* do_wp_page has broken COW when necessary,
* even if maybe_mkwrite decided not to set
- * pte_write. We can thus safely do subsequent
- * page lookups as if they were reads. But only
- * do so when looping for pte_write is futile:
- * in some cases userspace may also be wanting
- * to write to the gotten user page, which a
- * read fault here might prevent (a readonly
- * page might get reCOWed by userspace write).
+ * pte_write. We cannot simply drop FOLL_WRITE
+ * here because the COWed page might be gone by
+ * the time we do the subsequent page lookups.
*/
if ((ret & VM_FAULT_WRITE) &&
!(vma->vm_flags & VM_WRITE))
- foll_flags &= ~FOLL_WRITE;
+ foll_flags |= FOLL_COW;
cond_resched();
}