diff -ru2 old/include/linux/mm.h new/include/linux/mm.h
--- old/include/linux/mm.h	2012-07-12 04:32:21.000000000 +0100
+++ new/include/linux/mm.h	2016-10-24 13:38:47.012256040 +0100
@@ -1523,4 +1523,5 @@
 #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
+#define FOLL_COW	0x4000	/* internal GUP flag */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
diff -ru2 old/mm/memory.c new/mm/memory.c
--- old/mm/memory.c	2012-07-12 04:32:21.000000000 +0100
+++ new/mm/memory.c	2016-10-24 13:46:16.254765041 +0100
@@ -1395,4 +1395,22 @@
 }
 
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+					unsigned int flags)
+{
+	if (pte_write(pte))
+		return true;
+
+	/*
+	 * Make sure that we are really following a COWed page. We do not
+	 * care about page exclusiveness here; we only need to ensure that
+	 * the once-COWed page has not disappeared in the meantime and has
+	 * not been merged into a KSM page.
+	 */
+	if ((flags & FOLL_FORCE) && (flags & FOLL_COW))
+		return page && PageAnon(page) && !PageKsm(page);
+
+	return false;
+}
+
 /**
  * zap_vma_ptes - remove ptes mapping the vma
@@ -1500,8 +1518,11 @@
 	if (!pte_present(pte))
 		goto no_page;
-	if ((flags & FOLL_WRITE) && !pte_write(pte))
-		goto unlock;
 
 	page = vm_normal_page(vma, address, pte);
+	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, flags)) {
+		pte_unmap_unlock(ptep, ptl);
+		return NULL;
+	}
+
 	if (unlikely(!page)) {
 		if ((flags & FOLL_DUMP) ||
@@ -1546,5 +1567,5 @@
 		}
 	}
-unlock:
+
 	pte_unmap_unlock(ptep, ptl);
 out:
@@ -1780,15 +1801,11 @@
 				 * do_wp_page has broken COW when necessary,
 				 * even if maybe_mkwrite decided not to set
-				 * pte_write. We can thus safely do subsequent
-				 * page lookups as if they were reads. But only
-				 * do so when looping for pte_write is futile:
-				 * in some cases userspace may also be wanting
-				 * to write to the gotten user page, which a
-				 * read fault here might prevent (a readonly
-				 * page might get reCOWed by userspace write).
+				 * pte_write. We cannot simply drop FOLL_WRITE
+				 * here because the COWed page might be gone by
+				 * the time we do the subsequent page lookups.
 				 */
 				if ((ret & VM_FAULT_WRITE) &&
 				    !(vma->vm_flags & VM_WRITE))
-					foll_flags &= ~FOLL_WRITE;
+					foll_flags |= FOLL_COW;
 
 				cond_resched();
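
Note on the logic this patch changes: before the fix, __get_user_pages() reacted to a successful COW break (VM_FAULT_WRITE on a read-only mapping) by clearing FOLL_WRITE, so the retried follow_page() lookup became a plain read and returned whatever page was mapped at that point, even if the private copy had since been discarded and the original file page re-faulted in. The patch instead keeps FOLL_WRITE and records the COW break in the new FOLL_COW flag; follow_page() then accepts a non-writable PTE for a forced write only while the page is still the anonymous, non-KSM copy, and otherwise returns NULL so the caller faults again.

The stand-alone program below models that decision in userspace. It is an illustrative sketch, not kernel code: the *_modeled types and the two scenarios are invented for the demo; only the FOLL_* values mirror the patched mm.h.

/*
 * foll_cow_demo.c - userspace model of can_follow_write_pte().
 * Illustrative only: the *_modeled types and scenario values are
 * invented; the FOLL_* values match the patched include/linux/mm.h.
 * Build: cc -Wall -o foll_cow_demo foll_cow_demo.c
 */
#include <stdbool.h>
#include <stdio.h>

#define FOLL_WRITE 0x01
#define FOLL_FORCE 0x10
#define FOLL_COW   0x4000

struct pte_modeled  { bool write; };           /* stands in for pte_write()     */
struct page_modeled { bool anon; bool ksm; };  /* stands in for PageAnon/PageKsm */

/* Mirrors the patched check: a read-only PTE may satisfy a forced
 * write lookup only after COW was broken (FOLL_COW is set) and only
 * while the page is still a private anonymous, non-KSM copy. */
static bool can_follow_write_pte(struct pte_modeled pte,
                                 const struct page_modeled *page,
                                 unsigned int flags)
{
        if (pte.write)
                return true;
        if ((flags & FOLL_FORCE) && (flags & FOLL_COW))
                return page && page->anon && !page->ksm;
        return false;
}

int main(void)
{
        /* The racy window: COW was broken, the anonymous copy was
         * dropped (e.g. via MADV_DONTNEED), and the read-only file
         * page got mapped again at the same address. */
        struct pte_modeled  pte       = { .write = false };
        struct page_modeled anon_copy = { .anon = true,  .ksm = false };
        struct page_modeled file_page = { .anon = false, .ksm = false };

        /* New loop keeps FOLL_WRITE and adds FOLL_COW after the COW
         * break; the old loop cleared FOLL_WRITE here instead, so the
         * lookup degraded to a read and handed back file_page. */
        unsigned int flags = FOLL_FORCE | FOLL_WRITE | FOLL_COW;

        printf("anon copy still mapped : %s\n",
               can_follow_write_pte(pte, &anon_copy, flags)
               ? "followed" : "refault");
        printf("file page mapped again : %s\n",
               can_follow_write_pte(pte, &file_page, flags)
               ? "followed" : "refault (COW redone)");
        return 0;
}

Run against the two scenarios, the check follows the still-present anonymous copy but rejects the re-faulted file page and forces another write fault, which is exactly the window the old foll_flags &= ~FOLL_WRITE path left open.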