summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLorenzo Stoakes <lstoakes@gmail.com>2016-09-11 23:54:25 +0100
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2021-10-17 10:04:44 +0200
commit8fcef5e064da402fb853762ef07439d6a310d348 (patch)
tree1f9717603c16b140414e89fbd9903664c800e82b
parent58facc9c7ae307be5ecffc1697552550fedb55bd (diff)
mm: check VMA flags to avoid invalid PROT_NONE NUMA balancing
commit 38e088546522e1e86d2b8f401a1354ad3a9b3303 upstream. The NUMA balancing logic uses an arch-specific PROT_NONE page table flag defined by pte_protnone() or pmd_protnone() to mark PTEs or huge page PMDs respectively as requiring balancing upon a subsequent page fault. User-defined PROT_NONE memory regions which also have this flag set will not normally invoke the NUMA balancing code as do_page_fault() will send a segfault to the process before handle_mm_fault() is even called. However if access_remote_vm() is invoked to access a PROT_NONE region of memory, handle_mm_fault() is called via faultin_page() and __get_user_pages() without any access checks being performed, meaning the NUMA balancing logic is incorrectly invoked on a non-NUMA memory region. A simple means of triggering this problem is to access PROT_NONE mmap'd memory using /proc/self/mem which reliably results in the NUMA handling functions being invoked when CONFIG_NUMA_BALANCING is set. This issue was reported in bugzilla (issue 99101) which includes some simple repro code. There are BUG_ON() checks in do_numa_page() and do_huge_pmd_numa_page() added at commit c0e7cad to avoid accidentally provoking strange behaviour by attempting to apply NUMA balancing to pages that are in fact PROT_NONE. The BUG_ON()'s are consistently triggered by the repro. This patch moves the PROT_NONE check into mm/memory.c rather than invoking BUG_ON() as faulting in these pages via faultin_page() is a valid reason for reaching the NUMA check with the PROT_NONE page table flag set and is therefore not always a bug. Link: https://bugzilla.kernel.org/show_bug.cgi?id=99101 Reported-by: Trevor Saunders <tbsaunde@tbsaunde.org> Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com> Acked-by: Rik van Riel <riel@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Tim Gardner <tim.gardner@canonical.com> Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com> [cascardo: context adjustments were necessary] Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--mm/huge_memory.c3
-rw-r--r--mm/memory.c12
2 files changed, 7 insertions, 8 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fae45c56e2ee..2f53786098c5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1340,9 +1340,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
bool was_writable;
int flags = 0;
- /* A PROT_NONE fault should not end up here */
- BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
ptl = pmd_lock(mm, pmdp);
if (unlikely(!pmd_same(pmd, *pmdp)))
goto out_unlock;
diff --git a/mm/memory.c b/mm/memory.c
index 360d28224a8e..6bfc6a021c4f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3209,9 +3209,6 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
bool was_writable = pte_write(pte);
int flags = 0;
- /* A PROT_NONE fault should not end up here */
- BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
/*
* The "pte" at this point cannot be used safely without
* validation through pte_unmap_same(). It's of NUMA type but
@@ -3304,6 +3301,11 @@ static int wp_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_FALLBACK;
}
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
+}
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
@@ -3350,7 +3352,7 @@ static int handle_pte_fault(struct mm_struct *mm,
pte, pmd, flags, entry);
}
- if (pte_protnone(entry))
+ if (pte_protnone(entry) && vma_is_accessible(vma))
return do_numa_page(mm, vma, address, entry, pte, pmd);
ptl = pte_lockptr(mm, pmd);
@@ -3425,7 +3427,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (pmd_trans_splitting(orig_pmd))
return 0;
- if (pmd_protnone(orig_pmd))
+ if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
return do_huge_pmd_numa_page(mm, vma, address,
orig_pmd, pmd);