|
Oscar Salvador |
b87a4b |
From fc8efd2ddfed3f343c11b693e87140ff358d7ff5 Mon Sep 17 00:00:00 2001
|
|
Oscar Salvador |
b87a4b |
From: Jan Stancek <jstancek@redhat.com>
|
|
Oscar Salvador |
b87a4b |
Date: Tue, 5 Mar 2019 15:50:08 -0800
|
|
Oscar Salvador |
b87a4b |
Subject: [PATCH] mm/memory.c: do_fault: avoid usage of stale vm_area_struct
|
|
Oscar Salvador |
b87a4b |
Git-commit: fc8efd2ddfed3f343c11b693e87140ff358d7ff5
|
|
Oscar Salvador |
b87a4b |
Patch-mainline: v5.1-rc1
|
|
Oscar Salvador |
b87a4b |
References: bsc#1136513
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
LTP testcase mtest06 [1] can trigger a crash on s390x running 5.0.0-rc8.
|
|
Oscar Salvador |
b87a4b |
This is a stress test, where one thread mmaps/writes/munmaps memory area
|
|
Oscar Salvador |
b87a4b |
and another thread is trying to read from it:
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
CPU: 0 PID: 2611 Comm: mmap1 Not tainted 5.0.0-rc8+ #51
|
|
Oscar Salvador |
b87a4b |
Hardware name: IBM 2964 N63 400 (z/VM 6.4.0)
|
|
Oscar Salvador |
b87a4b |
Krnl PSW : 0404e00180000000 00000000001ac8d8 (__lock_acquire+0x7/0x7a8)
|
|
Oscar Salvador |
b87a4b |
Call Trace:
|
|
Oscar Salvador |
b87a4b |
([<0000000000000000>] (null))
|
|
Oscar Salvador |
b87a4b |
[<00000000001adae4>] lock_acquire+0xec/0x258
|
|
Oscar Salvador |
b87a4b |
[<000000000080d1ac>] _raw_spin_lock_bh+0x5c/0x98
|
|
Oscar Salvador |
b87a4b |
[<000000000012a780>] page_table_free+0x48/0x1a8
|
|
Oscar Salvador |
b87a4b |
[<00000000002f6e54>] do_fault+0xdc/0x670
|
|
Oscar Salvador |
b87a4b |
[<00000000002fadae>] __handle_mm_fault+0x416/0x5f0
|
|
Oscar Salvador |
b87a4b |
[<00000000002fb138>] handle_mm_fault+0x1b0/0x320
|
|
Oscar Salvador |
b87a4b |
[<00000000001248cc>] do_dat_exception+0x19c/0x2c8
|
|
Oscar Salvador |
b87a4b |
[<000000000080e5ee>] pgm_check_handler+0x19e/0x200
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
page_table_free() is called with a NULL mm parameter, but because "0" is a
|
|
Oscar Salvador |
b87a4b |
valid address on s390 (see S390_lowcore), it keeps going until it
|
|
Oscar Salvador |
b87a4b |
eventually crashes in lockdep's lock_acquire. This crash is
|
|
Oscar Salvador |
b87a4b |
reproducible at least since 4.14.
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
The problem is that "vmf->vma" used in do_fault() can become stale. Because
|
|
Oscar Salvador |
b87a4b |
mmap_sem may be released, other threads can come in, call munmap() and
|
|
Oscar Salvador |
b87a4b |
cause "vma" to be returned to the kmem cache, and get zeroed/re-initialized and
|
|
Oscar Salvador |
b87a4b |
re-used:
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
handle_mm_fault |
|
|
Oscar Salvador |
b87a4b |
__handle_mm_fault |
|
|
Oscar Salvador |
b87a4b |
do_fault |
|
|
Oscar Salvador |
b87a4b |
vma = vmf->vma |
|
|
Oscar Salvador |
b87a4b |
do_read_fault |
|
|
Oscar Salvador |
b87a4b |
__do_fault |
|
|
Oscar Salvador |
b87a4b |
vma->vm_ops->fault(vmf); |
|
|
Oscar Salvador |
b87a4b |
mmap_sem is released |
|
|
Oscar Salvador |
b87a4b |
|
|
|
Oscar Salvador |
b87a4b |
| do_munmap()
|
|
Oscar Salvador |
b87a4b |
| remove_vma_list()
|
|
Oscar Salvador |
b87a4b |
| remove_vma()
|
|
Oscar Salvador |
b87a4b |
| vm_area_free()
|
|
Oscar Salvador |
b87a4b |
| # vma is released
|
|
Oscar Salvador |
b87a4b |
| ...
|
|
Oscar Salvador |
b87a4b |
| # same vma is allocated
|
|
Oscar Salvador |
b87a4b |
| # from kmem cache
|
|
Oscar Salvador |
b87a4b |
| do_mmap()
|
|
Oscar Salvador |
b87a4b |
| vm_area_alloc()
|
|
Oscar Salvador |
b87a4b |
| memset(vma, 0, ...)
|
|
Oscar Salvador |
b87a4b |
|
|
|
Oscar Salvador |
b87a4b |
pte_free(vma->vm_mm, ...); |
|
|
Oscar Salvador |
b87a4b |
page_table_free |
|
|
Oscar Salvador |
b87a4b |
spin_lock_bh(&mm->context.lock);|
|
|
Oscar Salvador |
b87a4b |
<crash> |
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
Cache mm_struct to avoid using potentially stale "vma".
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
[1] https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/mem/mtest06/mmap1.c
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
Link: http://lkml.kernel.org/r/5b3fdf19e2a5be460a384b936f5b56e13733f1b8.1551595137.git.jstancek@redhat.com
|
|
Oscar Salvador |
b87a4b |
Signed-off-by: Jan Stancek <jstancek@redhat.com>
|
|
Oscar Salvador |
b87a4b |
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
|
|
Oscar Salvador |
b87a4b |
Reviewed-by: Matthew Wilcox <willy@infradead.org>
|
|
Oscar Salvador |
b87a4b |
Acked-by: Rafael Aquini <aquini@redhat.com>
|
|
Oscar Salvador |
b87a4b |
Reviewed-by: Minchan Kim <minchan@kernel.org>
|
|
Oscar Salvador |
b87a4b |
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
|
|
Oscar Salvador |
b87a4b |
Cc: Rik van Riel <riel@surriel.com>
|
|
Oscar Salvador |
b87a4b |
Cc: Michal Hocko <mhocko@suse.com>
|
|
Oscar Salvador |
b87a4b |
Cc: Huang Ying <ying.huang@intel.com>
|
|
Oscar Salvador |
b87a4b |
Cc: Souptick Joarder <jrdr.linux@gmail.com>
|
|
Oscar Salvador |
b87a4b |
Cc: Jerome Glisse <jglisse@redhat.com>
|
|
Oscar Salvador |
b87a4b |
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
|
|
Oscar Salvador |
b87a4b |
Cc: David Hildenbrand <david@redhat.com>
|
|
Oscar Salvador |
b87a4b |
Cc: Andrea Arcangeli <aarcange@redhat.com>
|
|
Oscar Salvador |
b87a4b |
Cc: David Rientjes <rientjes@google.com>
|
|
Oscar Salvador |
b87a4b |
Cc: Mel Gorman <mgorman@techsingularity.net>
|
|
Oscar Salvador |
b87a4b |
Cc: <stable@vger.kernel.org>
|
|
Oscar Salvador |
b87a4b |
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
|
Oscar Salvador |
b87a4b |
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
|
Oscar Salvador |
b87a4b |
Signed-off-by: Oscar Salvador <osalvador@suse.de>
|
|
Oscar Salvador |
b87a4b |
---
|
|
Oscar Salvador |
b87a4b |
mm/memory.c | 5 ++++-
|
|
Oscar Salvador |
b87a4b |
1 file changed, 4 insertions(+), 1 deletion(-)
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
--- a/mm/memory.c
|
|
Oscar Salvador |
b87a4b |
+++ b/mm/memory.c
|
|
Oscar Salvador |
b87a4b |
@@ -3684,10 +3684,13 @@ static int do_shared_fault(struct vm_fau
|
|
Oscar Salvador |
b87a4b |
* but allow concurrent faults).
|
|
Oscar Salvador |
b87a4b |
* The mmap_sem may have been released depending on flags and our
|
|
Oscar Salvador |
b87a4b |
* return value. See filemap_fault() and __lock_page_or_retry().
|
|
Oscar Salvador |
b87a4b |
+ * If mmap_sem is released, vma may become invalid (for example
|
|
Oscar Salvador |
b87a4b |
+ * by other thread calling munmap()).
|
|
Oscar Salvador |
b87a4b |
*/
|
|
Oscar Salvador |
b87a4b |
static int do_fault(struct vm_fault *vmf)
|
|
Oscar Salvador |
b87a4b |
{
|
|
Oscar Salvador |
b87a4b |
struct vm_area_struct *vma = vmf->vma;
|
|
Oscar Salvador |
b87a4b |
+ struct mm_struct *vm_mm = vma->vm_mm;
|
|
Oscar Salvador |
b87a4b |
int ret;
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
/*
|
|
Oscar Salvador |
b87a4b |
@@ -3728,7 +3731,7 @@ static int do_fault(struct vm_fault *vmf
|
|
Oscar Salvador |
b87a4b |
|
|
Oscar Salvador |
b87a4b |
/* preallocated pagetable is unused: free it */
|
|
Oscar Salvador |
b87a4b |
if (vmf->prealloc_pte) {
|
|
Oscar Salvador |
b87a4b |
- pte_free(vma->vm_mm, vmf->prealloc_pte);
|
|
Oscar Salvador |
b87a4b |
+ pte_free(vm_mm, vmf->prealloc_pte);
|
|
Oscar Salvador |
b87a4b |
vmf->prealloc_pte = NULL;
|
|
Oscar Salvador |
b87a4b |
}
|
|
Oscar Salvador |
b87a4b |
return ret;
|