From ae1139ece126b8eb6d0770094fbac43ea928d9d9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:11 -0700
Subject: [PATCH] mm, memory_failure: Collect mapping size in collect_procs()
Git-commit: ae1139ece126b8eb6d0770094fbac43ea928d9d9
Patch-mainline: v4.19-rc1
References: bsc#1107783
In preparation for supporting memory_failure() for dax mappings, teach
collect_procs() to also determine the mapping size. Unlike typical
mappings the dax mapping size is determined by walking page-table
entries rather than using the compound-page accounting for THP pages.
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Acked-by: Jan Kara <jack@suse.cz>
---
mm/memory-failure.c | 79 ++++++++++++++++++++++++++--------------------------
1 file changed, 40 insertions(+), 39 deletions(-)
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -175,25 +175,56 @@ int hwpoison_filter(struct page *p)
EXPORT_SYMBOL_GPL(hwpoison_filter);
/*
+ * Kill all processes that have a poisoned page mapped and then isolate
+ * the page.
+ *
+ * General strategy:
+ * Find all processes having the page mapped and kill them.
+ * But we keep a page reference around so that the page is not
+ * actually freed yet.
+ * Then stash the page away
+ *
+ * There's no convenient way to get back to mapped processes
+ * from the VMAs. So do a brute-force search over all
+ * running processes.
+ *
+ * Remember that machine checks are not common (or rather
+ * if they are common you have other problems), so this shouldn't
+ * be a performance issue.
+ *
+ * Also there are some races possible while we get from the
+ * error detection to actually handle it.
+ */
+
+struct to_kill {
+ struct list_head nd;
+ struct task_struct *tsk;
+ unsigned long addr;
+ short size_shift;
+ char addr_valid;
+};
+
+/*
* Send all the processes who have the page mapped a signal.
* ``action optional'' if they are not immediately affected by the error
* ``action required'' if error happened in current execution context
*/
-static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
- unsigned long pfn, struct page *page, int flags)
+static int kill_proc(struct to_kill *tk, int trapno, unsigned long pfn,
+ int flags)
{
struct siginfo si;
+ struct task_struct *t = tk->tsk;
int ret;
pr_err("Memory failure: %#lx: Killing %s:%d due to hardware memory corruption\n",
pfn, t->comm, t->pid);
si.si_signo = SIGBUS;
si.si_errno = 0;
- si.si_addr = (void *)addr;
+ si.si_addr = (void *)tk->addr;
#ifdef __ARCH_SI_TRAPNO
si.si_trapno = trapno;
#endif
- si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
+ si.si_addr_lsb = tk->size_shift;
if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
si.si_code = BUS_MCEERR_AR;
@@ -242,35 +273,6 @@ void shake_page(struct page *p, int acce
EXPORT_SYMBOL_GPL(shake_page);
/*
- * Kill all processes that have a poisoned page mapped and then isolate
- * the page.
- *
- * General strategy:
- * Find all processes having the page mapped and kill them.
- * But we keep a page reference around so that the page is not
- * actually freed yet.
- * Then stash the page away
- *
- * There's no convenient way to get back to mapped processes
- * from the VMAs. So do a brute-force search over all
- * running processes.
- *
- * Remember that machine checks are not common (or rather
- * if they are common you have other problems), so this shouldn't
- * be a performance issue.
- *
- * Also there are some races possible while we get from the
- * error detection to actually handle it.
- */
-
-struct to_kill {
- struct list_head nd;
- struct task_struct *tsk;
- unsigned long addr;
- char addr_valid;
-};
-
-/*
* Failure handling: if we can't find or can't kill a process there's
* not much we can do. We just print a message and ignore otherwise.
*/
@@ -299,6 +301,7 @@ static void add_to_kill(struct task_stru
}
tk->addr = page_address_in_vma(p, vma);
tk->addr_valid = 1;
+ tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
/*
* In theory we don't have to kill when the page was
@@ -325,8 +328,7 @@ static void add_to_kill(struct task_stru
* wrong earlier.
*/
static void kill_procs(struct list_head *to_kill, int forcekill, int trapno,
- bool fail, struct page *page, unsigned long pfn,
- int flags)
+ bool fail, unsigned long pfn, int flags)
{
struct to_kill *tk, *next;
@@ -349,8 +351,7 @@ static void kill_procs(struct list_head
* check for that, but we need to tell the
* process anyways.
*/
- else if (kill_proc(tk->tsk, tk->addr, trapno,
- pfn, page, flags) < 0)
+ else if (kill_proc(tk, trapno, pfn, flags) < 0)
pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
pfn, tk->tsk->comm, tk->tsk->pid);
}
@@ -1005,7 +1006,7 @@ static bool hwpoison_user_mappings(struc
* any accesses to the poisoned memory.
*/
forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL);
- kill_procs(&tokill, forcekill, trapno, !unmap_success, p, pfn, flags);
+ kill_procs(&tokill, forcekill, trapno, !unmap_success, pfn, flags);
return unmap_success;
}