Skip to content

Commit ae1139e

Browse files
djbw and davejiang
authored and committed
mm, memory_failure: Collect mapping size in collect_procs()
In preparation for supporting memory_failure() for dax mappings, teach collect_procs() to also determine the mapping size. Unlike typical mappings, the dax mapping size is determined by walking page-table entries rather than using the compound-page accounting for THP pages.

Acked-by: Naoya Horiguchi <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
Signed-off-by: Dave Jiang <[email protected]>
1 parent 23e7b5c commit ae1139e

File tree

1 file changed

+40
-41
lines changed

1 file changed

+40
-41
lines changed

mm/memory-failure.c

Lines changed: 40 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -173,23 +173,52 @@ int hwpoison_filter(struct page *p)
173173

174174
EXPORT_SYMBOL_GPL(hwpoison_filter);
175175

176+
/*
177+
* Kill all processes that have a poisoned page mapped and then isolate
178+
* the page.
179+
*
180+
* General strategy:
181+
* Find all processes having the page mapped and kill them.
182+
* But we keep a page reference around so that the page is not
183+
* actually freed yet.
184+
* Then stash the page away
185+
*
186+
* There's no convenient way to get back to mapped processes
187+
* from the VMAs. So do a brute-force search over all
188+
* running processes.
189+
*
190+
* Remember that machine checks are not common (or rather
191+
* if they are common you have other problems), so this shouldn't
192+
* be a performance issue.
193+
*
194+
* Also there are some races possible while we get from the
195+
* error detection to actually handle it.
196+
*/
197+
198+
struct to_kill {
199+
struct list_head nd;
200+
struct task_struct *tsk;
201+
unsigned long addr;
202+
short size_shift;
203+
char addr_valid;
204+
};
205+
176206
/*
177207
* Send all the processes who have the page mapped a signal.
178208
* ``action optional'' if they are not immediately affected by the error
179209
* ``action required'' if error happened in current execution context
180210
*/
181-
static int kill_proc(struct task_struct *t, unsigned long addr,
182-
unsigned long pfn, struct page *page, int flags)
211+
static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
183212
{
184-
short addr_lsb;
213+
struct task_struct *t = tk->tsk;
214+
short addr_lsb = tk->size_shift;
185215
int ret;
186216

187217
pr_err("Memory failure: %#lx: Killing %s:%d due to hardware memory corruption\n",
188218
pfn, t->comm, t->pid);
189-
addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
190219

191220
if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
192-
ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)addr,
221+
ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)tk->addr,
193222
addr_lsb, current);
194223
} else {
195224
/*
@@ -198,7 +227,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr,
198227
* This could cause a loop when the user sets SIGBUS
199228
* to SIG_IGN, but hopefully no one will do that?
200229
*/
201-
ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)addr,
230+
ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
202231
addr_lsb, t); /* synchronous? */
203232
}
204233
if (ret < 0)
@@ -234,35 +263,6 @@ void shake_page(struct page *p, int access)
234263
}
235264
EXPORT_SYMBOL_GPL(shake_page);
236265

237-
/*
238-
* Kill all processes that have a poisoned page mapped and then isolate
239-
* the page.
240-
*
241-
* General strategy:
242-
* Find all processes having the page mapped and kill them.
243-
* But we keep a page reference around so that the page is not
244-
* actually freed yet.
245-
* Then stash the page away
246-
*
247-
* There's no convenient way to get back to mapped processes
248-
* from the VMAs. So do a brute-force search over all
249-
* running processes.
250-
*
251-
* Remember that machine checks are not common (or rather
252-
* if they are common you have other problems), so this shouldn't
253-
* be a performance issue.
254-
*
255-
* Also there are some races possible while we get from the
256-
* error detection to actually handle it.
257-
*/
258-
259-
struct to_kill {
260-
struct list_head nd;
261-
struct task_struct *tsk;
262-
unsigned long addr;
263-
char addr_valid;
264-
};
265-
266266
/*
267267
* Failure handling: if we can't find or can't kill a process there's
268268
* not much we can do. We just print a message and ignore otherwise.
@@ -292,6 +292,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
292292
}
293293
tk->addr = page_address_in_vma(p, vma);
294294
tk->addr_valid = 1;
295+
tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
295296

296297
/*
297298
* In theory we don't have to kill when the page was
@@ -317,9 +318,8 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
317318
* Also when FAIL is set do a force kill because something went
318319
* wrong earlier.
319320
*/
320-
static void kill_procs(struct list_head *to_kill, int forcekill,
321-
bool fail, struct page *page, unsigned long pfn,
322-
int flags)
321+
static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
322+
unsigned long pfn, int flags)
323323
{
324324
struct to_kill *tk, *next;
325325

@@ -342,8 +342,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill,
342342
* check for that, but we need to tell the
343343
* process anyways.
344344
*/
345-
else if (kill_proc(tk->tsk, tk->addr,
346-
pfn, page, flags) < 0)
345+
else if (kill_proc(tk, pfn, flags) < 0)
347346
pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
348347
pfn, tk->tsk->comm, tk->tsk->pid);
349348
}
@@ -1012,7 +1011,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
10121011
* any accesses to the poisoned memory.
10131012
*/
10141013
forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL);
1015-
kill_procs(&tokill, forcekill, !unmap_success, p, pfn, flags);
1014+
kill_procs(&tokill, forcekill, !unmap_success, pfn, flags);
10161015

10171016
return unmap_success;
10181017
}

0 commit comments

Comments
 (0)