Skip to content

Commit 405ce05

Browse files
nhoriguchitorvalds
authored andcommitted
mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()
There is a race condition between memory_failure_hugetlb() and hugetlb free/demotion, which causes setting PageHWPoison flag on the wrong page. The one simple result is that wrong processes can be killed, but another (more serious) one is that the actual error is left unhandled, so no one prevents later access to it, and that might lead to more serious results like consuming corrupted data. Think about the below race window: CPU 1 CPU 2 memory_failure_hugetlb struct page *head = compound_head(p); hugetlb page might be freed to buddy, or even changed to another compound page. get_hwpoison_page -- page is not what we want now... The current code first does prechecks roughly and then reconfirms after taking refcount, but it's found that it makes code overly complicated, so move the prechecks in a single hugetlb_lock range. A newly introduced function, try_memory_failure_hugetlb(), always takes hugetlb_lock (even for non-hugetlb pages). That can be improved, but memory_failure() is rare in principle, so should not be a big problem. Link: https://lkml.kernel.org/r/[email protected] Fixes: 761ad8d ("mm: hwpoison: introduce memory_failure_hugetlb()") Signed-off-by: Naoya Horiguchi <[email protected]> Reported-by: Mike Kravetz <[email protected]> Reviewed-by: Miaohe Lin <[email protected]> Reviewed-by: Mike Kravetz <[email protected]> Cc: Yang Shi <[email protected]> Cc: Dan Carpenter <[email protected]> Cc: <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent b253435 commit 405ce05

File tree

4 files changed

+127
-42
lines changed

4 files changed

+127
-42
lines changed

include/linux/hugetlb.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
169169
long freed);
170170
bool isolate_huge_page(struct page *page, struct list_head *list);
171171
int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
172+
int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
172173
void putback_active_hugepage(struct page *page);
173174
void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
174175
void free_huge_page(struct page *page);
@@ -378,6 +379,11 @@ static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
378379
return 0;
379380
}
380381

382+
static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
383+
{
384+
return 0;
385+
}
386+
381387
static inline void putback_active_hugepage(struct page *page)
382388
{
383389
}

include/linux/mm.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3197,6 +3197,14 @@ extern int sysctl_memory_failure_recovery;
31973197
extern void shake_page(struct page *p);
31983198
extern atomic_long_t num_poisoned_pages __read_mostly;
31993199
extern int soft_offline_page(unsigned long pfn, int flags);
3200+
#ifdef CONFIG_MEMORY_FAILURE
3201+
extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
3202+
#else
3203+
static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
3204+
{
3205+
return 0;
3206+
}
3207+
#endif
32003208

32013209
#ifndef arch_memory_failure
32023210
static inline int arch_memory_failure(unsigned long pfn, int flags)

mm/hugetlb.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6785,6 +6785,16 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
67856785
return ret;
67866786
}
67876787

6788+
int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
6789+
{
6790+
int ret;
6791+
6792+
spin_lock_irq(&hugetlb_lock);
6793+
ret = __get_huge_page_for_hwpoison(pfn, flags);
6794+
spin_unlock_irq(&hugetlb_lock);
6795+
return ret;
6796+
}
6797+
67886798
void putback_active_hugepage(struct page *page)
67896799
{
67906800
spin_lock_irq(&hugetlb_lock);

mm/memory-failure.c

Lines changed: 103 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,50 +1498,113 @@ static int try_to_split_thp_page(struct page *page, const char *msg)
14981498
return 0;
14991499
}
15001500

1501-
static int memory_failure_hugetlb(unsigned long pfn, int flags)
1501+
/*
1502+
* Called from hugetlb code with hugetlb_lock held.
1503+
*
1504+
* Return values:
1505+
* 0 - free hugepage
1506+
* 1 - in-use hugepage
1507+
* 2 - not a hugepage
1508+
* -EBUSY - the hugepage is busy (try to retry)
1509+
* -EHWPOISON - the hugepage is already hwpoisoned
1510+
*/
1511+
int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
1512+
{
1513+
struct page *page = pfn_to_page(pfn);
1514+
struct page *head = compound_head(page);
1515+
int ret = 2; /* fallback to normal page handling */
1516+
bool count_increased = false;
1517+
1518+
if (!PageHeadHuge(head))
1519+
goto out;
1520+
1521+
if (flags & MF_COUNT_INCREASED) {
1522+
ret = 1;
1523+
count_increased = true;
1524+
} else if (HPageFreed(head) || HPageMigratable(head)) {
1525+
ret = get_page_unless_zero(head);
1526+
if (ret)
1527+
count_increased = true;
1528+
} else {
1529+
ret = -EBUSY;
1530+
goto out;
1531+
}
1532+
1533+
if (TestSetPageHWPoison(head)) {
1534+
ret = -EHWPOISON;
1535+
goto out;
1536+
}
1537+
1538+
return ret;
1539+
out:
1540+
if (count_increased)
1541+
put_page(head);
1542+
return ret;
1543+
}
1544+
1545+
#ifdef CONFIG_HUGETLB_PAGE
1546+
/*
1547+
* Taking refcount of hugetlb pages needs extra care about race conditions
1548+
* with basic operations like hugepage allocation/free/demotion.
1549+
* So some of prechecks for hwpoison (pinning, and testing/setting
1550+
* PageHWPoison) should be done in single hugetlb_lock range.
1551+
*/
1552+
static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
15021553
{
1503-
struct page *p = pfn_to_page(pfn);
1504-
struct page *head = compound_head(p);
15051554
int res;
1555+
struct page *p = pfn_to_page(pfn);
1556+
struct page *head;
15061557
unsigned long page_flags;
1558+
bool retry = true;
15071559

1508-
if (TestSetPageHWPoison(head)) {
1509-
pr_err("Memory failure: %#lx: already hardware poisoned\n",
1510-
pfn);
1511-
res = -EHWPOISON;
1512-
if (flags & MF_ACTION_REQUIRED)
1560+
*hugetlb = 1;
1561+
retry:
1562+
res = get_huge_page_for_hwpoison(pfn, flags);
1563+
if (res == 2) { /* fallback to normal page handling */
1564+
*hugetlb = 0;
1565+
return 0;
1566+
} else if (res == -EHWPOISON) {
1567+
pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
1568+
if (flags & MF_ACTION_REQUIRED) {
1569+
head = compound_head(p);
15131570
res = kill_accessing_process(current, page_to_pfn(head), flags);
1571+
}
15141572
return res;
1573+
} else if (res == -EBUSY) {
1574+
if (retry) {
1575+
retry = false;
1576+
goto retry;
1577+
}
1578+
action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
1579+
return res;
1580+
}
1581+
1582+
head = compound_head(p);
1583+
lock_page(head);
1584+
1585+
if (hwpoison_filter(p)) {
1586+
ClearPageHWPoison(head);
1587+
res = -EOPNOTSUPP;
1588+
goto out;
15151589
}
15161590

15171591
num_poisoned_pages_inc();
15181592

1519-
if (!(flags & MF_COUNT_INCREASED)) {
1520-
res = get_hwpoison_page(p, flags);
1521-
if (!res) {
1522-
lock_page(head);
1523-
if (hwpoison_filter(p)) {
1524-
if (TestClearPageHWPoison(head))
1525-
num_poisoned_pages_dec();
1526-
unlock_page(head);
1527-
return -EOPNOTSUPP;
1528-
}
1529-
unlock_page(head);
1530-
res = MF_FAILED;
1531-
if (__page_handle_poison(p)) {
1532-
page_ref_inc(p);
1533-
res = MF_RECOVERED;
1534-
}
1535-
action_result(pfn, MF_MSG_FREE_HUGE, res);
1536-
return res == MF_RECOVERED ? 0 : -EBUSY;
1537-
} else if (res < 0) {
1538-
action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
1539-
return -EBUSY;
1593+
/*
1594+
* Handling free hugepage. The possible race with hugepage allocation
1595+
* or demotion can be prevented by PageHWPoison flag.
1596+
*/
1597+
if (res == 0) {
1598+
unlock_page(head);
1599+
res = MF_FAILED;
1600+
if (__page_handle_poison(p)) {
1601+
page_ref_inc(p);
1602+
res = MF_RECOVERED;
15401603
}
1604+
action_result(pfn, MF_MSG_FREE_HUGE, res);
1605+
return res == MF_RECOVERED ? 0 : -EBUSY;
15411606
}
15421607

1543-
lock_page(head);
1544-
15451608
/*
15461609
* The page could have changed compound pages due to race window.
15471610
* If this happens just bail out.
@@ -1554,14 +1617,6 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
15541617

15551618
page_flags = head->flags;
15561619

1557-
if (hwpoison_filter(p)) {
1558-
if (TestClearPageHWPoison(head))
1559-
num_poisoned_pages_dec();
1560-
put_page(p);
1561-
res = -EOPNOTSUPP;
1562-
goto out;
1563-
}
1564-
15651620
/*
15661621
* TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
15671622
* simply disable it. In order to make it work properly, we need
@@ -1588,6 +1643,12 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
15881643
unlock_page(head);
15891644
return res;
15901645
}
1646+
#else
1647+
static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
1648+
{
1649+
return 0;
1650+
}
1651+
#endif
15911652

15921653
static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
15931654
struct dev_pagemap *pgmap)
@@ -1712,6 +1773,7 @@ int memory_failure(unsigned long pfn, int flags)
17121773
int res = 0;
17131774
unsigned long page_flags;
17141775
bool retry = true;
1776+
int hugetlb = 0;
17151777

17161778
if (!sysctl_memory_failure_recovery)
17171779
panic("Memory failure on page %lx", pfn);
@@ -1739,10 +1801,9 @@ int memory_failure(unsigned long pfn, int flags)
17391801
}
17401802

17411803
try_again:
1742-
if (PageHuge(p)) {
1743-
res = memory_failure_hugetlb(pfn, flags);
1804+
res = try_memory_failure_hugetlb(pfn, flags, &hugetlb);
1805+
if (hugetlb)
17441806
goto unlock_mutex;
1745-
}
17461807

17471808
if (TestSetPageHWPoison(p)) {
17481809
pr_err("Memory failure: %#lx: already hardware poisoned\n",

0 commit comments

Comments
 (0)