Skip to content

Commit 9343224

Browse files
committed
Merge branch 'akpm' (patches from Andrew Morton)
Merge a bunch of fixes from Andrew Morton:
 "Commit 579f829 ("swap: add a simple detector for inappropriate swapin readahead") is a feature. No probs if you decide to defer it until the next merge window. It has been sitting in my tree for over a year because of my dislike of all the magic numbers, but recent discussion with Hugh has made me give up"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: __set_page_dirty uses spin_lock_irqsave instead of spin_lock_irq
  arch/x86/mm/numa.c: fix array index overflow when synchronizing nid to memblock.reserved.
  arch/x86/mm/numa.c: initialize numa_kernel_nodes in numa_clear_kernel_node_hotplug()
  mm: __set_page_dirty_nobuffers() uses spin_lock_irqsave() instead of spin_lock_irq()
  mm/swap: fix race on swap_info reuse between swapoff and swapon
  swap: add a simple detector for inappropriate swapin readahead
  ocfs2: free allocated clusters if error occurs after ocfs2_claim_clusters
  Documentation/kernel-parameters.txt: fix memmap= language
2 parents f2de3a1 + 227d53b commit 9343224

File tree

10 files changed

+178
-26
lines changed

10 files changed

+178
-26
lines changed

Documentation/kernel-parameters.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1726,16 +1726,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
17261726
option description.
17271727

17281728
memmap=nn[KMG]@ss[KMG]
1729-
[KNL] Force usage of a specific region of memory
1730-
Region of memory to be used, from ss to ss+nn.
1729+
[KNL] Force usage of a specific region of memory.
1730+
Region of memory to be used is from ss to ss+nn.
17311731

17321732
memmap=nn[KMG]#ss[KMG]
17331733
[KNL,ACPI] Mark specific memory as ACPI data.
1734-
Region of memory to be used, from ss to ss+nn.
1734+
Region of memory to be marked is from ss to ss+nn.
17351735

17361736
memmap=nn[KMG]$ss[KMG]
17371737
[KNL,ACPI] Mark specific memory as reserved.
1738-
Region of memory to be used, from ss to ss+nn.
1738+
Region of memory to be reserved is from ss to ss+nn.
17391739
Example: Exclude memory from 0x18690000-0x1869ffff
17401740
memmap=64K$0x18690000
17411741
or

arch/x86/mm/numa.c

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -493,14 +493,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
493493
struct numa_memblk *mb = &mi->blk[i];
494494
memblock_set_node(mb->start, mb->end - mb->start,
495495
&memblock.memory, mb->nid);
496-
497-
/*
498-
* At this time, all memory regions reserved by memblock are
499-
* used by the kernel. Set the nid in memblock.reserved will
500-
* mark out all the nodes the kernel resides in.
501-
*/
502-
memblock_set_node(mb->start, mb->end - mb->start,
503-
&memblock.reserved, mb->nid);
504496
}
505497

506498
/*
@@ -565,10 +557,21 @@ static void __init numa_init_array(void)
565557
static void __init numa_clear_kernel_node_hotplug(void)
566558
{
567559
int i, nid;
568-
nodemask_t numa_kernel_nodes;
560+
nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
569561
unsigned long start, end;
570562
struct memblock_type *type = &memblock.reserved;
571563

564+
/*
565+
* At this time, all memory regions reserved by memblock are
566+
* used by the kernel. Set the nid in memblock.reserved will
567+
* mark out all the nodes the kernel resides in.
568+
*/
569+
for (i = 0; i < numa_meminfo.nr_blks; i++) {
570+
struct numa_memblk *mb = &numa_meminfo.blk[i];
571+
memblock_set_node(mb->start, mb->end - mb->start,
572+
&memblock.reserved, mb->nid);
573+
}
574+
572575
/* Mark all kernel nodes. */
573576
for (i = 0; i < type->cnt; i++)
574577
node_set(type->regions[i].nid, numa_kernel_nodes);

fs/buffer.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -654,14 +654,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
654654
static void __set_page_dirty(struct page *page,
655655
struct address_space *mapping, int warn)
656656
{
657-
spin_lock_irq(&mapping->tree_lock);
657+
unsigned long flags;
658+
659+
spin_lock_irqsave(&mapping->tree_lock, flags);
658660
if (page->mapping) { /* Race with truncate? */
659661
WARN_ON_ONCE(warn && !PageUptodate(page));
660662
account_page_dirtied(page, mapping);
661663
radix_tree_tag_set(&mapping->page_tree,
662664
page_index(page), PAGECACHE_TAG_DIRTY);
663665
}
664-
spin_unlock_irq(&mapping->tree_lock);
666+
spin_unlock_irqrestore(&mapping->tree_lock, flags);
665667
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
666668
}
667669

fs/ocfs2/alloc.c

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
47424742
enum ocfs2_alloc_restarted *reason_ret)
47434743
{
47444744
int status = 0, err = 0;
4745+
int need_free = 0;
47454746
int free_extents;
47464747
enum ocfs2_alloc_restarted reason = RESTART_NONE;
47474748
u32 bit_off, num_bits;
@@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
47964797
OCFS2_JOURNAL_ACCESS_WRITE);
47974798
if (status < 0) {
47984799
mlog_errno(status);
4799-
goto leave;
4800+
need_free = 1;
4801+
goto bail;
48004802
}
48014803

48024804
block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
@@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
48074809
num_bits, flags, meta_ac);
48084810
if (status < 0) {
48094811
mlog_errno(status);
4810-
goto leave;
4812+
need_free = 1;
4813+
goto bail;
48114814
}
48124815

48134816
ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
48214824
reason = RESTART_TRANS;
48224825
}
48234826

4827+
bail:
4828+
if (need_free) {
4829+
if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
4830+
ocfs2_free_local_alloc_bits(osb, handle, data_ac,
4831+
bit_off, num_bits);
4832+
else
4833+
ocfs2_free_clusters(handle,
4834+
data_ac->ac_inode,
4835+
data_ac->ac_bh,
4836+
ocfs2_clusters_to_blocks(osb->sb, bit_off),
4837+
num_bits);
4838+
}
4839+
48244840
leave:
48254841
if (reason_ret)
48264842
*reason_ret = reason;
@@ -6805,6 +6821,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
68056821
struct buffer_head *di_bh)
68066822
{
68076823
int ret, i, has_data, num_pages = 0;
6824+
int need_free = 0;
6825+
u32 bit_off, num;
68086826
handle_t *handle;
68096827
u64 uninitialized_var(block);
68106828
struct ocfs2_inode_info *oi = OCFS2_I(inode);
@@ -6850,7 +6868,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
68506868
}
68516869

68526870
if (has_data) {
6853-
u32 bit_off, num;
68546871
unsigned int page_end;
68556872
u64 phys;
68566873

@@ -6886,6 +6903,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
68866903
ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
68876904
if (ret) {
68886905
mlog_errno(ret);
6906+
need_free = 1;
68896907
goto out_commit;
68906908
}
68916909

@@ -6896,6 +6914,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
68966914
ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
68976915
if (ret) {
68986916
mlog_errno(ret);
6917+
need_free = 1;
68996918
goto out_commit;
69006919
}
69016920

@@ -6927,6 +6946,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
69276946
ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
69286947
if (ret) {
69296948
mlog_errno(ret);
6949+
need_free = 1;
69306950
goto out_commit;
69316951
}
69326952

@@ -6938,6 +6958,18 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
69386958
dquot_free_space_nodirty(inode,
69396959
ocfs2_clusters_to_bytes(osb->sb, 1));
69406960

6961+
if (need_free) {
6962+
if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
6963+
ocfs2_free_local_alloc_bits(osb, handle, data_ac,
6964+
bit_off, num);
6965+
else
6966+
ocfs2_free_clusters(handle,
6967+
data_ac->ac_inode,
6968+
data_ac->ac_bh,
6969+
ocfs2_clusters_to_blocks(osb->sb, bit_off),
6970+
num);
6971+
}
6972+
69416973
ocfs2_commit_trans(osb, handle);
69426974

69436975
out_unlock:

fs/ocfs2/localalloc.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,48 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
781781
return status;
782782
}
783783

784+
int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
785+
handle_t *handle,
786+
struct ocfs2_alloc_context *ac,
787+
u32 bit_off,
788+
u32 num_bits)
789+
{
790+
int status, start;
791+
u32 clear_bits;
792+
struct inode *local_alloc_inode;
793+
void *bitmap;
794+
struct ocfs2_dinode *alloc;
795+
struct ocfs2_local_alloc *la;
796+
797+
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
798+
799+
local_alloc_inode = ac->ac_inode;
800+
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
801+
la = OCFS2_LOCAL_ALLOC(alloc);
802+
803+
bitmap = la->la_bitmap;
804+
start = bit_off - le32_to_cpu(la->la_bm_off);
805+
clear_bits = num_bits;
806+
807+
status = ocfs2_journal_access_di(handle,
808+
INODE_CACHE(local_alloc_inode),
809+
osb->local_alloc_bh,
810+
OCFS2_JOURNAL_ACCESS_WRITE);
811+
if (status < 0) {
812+
mlog_errno(status);
813+
goto bail;
814+
}
815+
816+
while (clear_bits--)
817+
ocfs2_clear_bit(start++, bitmap);
818+
819+
le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
820+
ocfs2_journal_dirty(handle, osb->local_alloc_bh);
821+
822+
bail:
823+
return status;
824+
}
825+
784826
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
785827
{
786828
u32 count;

fs/ocfs2/localalloc.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
5555
u32 *bit_off,
5656
u32 *num_bits);
5757

58+
int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
59+
handle_t *handle,
60+
struct ocfs2_alloc_context *ac,
61+
u32 bit_off,
62+
u32 num_bits);
63+
5864
void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
5965
unsigned int num_clusters);
6066
void ocfs2_la_enable_worker(struct work_struct *work);

include/linux/page-flags.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
228228
TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
229229
PAGEFLAG(MappedToDisk, mappedtodisk)
230230

231-
/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
231+
/* PG_readahead is only used for reads; PG_reclaim is only for writes */
232232
PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
233-
PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */
233+
PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim)
234234

235235
#ifdef CONFIG_HIGHMEM
236236
/*

mm/page-writeback.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2173,11 +2173,12 @@ int __set_page_dirty_nobuffers(struct page *page)
21732173
if (!TestSetPageDirty(page)) {
21742174
struct address_space *mapping = page_mapping(page);
21752175
struct address_space *mapping2;
2176+
unsigned long flags;
21762177

21772178
if (!mapping)
21782179
return 1;
21792180

2180-
spin_lock_irq(&mapping->tree_lock);
2181+
spin_lock_irqsave(&mapping->tree_lock, flags);
21812182
mapping2 = page_mapping(page);
21822183
if (mapping2) { /* Race with truncate? */
21832184
BUG_ON(mapping2 != mapping);
@@ -2186,7 +2187,7 @@ int __set_page_dirty_nobuffers(struct page *page)
21862187
radix_tree_tag_set(&mapping->page_tree,
21872188
page_index(page), PAGECACHE_TAG_DIRTY);
21882189
}
2189-
spin_unlock_irq(&mapping->tree_lock);
2190+
spin_unlock_irqrestore(&mapping->tree_lock, flags);
21902191
if (mapping->host) {
21912192
/* !PageAnon && !swapper_space */
21922193
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

mm/swap_state.c

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void)
6363
return ret;
6464
}
6565

66+
static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
67+
6668
void show_swap_cache_info(void)
6769
{
6870
printk("%lu pages in swap cache\n", total_swapcache_pages());
@@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry)
286288

287289
page = find_get_page(swap_address_space(entry), entry.val);
288290

289-
if (page)
291+
if (page) {
290292
INC_CACHE_INFO(find_success);
293+
if (TestClearPageReadahead(page))
294+
atomic_inc(&swapin_readahead_hits);
295+
}
291296

292297
INC_CACHE_INFO(find_total);
293298
return page;
@@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
389394
return found_page;
390395
}
391396

397+
static unsigned long swapin_nr_pages(unsigned long offset)
398+
{
399+
static unsigned long prev_offset;
400+
unsigned int pages, max_pages, last_ra;
401+
static atomic_t last_readahead_pages;
402+
403+
max_pages = 1 << ACCESS_ONCE(page_cluster);
404+
if (max_pages <= 1)
405+
return 1;
406+
407+
/*
408+
* This heuristic has been found to work well on both sequential and
409+
* random loads, swapping to hard disk or to SSD: please don't ask
410+
* what the "+ 2" means, it just happens to work well, that's all.
411+
*/
412+
pages = atomic_xchg(&swapin_readahead_hits, 0) + 2;
413+
if (pages == 2) {
414+
/*
415+
* We can have no readahead hits to judge by: but must not get
416+
* stuck here forever, so check for an adjacent offset instead
417+
* (and don't even bother to check whether swap type is same).
418+
*/
419+
if (offset != prev_offset + 1 && offset != prev_offset - 1)
420+
pages = 1;
421+
prev_offset = offset;
422+
} else {
423+
unsigned int roundup = 4;
424+
while (roundup < pages)
425+
roundup <<= 1;
426+
pages = roundup;
427+
}
428+
429+
if (pages > max_pages)
430+
pages = max_pages;
431+
432+
/* Don't shrink readahead too fast */
433+
last_ra = atomic_read(&last_readahead_pages) / 2;
434+
if (pages < last_ra)
435+
pages = last_ra;
436+
atomic_set(&last_readahead_pages, pages);
437+
438+
return pages;
439+
}
440+
392441
/**
393442
* swapin_readahead - swap in pages in hope we need them soon
394443
* @entry: swap entry of this memory
@@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
412461
struct vm_area_struct *vma, unsigned long addr)
413462
{
414463
struct page *page;
415-
unsigned long offset = swp_offset(entry);
464+
unsigned long entry_offset = swp_offset(entry);
465+
unsigned long offset = entry_offset;
416466
unsigned long start_offset, end_offset;
417-
unsigned long mask = (1UL << page_cluster) - 1;
467+
unsigned long mask;
418468
struct blk_plug plug;
419469

470+
mask = swapin_nr_pages(offset) - 1;
471+
if (!mask)
472+
goto skip;
473+
420474
/* Read a page_cluster sized and aligned cluster around offset. */
421475
start_offset = offset & ~mask;
422476
end_offset = offset | mask;
@@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
430484
gfp_mask, vma, addr);
431485
if (!page)
432486
continue;
487+
if (offset != entry_offset)
488+
SetPageReadahead(page);
433489
page_cache_release(page);
434490
}
435491
blk_finish_plug(&plug);
436492

437493
lru_add_drain(); /* Push any new pages onto the LRU now */
494+
skip:
438495
return read_swap_cache_async(entry, gfp_mask, vma, addr);
439496
}

0 commit comments

Comments
 (0)