Skip to content

Commit 22208de

Browse files
kvaneesh authored and tytso committed
ext4: Fix file fragmentation during large file write.
The range_cyclic writeback mode uses the address_space writeback_index as the start index for writeback. With delayed allocation we were updating writeback_index wrongly, resulting in a highly fragmented file. This patch reduces the number of extents from 4000 to 27 for a 3GB file. Signed-off-by: Aneesh Kumar K.V <[email protected]> Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 17bc6c3 commit 22208de

File tree

1 file changed

+57
-34
lines changed

1 file changed

+57
-34
lines changed

fs/ext4/inode.c

Lines changed: 57 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,14 +1648,14 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
16481648
int ret = 0, err, nr_pages, i;
16491649
unsigned long index, end;
16501650
struct pagevec pvec;
1651+
long pages_skipped;
16511652

16521653
BUG_ON(mpd->next_page <= mpd->first_page);
16531654
pagevec_init(&pvec, 0);
16541655
index = mpd->first_page;
16551656
end = mpd->next_page - 1;
16561657

16571658
while (index <= end) {
1658-
/* XXX: optimize tail */
16591659
/*
16601660
* We can use PAGECACHE_TAG_DIRTY lookup here because
16611661
* even though we have cleared the dirty flag on the page
@@ -1673,8 +1673,13 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
16731673
for (i = 0; i < nr_pages; i++) {
16741674
struct page *page = pvec.pages[i];
16751675

1676+
pages_skipped = mpd->wbc->pages_skipped;
16761677
err = mapping->a_ops->writepage(page, mpd->wbc);
1677-
if (!err)
1678+
if (!err && (pages_skipped == mpd->wbc->pages_skipped))
1679+
/*
1680+
* have successfully written the page
1681+
* without skipping the same
1682+
*/
16781683
mpd->pages_written++;
16791684
/*
16801685
* In error case, we have to continue because
@@ -2110,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
21102115
struct writeback_control *wbc,
21112116
struct mpage_da_data *mpd)
21122117
{
2113-
long to_write;
21142118
int ret;
21152119

21162120
if (!mpd->get_block)
@@ -2125,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
21252129
mpd->pages_written = 0;
21262130
mpd->retval = 0;
21272131

2128-
to_write = wbc->nr_to_write;
2129-
21302132
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
2131-
21322133
/*
21332134
* Handle last extent of pages
21342135
*/
21352136
if (!mpd->io_done && mpd->next_page != mpd->first_page) {
21362137
if (mpage_da_map_blocks(mpd) == 0)
21372138
mpage_da_submit_io(mpd);
2138-
}
21392139

2140-
wbc->nr_to_write = to_write - mpd->pages_written;
2140+
mpd->io_done = 1;
2141+
ret = MPAGE_DA_EXTENT_TAIL;
2142+
}
2143+
wbc->nr_to_write -= mpd->pages_written;
21412144
return ret;
21422145
}
21432146

@@ -2366,11 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
23662369
static int ext4_da_writepages(struct address_space *mapping,
23672370
struct writeback_control *wbc)
23682371
{
2372+
pgoff_t index;
2373+
int range_whole = 0;
23692374
handle_t *handle = NULL;
23702375
struct mpage_da_data mpd;
23712376
struct inode *inode = mapping->host;
2377+
int no_nrwrite_index_update;
2378+
long pages_written = 0, pages_skipped;
23722379
int needed_blocks, ret = 0, nr_to_writebump = 0;
2373-
long to_write, pages_skipped = 0;
23742380
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
23752381

23762382
/*
@@ -2390,16 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
23902396
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
23912397
wbc->nr_to_write = sbi->s_mb_stream_request;
23922398
}
2399+
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2400+
range_whole = 1;
23932401

2394-
2395-
pages_skipped = wbc->pages_skipped;
2402+
if (wbc->range_cyclic)
2403+
index = mapping->writeback_index;
2404+
else
2405+
index = wbc->range_start >> PAGE_CACHE_SHIFT;
23962406

23972407
mpd.wbc = wbc;
23982408
mpd.inode = mapping->host;
23992409

2400-
restart_loop:
2401-
to_write = wbc->nr_to_write;
2402-
while (!ret && to_write > 0) {
2410+
/*
2411+
* we don't want write_cache_pages to update
2412+
* nr_to_write and writeback_index
2413+
*/
2414+
no_nrwrite_index_update = wbc->no_nrwrite_index_update;
2415+
wbc->no_nrwrite_index_update = 1;
2416+
pages_skipped = wbc->pages_skipped;
2417+
2418+
while (!ret && wbc->nr_to_write > 0) {
24032419

24042420
/*
24052421
* we insert one extent at a time. So we need
@@ -2420,46 +2436,53 @@ static int ext4_da_writepages(struct address_space *mapping,
24202436
dump_stack();
24212437
goto out_writepages;
24222438
}
2423-
to_write -= wbc->nr_to_write;
2424-
24252439
mpd.get_block = ext4_da_get_block_write;
24262440
ret = mpage_da_writepages(mapping, wbc, &mpd);
24272441

24282442
ext4_journal_stop(handle);
24292443

2430-
if (mpd.retval == -ENOSPC)
2444+
if (mpd.retval == -ENOSPC) {
2445+
/* commit the transaction which would
2446+
* free blocks released in the transaction
2447+
* and try again
2448+
*/
24312449
jbd2_journal_force_commit_nested(sbi->s_journal);
2432-
2433-
/* reset the retry count */
2434-
if (ret == MPAGE_DA_EXTENT_TAIL) {
2450+
wbc->pages_skipped = pages_skipped;
2451+
ret = 0;
2452+
} else if (ret == MPAGE_DA_EXTENT_TAIL) {
24352453
/*
24362454
* got one extent now try with
24372455
* rest of the pages
24382456
*/
2439-
to_write += wbc->nr_to_write;
2457+
pages_written += mpd.pages_written;
2458+
wbc->pages_skipped = pages_skipped;
24402459
ret = 0;
2441-
} else if (wbc->nr_to_write) {
2460+
} else if (wbc->nr_to_write)
24422461
/*
24432462
* There is no more writeout needed
24442463
* or we requested for a noblocking writeout
24452464
* and we found the device congested
24462465
*/
2447-
to_write += wbc->nr_to_write;
24482466
break;
2449-
}
2450-
wbc->nr_to_write = to_write;
2451-
}
2452-
2453-
if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
2454-
/* We skipped pages in this loop */
2455-
wbc->nr_to_write = to_write +
2456-
wbc->pages_skipped - pages_skipped;
2457-
wbc->pages_skipped = pages_skipped;
2458-
goto restart_loop;
24592467
}
2468+
if (pages_skipped != wbc->pages_skipped)
2469+
printk(KERN_EMERG "This should not happen leaving %s "
2470+
"with nr_to_write = %ld ret = %d\n",
2471+
__func__, wbc->nr_to_write, ret);
2472+
2473+
/* Update index */
2474+
index += pages_written;
2475+
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2476+
/*
2477+
* set the writeback_index so that range_cyclic
2478+
* mode will write it back later
2479+
*/
2480+
mapping->writeback_index = index;
24602481

24612482
out_writepages:
2462-
wbc->nr_to_write = to_write - nr_to_writebump;
2483+
if (!no_nrwrite_index_update)
2484+
wbc->no_nrwrite_index_update = 0;
2485+
wbc->nr_to_write -= nr_to_writebump;
24632486
return ret;
24642487
}
24652488

0 commit comments

Comments
 (0)