Skip to content

Commit 58617d5

Browse files
committed
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext4: Remove automatic enabling of the HUGE_FILE feature flag ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback ext4: Update Documentation/filesystems/ext4.txt ext4: Remove unused mount options: nomballoc, mballoc, nocheck ext4: Remove compile warnings when building w/o CONFIG_PROC_FS ext4: Add missing newlines to printk messages ext4: Fix file fragmentation during large file write. vfs: Add no_nrwrite_index_update writeback control flag vfs: Remove the range_cont writeback mode. ext4: Use tag dirty lookup during mpage_da_submit_io ext4: let the block device know when unused blocks can be discarded ext4: Don't reuse released data blocks until transaction commits ext4: Use an rbtree for tracking blocks freed during transaction. ext4: Do mballoc init before doing filesystem recovery ext4: Free ext4_prealloc_space using kmem_cache_free ext4: Fix Kconfig typo for ext4dev ext4: Remove an old reference to ext4dev in Makefile comment
2 parents 26e9a39 + f287a1a commit 58617d5

File tree

15 files changed

+320
-336
lines changed

15 files changed

+320
-336
lines changed

Documentation/filesystems/ext4.txt

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,24 @@
22
Ext4 Filesystem
33
===============
44

5-
This is a development version of the ext4 filesystem, an advanced level
6-
of the ext3 filesystem which incorporates scalability and reliability
7-
enhancements for supporting large filesystems (64 bit) in keeping with
8-
increasing disk capacities and state-of-the-art feature requirements.
5+
Ext4 is an advanced level of the ext3 filesystem which incorporates
6+
scalability and reliability enhancements for supporting large filesystems
7+
(64 bit) in keeping with increasing disk capacities and state-of-the-art
8+
feature requirements.
99

10-
Mailing list: [email protected]
10+
Mailing list: linux-ext4@vger.kernel.org
11+
Web site: http://ext4.wiki.kernel.org
1112

1213

1314
1. Quick usage instructions:
1415
===========================
1516

17+
Note: More extensive information for getting started with ext4 can be
18+
found at the ext4 wiki site at the URL:
19+
http://ext4.wiki.kernel.org/index.php/Ext4_Howto
20+
1621
- Compile and install the latest version of e2fsprogs (as of this
17-
writing version 1.41) from:
22+
writing version 1.41.3) from:
1823

1924
http://sourceforge.net/project/showfiles.php?group_id=2406
2025

@@ -36,11 +41,9 @@ Mailing list: [email protected]
3641

3742
# mke2fs -t ext4 /dev/hda1
3843

39-
Or configure an existing ext3 filesystem to support extents and set
40-
the test_fs flag to indicate that it's ok for an in-development
41-
filesystem to touch this filesystem:
44+
Or to configure an existing ext3 filesystem to support extents:
4245

43-
# tune2fs -O extents -E test_fs /dev/hda1
46+
# tune2fs -O extents /dev/hda1
4447

4548
If the filesystem was created with 128 byte inodes, it can be
4649
converted to use 256 byte for greater efficiency via:
@@ -104,8 +107,8 @@ exist yet so I'm not sure they're in the near-term roadmap.
104107
The big performance win will come with mballoc, delalloc and flex_bg
105108
grouping of bitmaps and inode tables. Some test results available here:
106109

107-
- http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html
108-
- http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html
110+
- http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-write-2.6.27-rc1.html
111+
- http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-readwrite-2.6.27-rc1.html
109112

110113
3. Options
111114
==========
@@ -214,9 +217,6 @@ noreservation
214217
bsddf (*) Make 'df' act like BSD.
215218
minixdf Make 'df' act like Minix.
216219

217-
check=none Don't do extra checking of bitmaps on mount.
218-
nocheck
219-
220220
debug Extra debugging information is sent to syslog.
221221

222222
errors=remount-ro(*) Remount the filesystem read-only on an error.
@@ -253,8 +253,6 @@ nobh (a) cache disk block mapping information
253253
"nobh" option tries to avoid associating buffer
254254
heads (supported only for "writeback" mode).
255255

256-
mballoc (*) Use the multiple block allocator for block allocation
257-
nomballoc disabled multiple block allocator for block allocation.
258256
stripe=n Number of filesystem blocks that mballoc will try
259257
to use for allocation size and alignment. For RAID5/6
260258
systems this should be the number of data

fs/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ config EXT4_FS
160160
filesystem initially.
161161

162162
To compile this file system support as a module, choose M here. The
163-
module will be called ext4dev.
163+
module will be called ext4.
164164

165165
If unsure, say N.
166166

fs/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ obj-$(CONFIG_DLM) += dlm/
7171
# Do not add any filesystems before this line
7272
obj-$(CONFIG_REISERFS_FS) += reiserfs/
7373
obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
74-
obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev
74+
obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4
7575
obj-$(CONFIG_JBD) += jbd/
7676
obj-$(CONFIG_JBD2) += jbd2/
7777
obj-$(CONFIG_EXT2_FS) += ext2/

fs/ext4/balloc.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -568,8 +568,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
568568

569569
/* this isn't the right place to decide whether block is metadata
570570
* inode.c/extents.c knows better, but for safety ... */
571-
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
572-
ext4_should_journal_data(inode))
571+
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
572+
metadata = 1;
573+
574+
/* We need to make sure we don't reuse
575+
* blocks released until the transaction commits.
576+
* writeback mode has weak data consistency so
577+
* don't force data as metadata when freeing block
578+
* for writeback mode.
579+
*/
580+
if (metadata == 0 && !ext4_should_writeback_data(inode))
573581
metadata = 1;
574582

575583
sb = inode->i_sb;

fs/ext4/ext4.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,6 @@ do { \
511511
/*
512512
* Mount flags
513513
*/
514-
#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
515514
#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
516515
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
517516
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */

fs/ext4/ext4_sb.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,6 @@ struct ext4_sb_info {
9999
struct inode *s_buddy_cache;
100100
long s_blocks_reserved;
101101
spinlock_t s_reserve_lock;
102-
struct list_head s_active_transaction;
103-
struct list_head s_closed_transaction;
104-
struct list_head s_committed_transaction;
105102
spinlock_t s_md_lock;
106103
tid_t s_last_transaction;
107104
unsigned short *s_mb_offsets, *s_mb_maxs;

fs/ext4/inode.c

Lines changed: 76 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,27 +1648,38 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
16481648
int ret = 0, err, nr_pages, i;
16491649
unsigned long index, end;
16501650
struct pagevec pvec;
1651+
long pages_skipped;
16511652

16521653
BUG_ON(mpd->next_page <= mpd->first_page);
16531654
pagevec_init(&pvec, 0);
16541655
index = mpd->first_page;
16551656
end = mpd->next_page - 1;
16561657

16571658
while (index <= end) {
1658-
/* XXX: optimize tail */
1659-
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1659+
/*
1660+
* We can use PAGECACHE_TAG_DIRTY lookup here because
1661+
* even though we have cleared the dirty flag on the page
1662+
* We still keep the page in the radix tree with tag
1663+
* PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
1664+
* The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
1665+
* which is called via the below writepage callback.
1666+
*/
1667+
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1668+
PAGECACHE_TAG_DIRTY,
1669+
min(end - index,
1670+
(pgoff_t)PAGEVEC_SIZE-1) + 1);
16601671
if (nr_pages == 0)
16611672
break;
16621673
for (i = 0; i < nr_pages; i++) {
16631674
struct page *page = pvec.pages[i];
16641675

1665-
index = page->index;
1666-
if (index > end)
1667-
break;
1668-
index++;
1669-
1676+
pages_skipped = mpd->wbc->pages_skipped;
16701677
err = mapping->a_ops->writepage(page, mpd->wbc);
1671-
if (!err)
1678+
if (!err && (pages_skipped == mpd->wbc->pages_skipped))
1679+
/*
1680+
* have successfully written the page
1681+
* without skipping the same
1682+
*/
16721683
mpd->pages_written++;
16731684
/*
16741685
* In error case, we have to continue because
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
21042115
struct writeback_control *wbc,
21052116
struct mpage_da_data *mpd)
21062117
{
2107-
long to_write;
21082118
int ret;
21092119

21102120
if (!mpd->get_block)
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
21192129
mpd->pages_written = 0;
21202130
mpd->retval = 0;
21212131

2122-
to_write = wbc->nr_to_write;
2123-
21242132
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
2125-
21262133
/*
21272134
* Handle last extent of pages
21282135
*/
21292136
if (!mpd->io_done && mpd->next_page != mpd->first_page) {
21302137
if (mpage_da_map_blocks(mpd) == 0)
21312138
mpage_da_submit_io(mpd);
2132-
}
21332139

2134-
wbc->nr_to_write = to_write - mpd->pages_written;
2140+
mpd->io_done = 1;
2141+
ret = MPAGE_DA_EXTENT_TAIL;
2142+
}
2143+
wbc->nr_to_write -= mpd->pages_written;
21352144
return ret;
21362145
}
21372146

@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
23602369
static int ext4_da_writepages(struct address_space *mapping,
23612370
struct writeback_control *wbc)
23622371
{
2372+
pgoff_t index;
2373+
int range_whole = 0;
23632374
handle_t *handle = NULL;
2364-
loff_t range_start = 0;
23652375
struct mpage_da_data mpd;
23662376
struct inode *inode = mapping->host;
2377+
int no_nrwrite_index_update;
2378+
long pages_written = 0, pages_skipped;
23672379
int needed_blocks, ret = 0, nr_to_writebump = 0;
2368-
long to_write, pages_skipped = 0;
23692380
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
23702381

23712382
/*
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
23852396
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
23862397
wbc->nr_to_write = sbi->s_mb_stream_request;
23872398
}
2399+
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2400+
range_whole = 1;
23882401

2389-
if (!wbc->range_cyclic)
2390-
/*
2391-
* If range_cyclic is not set force range_cont
2392-
* and save the old writeback_index
2393-
*/
2394-
wbc->range_cont = 1;
2395-
2396-
range_start = wbc->range_start;
2397-
pages_skipped = wbc->pages_skipped;
2402+
if (wbc->range_cyclic)
2403+
index = mapping->writeback_index;
2404+
else
2405+
index = wbc->range_start >> PAGE_CACHE_SHIFT;
23982406

23992407
mpd.wbc = wbc;
24002408
mpd.inode = mapping->host;
24012409

2402-
restart_loop:
2403-
to_write = wbc->nr_to_write;
2404-
while (!ret && to_write > 0) {
2410+
/*
2411+
* we don't want write_cache_pages to update
2412+
* nr_to_write and writeback_index
2413+
*/
2414+
no_nrwrite_index_update = wbc->no_nrwrite_index_update;
2415+
wbc->no_nrwrite_index_update = 1;
2416+
pages_skipped = wbc->pages_skipped;
2417+
2418+
while (!ret && wbc->nr_to_write > 0) {
24052419

24062420
/*
24072421
* we insert one extent at a time. So we need
@@ -2422,48 +2436,53 @@ static int ext4_da_writepages(struct address_space *mapping,
24222436
dump_stack();
24232437
goto out_writepages;
24242438
}
2425-
to_write -= wbc->nr_to_write;
2426-
24272439
mpd.get_block = ext4_da_get_block_write;
24282440
ret = mpage_da_writepages(mapping, wbc, &mpd);
24292441

24302442
ext4_journal_stop(handle);
24312443

2432-
if (mpd.retval == -ENOSPC)
2444+
if (mpd.retval == -ENOSPC) {
2445+
/* commit the transaction which would
2446+
* free blocks released in the transaction
2447+
* and try again
2448+
*/
24332449
jbd2_journal_force_commit_nested(sbi->s_journal);
2434-
2435-
/* reset the retry count */
2436-
if (ret == MPAGE_DA_EXTENT_TAIL) {
2450+
wbc->pages_skipped = pages_skipped;
2451+
ret = 0;
2452+
} else if (ret == MPAGE_DA_EXTENT_TAIL) {
24372453
/*
24382454
* got one extent now try with
24392455
* rest of the pages
24402456
*/
2441-
to_write += wbc->nr_to_write;
2457+
pages_written += mpd.pages_written;
2458+
wbc->pages_skipped = pages_skipped;
24422459
ret = 0;
2443-
} else if (wbc->nr_to_write) {
2460+
} else if (wbc->nr_to_write)
24442461
/*
24452462
* There is no more writeout needed
24462463
* or we requested for a nonblocking writeout
24472464
* and we found the device congested
24482465
*/
2449-
to_write += wbc->nr_to_write;
24502466
break;
2451-
}
2452-
wbc->nr_to_write = to_write;
2453-
}
2454-
2455-
if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
2456-
/* We skipped pages in this loop */
2457-
wbc->range_start = range_start;
2458-
wbc->nr_to_write = to_write +
2459-
wbc->pages_skipped - pages_skipped;
2460-
wbc->pages_skipped = pages_skipped;
2461-
goto restart_loop;
24622467
}
2468+
if (pages_skipped != wbc->pages_skipped)
2469+
printk(KERN_EMERG "This should not happen leaving %s "
2470+
"with nr_to_write = %ld ret = %d\n",
2471+
__func__, wbc->nr_to_write, ret);
2472+
2473+
/* Update index */
2474+
index += pages_written;
2475+
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2476+
/*
2477+
* set the writeback_index so that range_cyclic
2478+
* mode will write it back later
2479+
*/
2480+
mapping->writeback_index = index;
24632481

24642482
out_writepages:
2465-
wbc->nr_to_write = to_write - nr_to_writebump;
2466-
wbc->range_start = range_start;
2483+
if (!no_nrwrite_index_update)
2484+
wbc->no_nrwrite_index_update = 0;
2485+
wbc->nr_to_write -= nr_to_writebump;
24672486
return ret;
24682487
}
24692488

@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle,
41754194
struct inode *inode = &(ei->vfs_inode);
41764195
u64 i_blocks = inode->i_blocks;
41774196
struct super_block *sb = inode->i_sb;
4178-
int err = 0;
41794197

41804198
if (i_blocks <= ~0U) {
41814199
/*
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle,
41854203
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
41864204
raw_inode->i_blocks_high = 0;
41874205
ei->i_flags &= ~EXT4_HUGE_FILE_FL;
4188-
} else if (i_blocks <= 0xffffffffffffULL) {
4206+
return 0;
4207+
}
4208+
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
4209+
return -EFBIG;
4210+
4211+
if (i_blocks <= 0xffffffffffffULL) {
41894212
/*
41904213
* i_blocks can be represented in a 48 bit variable
41914214
* as multiple of 512 bytes
41924215
*/
4193-
err = ext4_update_rocompat_feature(handle, sb,
4194-
EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
4195-
if (err)
4196-
goto err_out;
4197-
/* i_block is stored in the split 48 bit fields */
41984216
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
41994217
raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
42004218
ei->i_flags &= ~EXT4_HUGE_FILE_FL;
42014219
} else {
4202-
/*
4203-
* i_blocks should be represented in a 48 bit variable
4204-
* as multiple of file system block size
4205-
*/
4206-
err = ext4_update_rocompat_feature(handle, sb,
4207-
EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
4208-
if (err)
4209-
goto err_out;
42104220
ei->i_flags |= EXT4_HUGE_FILE_FL;
42114221
/* i_block is stored in file system block size */
42124222
i_blocks = i_blocks >> (inode->i_blkbits - 9);
42134223
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
42144224
raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
42154225
}
4216-
err_out:
4217-
return err;
4226+
return 0;
42184227
}
42194228

42204229
/*

0 commit comments

Comments
 (0)