Skip to content

Commit a4bb6b6

Browse files
achender authored and tytso committed
ext4: enable "punch hole" functionality
This patch adds new routines: "ext4_punch_hole", "ext4_ext_punch_hole" and "ext4_ext_check_cache". fallocate has been modified to call ext4_punch_hole when the punch hole flag is passed. At the moment, we only support punching holes in extents, so this routine is pretty much a wrapper for the ext4_ext_punch_hole routine. The ext4_ext_punch_hole routine first completes all outstanding writes with the associated pages, and then releases them. The data that is not block aligned is zeroed, and all blocks in between are punched out. The ext4_ext_check_cache routine is very similar to ext4_ext_in_cache except it accepts an ext4_ext_cache parameter instead of an ext4_extent parameter. This routine is used by ext4_ext_punch_hole to check whether a block lies in a hole that has been cached. The ext4_ext_cache parameter is necessary because the members of the ext4_extent structure are not large enough to hold a 32 bit value. The existing ext4_ext_in_cache routine has become a wrapper to this new function. [ext4 punch hole patch series 5/5 v7] Signed-off-by: Allison Henderson <[email protected]> Signed-off-by: "Theodore Ts'o" <[email protected]> Reviewed-by: Mingming Cao <[email protected]>
1 parent e861304 commit a4bb6b6

File tree

3 files changed

+253
-11
lines changed

3 files changed

+253
-11
lines changed

fs/ext4/ext4.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1816,6 +1816,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
18161816
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
18171817
extern int ext4_can_truncate(struct inode *inode);
18181818
extern void ext4_truncate(struct inode *);
1819+
extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
18191820
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
18201821
extern void ext4_set_inode_flags(struct inode *);
18211822
extern void ext4_get_inode_flags(struct ext4_inode_info *);
@@ -2157,6 +2158,8 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
21572158
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
21582159
struct ext4_map_blocks *map, int flags);
21592160
extern void ext4_ext_truncate(struct inode *);
2161+
extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
2162+
loff_t length);
21602163
extern void ext4_ext_init(struct super_block *);
21612164
extern void ext4_ext_release(struct super_block *);
21622165
extern long ext4_fallocate(struct file *file, int mode, loff_t offset,

fs/ext4/extents.c

Lines changed: 225 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2044,12 +2044,23 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
20442044
}
20452045

20462046
/*
2047+
* ext4_ext_check_cache()
2048+
* Checks to see if the given block is in the cache.
2049+
* If it is, the cached extent is stored in the given
2050+
* cache extent pointer. If the cached extent is a hole,
2051+
* this routine should be used instead of
2052+
* ext4_ext_in_cache if the calling function needs to
2053+
* know the size of the hole.
2054+
*
2055+
* @inode: The files inode
2056+
* @block: The block to look for in the cache
2057+
* @ex: Pointer where the cached extent will be stored
2058+
* if it contains block
2059+
*
20472060
* Return 0 if cache is invalid; 1 if the cache is valid
20482061
*/
2049-
static int
2050-
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2051-
struct ext4_extent *ex)
2052-
{
2062+
static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
2063+
struct ext4_ext_cache *ex){
20532064
struct ext4_ext_cache *cex;
20542065
struct ext4_sb_info *sbi;
20552066
int ret = 0;
@@ -2066,9 +2077,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
20662077
goto errout;
20672078

20682079
if (in_range(block, cex->ec_block, cex->ec_len)) {
2069-
ex->ee_block = cpu_to_le32(cex->ec_block);
2070-
ext4_ext_store_pblock(ex, cex->ec_start);
2071-
ex->ee_len = cpu_to_le16(cex->ec_len);
2080+
memcpy(ex, cex, sizeof(struct ext4_ext_cache));
20722081
ext_debug("%u cached by %u:%u:%llu\n",
20732082
block,
20742083
cex->ec_block, cex->ec_len, cex->ec_start);
@@ -2083,6 +2092,37 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
20832092
return ret;
20842093
}
20852094

2095+
/*
2096+
* ext4_ext_in_cache()
2097+
* Checks to see if the given block is in the cache.
2098+
* If it is, the cached extent is stored in the given
2099+
* extent pointer.
2100+
*
2101+
* @inode: The files inode
2102+
* @block: The block to look for in the cache
2103+
* @ex: Pointer where the cached extent will be stored
2104+
* if it contains block
2105+
*
2106+
* Return 0 if cache is invalid; 1 if the cache is valid
2107+
*/
2108+
static int
2109+
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2110+
struct ext4_extent *ex)
2111+
{
2112+
struct ext4_ext_cache cex;
2113+
int ret = 0;
2114+
2115+
if (ext4_ext_check_cache(inode, block, &cex)) {
2116+
ex->ee_block = cpu_to_le32(cex.ec_block);
2117+
ext4_ext_store_pblock(ex, cex.ec_start);
2118+
ex->ee_len = cpu_to_le16(cex.ec_len);
2119+
ret = 1;
2120+
}
2121+
2122+
return ret;
2123+
}
2124+
2125+
20862126
/*
20872127
* ext4_ext_rm_idx:
20882128
* removes index from the index block.
@@ -3724,17 +3764,20 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
37243764
struct ext4_map_blocks map;
37253765
unsigned int credits, blkbits = inode->i_blkbits;
37263766

3727-
/* We only support the FALLOC_FL_KEEP_SIZE mode */
3728-
if (mode & ~FALLOC_FL_KEEP_SIZE)
3729-
return -EOPNOTSUPP;
3730-
37313767
/*
37323768
* currently supporting (pre)allocate mode for extent-based
37333769
* files _only_
37343770
*/
37353771
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
37363772
return -EOPNOTSUPP;
37373773

3774+
/* Return error if mode is not supported */
3775+
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
3776+
return -EOPNOTSUPP;
3777+
3778+
if (mode & FALLOC_FL_PUNCH_HOLE)
3779+
return ext4_punch_hole(file, offset, len);
3780+
37383781
trace_ext4_fallocate_enter(inode, offset, len, mode);
37393782
map.m_lblk = offset >> blkbits;
37403783
/*
@@ -4100,6 +4143,177 @@ static int ext4_xattr_fiemap(struct inode *inode,
41004143
return (error < 0 ? error : 0);
41014144
}
41024145

4146+
/*
4147+
* ext4_ext_punch_hole
4148+
*
4149+
* Punches a hole of "length" bytes in a file starting
4150+
* at byte "offset"
4151+
*
4152+
* @inode: The inode of the file to punch a hole in
4153+
* @offset: The starting byte offset of the hole
4154+
* @length: The length of the hole
4155+
*
4156+
* Returns the number of blocks removed or negative on err
4157+
*/
4158+
int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4159+
{
4160+
struct inode *inode = file->f_path.dentry->d_inode;
4161+
struct super_block *sb = inode->i_sb;
4162+
struct ext4_ext_cache cache_ex;
4163+
ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
4164+
struct address_space *mapping = inode->i_mapping;
4165+
struct ext4_map_blocks map;
4166+
handle_t *handle;
4167+
loff_t first_block_offset, last_block_offset, block_len;
4168+
loff_t first_page, last_page, first_page_offset, last_page_offset;
4169+
int ret, credits, blocks_released, err = 0;
4170+
4171+
first_block = (offset + sb->s_blocksize - 1) >>
4172+
EXT4_BLOCK_SIZE_BITS(sb);
4173+
last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
4174+
4175+
first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb);
4176+
last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb);
4177+
4178+
first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
4179+
last_page = (offset + length) >> PAGE_CACHE_SHIFT;
4180+
4181+
first_page_offset = first_page << PAGE_CACHE_SHIFT;
4182+
last_page_offset = last_page << PAGE_CACHE_SHIFT;
4183+
4184+
/*
4185+
* Write out all dirty pages to avoid race conditions
4186+
* Then release them.
4187+
*/
4188+
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4189+
err = filemap_write_and_wait_range(mapping,
4190+
first_page_offset == 0 ? 0 : first_page_offset-1,
4191+
last_page_offset);
4192+
4193+
if (err)
4194+
return err;
4195+
}
4196+
4197+
/* Now release the pages */
4198+
if (last_page_offset > first_page_offset) {
4199+
truncate_inode_pages_range(mapping, first_page_offset,
4200+
last_page_offset-1);
4201+
}
4202+
4203+
/* finish any pending end_io work */
4204+
ext4_flush_completed_IO(inode);
4205+
4206+
credits = ext4_writepage_trans_blocks(inode);
4207+
handle = ext4_journal_start(inode, credits);
4208+
if (IS_ERR(handle))
4209+
return PTR_ERR(handle);
4210+
4211+
err = ext4_orphan_add(handle, inode);
4212+
if (err)
4213+
goto out;
4214+
4215+
/*
4216+
* Now we need to zero out the un block aligned data.
4217+
* If the file is smaller than a block, just
4218+
* zero out the middle
4219+
*/
4220+
if (first_block > last_block)
4221+
ext4_block_zero_page_range(handle, mapping, offset, length);
4222+
else {
4223+
/* zero out the head of the hole before the first block */
4224+
block_len = first_block_offset - offset;
4225+
if (block_len > 0)
4226+
ext4_block_zero_page_range(handle, mapping,
4227+
offset, block_len);
4228+
4229+
/* zero out the tail of the hole after the last block */
4230+
block_len = offset + length - last_block_offset;
4231+
if (block_len > 0) {
4232+
ext4_block_zero_page_range(handle, mapping,
4233+
last_block_offset, block_len);
4234+
}
4235+
}
4236+
4237+
/* If there are no blocks to remove, return now */
4238+
if (first_block >= last_block)
4239+
goto out;
4240+
4241+
down_write(&EXT4_I(inode)->i_data_sem);
4242+
ext4_ext_invalidate_cache(inode);
4243+
ext4_discard_preallocations(inode);
4244+
4245+
/*
4246+
* Loop over all the blocks and identify blocks
4247+
* that need to be punched out
4248+
*/
4249+
iblock = first_block;
4250+
blocks_released = 0;
4251+
while (iblock < last_block) {
4252+
max_blocks = last_block - iblock;
4253+
num_blocks = 1;
4254+
memset(&map, 0, sizeof(map));
4255+
map.m_lblk = iblock;
4256+
map.m_len = max_blocks;
4257+
ret = ext4_ext_map_blocks(handle, inode, &map,
4258+
EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
4259+
4260+
if (ret > 0) {
4261+
blocks_released += ret;
4262+
num_blocks = ret;
4263+
} else if (ret == 0) {
4264+
/*
4265+
* If map blocks could not find the block,
4266+
* then it is in a hole. If the hole was
4267+
* not already cached, then map blocks should
4268+
* put it in the cache. So we can get the hole
4269+
* out of the cache
4270+
*/
4271+
memset(&cache_ex, 0, sizeof(cache_ex));
4272+
if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) &&
4273+
!cache_ex.ec_start) {
4274+
4275+
/* The hole is cached */
4276+
num_blocks = cache_ex.ec_block +
4277+
cache_ex.ec_len - iblock;
4278+
4279+
} else {
4280+
/* The block could not be identified */
4281+
err = -EIO;
4282+
break;
4283+
}
4284+
} else {
4285+
/* Map blocks error */
4286+
err = ret;
4287+
break;
4288+
}
4289+
4290+
if (num_blocks == 0) {
4291+
/* This condition should never happen */
4292+
ext_debug("Block lookup failed");
4293+
err = -EIO;
4294+
break;
4295+
}
4296+
4297+
iblock += num_blocks;
4298+
}
4299+
4300+
if (blocks_released > 0) {
4301+
ext4_ext_invalidate_cache(inode);
4302+
ext4_discard_preallocations(inode);
4303+
}
4304+
4305+
if (IS_SYNC(inode))
4306+
ext4_handle_sync(handle);
4307+
4308+
up_write(&EXT4_I(inode)->i_data_sem);
4309+
4310+
out:
4311+
ext4_orphan_del(handle, inode);
4312+
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4313+
ext4_mark_inode_dirty(handle, inode);
4314+
ext4_journal_stop(handle);
4315+
return err;
4316+
}
41034317
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
41044318
__u64 start, __u64 len)
41054319
{

fs/ext4/inode.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4418,6 +4418,31 @@ int ext4_can_truncate(struct inode *inode)
44184418
return 0;
44194419
}
44204420

4421+
/*
4422+
* ext4_punch_hole: punches a hole in a file by releaseing the blocks
4423+
* associated with the given offset and length
4424+
*
4425+
* @inode: File inode
4426+
* @offset: The offset where the hole will begin
4427+
* @len: The length of the hole
4428+
*
4429+
* Returns: 0 on sucess or negative on failure
4430+
*/
4431+
4432+
int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
4433+
{
4434+
struct inode *inode = file->f_path.dentry->d_inode;
4435+
if (!S_ISREG(inode->i_mode))
4436+
return -ENOTSUPP;
4437+
4438+
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
4439+
/* TODO: Add support for non extent hole punching */
4440+
return -ENOTSUPP;
4441+
}
4442+
4443+
return ext4_ext_punch_hole(file, offset, length);
4444+
}
4445+
44214446
/*
44224447
* ext4_truncate()
44234448
*

0 commit comments

Comments
 (0)