Skip to content

Commit dec214d

Browse files
Tahsin Erdogan authored and tytso committed
ext4: xattr inode deduplication
Ext4 now supports xattr values that are up to 64k in size (vfs limit). Large xattr values are stored in external inodes, each one holding a single value. Once written, the data blocks of these inodes are immutable.

The real world use cases are expected to have a lot of value duplication, such as inherited ACLs etc. To reduce data duplication on disk, this patch implements a deduplicator that allows sharing of xattr inodes.

The deduplication is based on an in-memory hash lookup that is a best effort sharing scheme. When an xattr inode is read from disk (i.e. getxattr() call), its crc32c hash is added to a hash table. Before creating a new xattr inode for a value being set, the hash table is checked to see if an existing inode holds an identical value. If such an inode is found, the ref count on that inode is incremented. On value removal the ref count is decremented, and if it reaches zero the inode is deleted.

The quota charging for such inodes is manually managed. Every reference holder is charged the full size as if there was no sharing happening. This is consistent with how xattr blocks are also charged.

[ Fixed up journal credits calculation to handle inline data and the rare case where a shared xattr block can get freed when two threads race on breaking the xattr block sharing. --tytso ]

Signed-off-by: Tahsin Erdogan <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 30a7eb9 commit dec214d

File tree

7 files changed

+862
-294
lines changed

7 files changed

+862
-294
lines changed

fs/ext4/acl.c

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,10 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
238238
if (error)
239239
return error;
240240
retry:
241-
credits = ext4_xattr_set_credits(inode, acl_size);
241+
error = ext4_xattr_set_credits(inode, acl_size, &credits);
242+
if (error)
243+
return error;
244+
242245
handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
243246
if (IS_ERR(handle))
244247
return PTR_ERR(handle);

fs/ext4/ext4.h

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1517,6 +1517,7 @@ struct ext4_sb_info {
15171517
long s_es_nr_inode;
15181518
struct ext4_es_stats s_es_stats;
15191519
struct mb_cache *s_ea_block_cache;
1520+
struct mb_cache *s_ea_inode_cache;
15201521
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
15211522

15221523
/* Ratelimit ext4 messages. */
@@ -2100,7 +2101,11 @@ static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
21002101
return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
21012102
}
21022103

2103-
#define ext4_is_quota_file(inode) IS_NOQUOTA(inode)
2104+
static inline bool ext4_is_quota_file(struct inode *inode)
2105+
{
2106+
return IS_NOQUOTA(inode) &&
2107+
!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL);
2108+
}
21042109

21052110
/*
21062111
* This structure is stuffed into the struct file's private_data field
@@ -2493,7 +2498,6 @@ extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
24932498
extern void ext4_set_inode_flags(struct inode *);
24942499
extern int ext4_alloc_da_blocks(struct inode *inode);
24952500
extern void ext4_set_aops(struct inode *inode);
2496-
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
24972501
extern int ext4_writepage_trans_blocks(struct inode *);
24982502
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
24992503
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
@@ -2720,19 +2724,20 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
27202724
extern int ext4_register_li_request(struct super_block *sb,
27212725
ext4_group_t first_not_zeroed);
27222726

2723-
static inline int ext4_has_group_desc_csum(struct super_block *sb)
2724-
{
2725-
return ext4_has_feature_gdt_csum(sb) ||
2726-
EXT4_SB(sb)->s_chksum_driver != NULL;
2727-
}
2728-
27292727
static inline int ext4_has_metadata_csum(struct super_block *sb)
27302728
{
27312729
WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) &&
27322730
!EXT4_SB(sb)->s_chksum_driver);
27332731

2734-
return (EXT4_SB(sb)->s_chksum_driver != NULL);
2732+
return ext4_has_feature_metadata_csum(sb) &&
2733+
(EXT4_SB(sb)->s_chksum_driver != NULL);
27352734
}
2735+
2736+
static inline int ext4_has_group_desc_csum(struct super_block *sb)
2737+
{
2738+
return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb);
2739+
}
2740+
27362741
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
27372742
{
27382743
return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |

fs/ext4/inode.c

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,8 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset,
139139
unsigned int length);
140140
static int __ext4_journalled_writepage(struct page *page, unsigned int len);
141141
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
142+
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
143+
int pextents);
142144

143145
/*
144146
* Test whether an inode is a fast symlink.
@@ -4843,8 +4845,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
48434845
}
48444846
brelse(iloc.bh);
48454847
ext4_set_inode_flags(inode);
4846-
if (ei->i_flags & EXT4_EA_INODE_FL)
4848+
4849+
if (ei->i_flags & EXT4_EA_INODE_FL) {
48474850
ext4_xattr_inode_set_class(inode);
4851+
4852+
inode_lock(inode);
4853+
inode->i_flags |= S_NOQUOTA;
4854+
inode_unlock(inode);
4855+
}
4856+
48484857
unlock_new_inode(inode);
48494858
return inode;
48504859

@@ -5503,7 +5512,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
55035512
*
55045513
* Also account for superblock, inode, quota and xattr blocks
55055514
*/
5506-
int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
5515+
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
55075516
int pextents)
55085517
{
55095518
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);

fs/ext4/super.c

Lines changed: 34 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -927,6 +927,10 @@ static void ext4_put_super(struct super_block *sb)
927927
invalidate_bdev(sbi->journal_bdev);
928928
ext4_blkdev_remove(sbi);
929929
}
930+
if (sbi->s_ea_inode_cache) {
931+
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
932+
sbi->s_ea_inode_cache = NULL;
933+
}
930934
if (sbi->s_ea_block_cache) {
931935
ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
932936
sbi->s_ea_block_cache = NULL;
@@ -1178,7 +1182,10 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
11781182
if (res)
11791183
return res;
11801184
retry:
1181-
credits = ext4_xattr_set_credits(inode, len);
1185+
res = ext4_xattr_set_credits(inode, len, &credits);
1186+
if (res)
1187+
return res;
1188+
11821189
handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
11831190
if (IS_ERR(handle))
11841191
return PTR_ERR(handle);
@@ -3445,7 +3452,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
34453452
}
34463453

34473454
/* Load the checksum driver */
3448-
if (ext4_has_feature_metadata_csum(sb)) {
3455+
if (ext4_has_feature_metadata_csum(sb) ||
3456+
ext4_has_feature_ea_inode(sb)) {
34493457
sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
34503458
if (IS_ERR(sbi->s_chksum_driver)) {
34513459
ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
@@ -3467,7 +3475,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
34673475
/* Precompute checksum seed for all metadata */
34683476
if (ext4_has_feature_csum_seed(sb))
34693477
sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
3470-
else if (ext4_has_metadata_csum(sb))
3478+
else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
34713479
sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
34723480
sizeof(es->s_uuid));
34733481

@@ -3597,6 +3605,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
35973605
"The Hurd can't support 64-bit file systems");
35983606
goto failed_mount;
35993607
}
3608+
3609+
/*
3610+
* ea_inode feature uses l_i_version field which is not
3611+
* available in HURD_COMPAT mode.
3612+
*/
3613+
if (ext4_has_feature_ea_inode(sb)) {
3614+
ext4_msg(sb, KERN_ERR,
3615+
"ea_inode feature is not supported for Hurd");
3616+
goto failed_mount;
3617+
}
36003618
}
36013619

36023620
if (IS_EXT2_SB(sb)) {
@@ -4067,6 +4085,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
40674085
goto failed_mount_wq;
40684086
}
40694087

4088+
if (ext4_has_feature_ea_inode(sb)) {
4089+
sbi->s_ea_inode_cache = ext4_xattr_create_cache();
4090+
if (!sbi->s_ea_inode_cache) {
4091+
ext4_msg(sb, KERN_ERR,
4092+
"Failed to create ea_inode_cache");
4093+
goto failed_mount_wq;
4094+
}
4095+
}
4096+
40704097
if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
40714098
(blocksize != PAGE_SIZE)) {
40724099
ext4_msg(sb, KERN_ERR,
@@ -4296,6 +4323,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
42964323
if (EXT4_SB(sb)->rsv_conversion_wq)
42974324
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
42984325
failed_mount_wq:
4326+
if (sbi->s_ea_inode_cache) {
4327+
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
4328+
sbi->s_ea_inode_cache = NULL;
4329+
}
42994330
if (sbi->s_ea_block_cache) {
43004331
ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
43014332
sbi->s_ea_block_cache = NULL;

0 commit comments

Comments
 (0)