Skip to content

Commit 4f764e5

Browse files
fdmananamasoncl
authored andcommitted
Btrfs: remove deleted xattrs on fsync log replay
If we deleted xattrs from a file and fsynced the file, after a log replay the xattrs would remain associated to the file. This was an unexpected behaviour and differs from what other filesystems do, such as for example xfs and ext3/4. Fix this by, on fsync log replay, check if every xattr in the fs/subvol tree (that belongs to a logged inode) has a matching xattr in the log, and if it does not, delete it from the fs/subvol tree. This is a similar approach to what we do for dentries when we replay a directory from the fsync log. This issue is trivial to reproduce, and the following excerpt from my test for xfstests triggers the issue: _crash_and_mount() { # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey } rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create out test file and add 3 xattrs to it. touch $SCRATCH_MNT/foobar $SETFATTR_PROG -n user.attr1 -v val1 $SCRATCH_MNT/foobar $SETFATTR_PROG -n user.attr2 -v val2 $SCRATCH_MNT/foobar $SETFATTR_PROG -n user.attr3 -v val3 $SCRATCH_MNT/foobar # Make sure everything is durably persisted. sync # Now delete the second xattr and fsync the inode. $SETFATTR_PROG -x user.attr2 $SCRATCH_MNT/foobar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar _crash_and_mount # After the fsync log is replayed, the file should have only 2 xattrs, the ones # named user.attr1 and user.attr3. The btrfs fsync log replay bug left the file # with the 3 xattrs that we had before deleting the second one and fsyncing the # file. echo "xattr names and values after first fsync log replay:" $GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch # Now write some data to our file, fsync it, remove the first xattr, add a new # hard link to our file and commit the fsync log by fsyncing some other new # file. This is to verify that after log replay our first xattr does not exist # anymore. echo "hello world!" >> $SCRATCH_MNT/foobar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar $SETFATTR_PROG -x user.attr1 $SCRATCH_MNT/foobar ln $SCRATCH_MNT/foobar $SCRATCH_MNT/foobar_link touch $SCRATCH_MNT/qwerty $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/qwerty _crash_and_mount # Now only the xattr with name user.attr3 should be set in our file. echo "xattr names and values after second fsync log replay:" $GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch status=0 exit The expected golden output, which is produced with this patch applied or when testing against xfs or ext3/4, is: xattr names and values after first fsync log replay: # file: SCRATCH_MNT/foobar user.attr1="val1" user.attr3="val3" xattr names and values after second fsync log replay: # file: SCRATCH_MNT/foobar user.attr3="val3" Without this patch applied, the output is: xattr names and values after first fsync log replay: # file: SCRATCH_MNT/foobar user.attr1="val1" user.attr2="val2" user.attr3="val3" xattr names and values after second fsync log replay: # file: SCRATCH_MNT/foobar user.attr1="val1" user.attr2="val2" user.attr3="val3" A patch with a test case for xfstests follows soon. Signed-off-by: Filipe Manana <[email protected]> Signed-off-by: Chris Mason <[email protected]>
1 parent 9b30563 commit 4f764e5

File tree

1 file changed

+109
-14
lines changed

1 file changed

+109
-14
lines changed

fs/btrfs/tree-log.c

Lines changed: 109 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1951,6 +1951,104 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
19511951
return ret;
19521952
}
19531953

1954+
static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
1955+
struct btrfs_root *root,
1956+
struct btrfs_root *log,
1957+
struct btrfs_path *path,
1958+
const u64 ino)
1959+
{
1960+
struct btrfs_key search_key;
1961+
struct btrfs_path *log_path;
1962+
int i;
1963+
int nritems;
1964+
int ret;
1965+
1966+
log_path = btrfs_alloc_path();
1967+
if (!log_path)
1968+
return -ENOMEM;
1969+
1970+
search_key.objectid = ino;
1971+
search_key.type = BTRFS_XATTR_ITEM_KEY;
1972+
search_key.offset = 0;
1973+
again:
1974+
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
1975+
if (ret < 0)
1976+
goto out;
1977+
process_leaf:
1978+
nritems = btrfs_header_nritems(path->nodes[0]);
1979+
for (i = path->slots[0]; i < nritems; i++) {
1980+
struct btrfs_key key;
1981+
struct btrfs_dir_item *di;
1982+
struct btrfs_dir_item *log_di;
1983+
u32 total_size;
1984+
u32 cur;
1985+
1986+
btrfs_item_key_to_cpu(path->nodes[0], &key, i);
1987+
if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
1988+
ret = 0;
1989+
goto out;
1990+
}
1991+
1992+
di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
1993+
total_size = btrfs_item_size_nr(path->nodes[0], i);
1994+
cur = 0;
1995+
while (cur < total_size) {
1996+
u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
1997+
u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
1998+
u32 this_len = sizeof(*di) + name_len + data_len;
1999+
char *name;
2000+
2001+
name = kmalloc(name_len, GFP_NOFS);
2002+
if (!name) {
2003+
ret = -ENOMEM;
2004+
goto out;
2005+
}
2006+
read_extent_buffer(path->nodes[0], name,
2007+
(unsigned long)(di + 1), name_len);
2008+
2009+
log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
2010+
name, name_len, 0);
2011+
btrfs_release_path(log_path);
2012+
if (!log_di) {
2013+
/* Doesn't exist in log tree, so delete it. */
2014+
btrfs_release_path(path);
2015+
di = btrfs_lookup_xattr(trans, root, path, ino,
2016+
name, name_len, -1);
2017+
kfree(name);
2018+
if (IS_ERR(di)) {
2019+
ret = PTR_ERR(di);
2020+
goto out;
2021+
}
2022+
ASSERT(di);
2023+
ret = btrfs_delete_one_dir_name(trans, root,
2024+
path, di);
2025+
if (ret)
2026+
goto out;
2027+
btrfs_release_path(path);
2028+
search_key = key;
2029+
goto again;
2030+
}
2031+
kfree(name);
2032+
if (IS_ERR(log_di)) {
2033+
ret = PTR_ERR(log_di);
2034+
goto out;
2035+
}
2036+
cur += this_len;
2037+
di = (struct btrfs_dir_item *)((char *)di + this_len);
2038+
}
2039+
}
2040+
ret = btrfs_next_leaf(root, path);
2041+
if (ret > 0)
2042+
ret = 0;
2043+
else if (ret == 0)
2044+
goto process_leaf;
2045+
out:
2046+
btrfs_free_path(log_path);
2047+
btrfs_release_path(path);
2048+
return ret;
2049+
}
2050+
2051+
19542052
/*
19552053
* deletion replay happens before we copy any new directory items
19562054
* out of the log or out of backreferences from inodes. It
@@ -2104,6 +2202,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
21042202

21052203
inode_item = btrfs_item_ptr(eb, i,
21062204
struct btrfs_inode_item);
2205+
ret = replay_xattr_deletes(wc->trans, root, log,
2206+
path, key.objectid);
2207+
if (ret)
2208+
break;
21072209
mode = btrfs_inode_mode(eb, inode_item);
21082210
if (S_ISDIR(mode)) {
21092211
ret = replay_dir_deletes(wc->trans,
@@ -4072,10 +4174,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
40724174
if (S_ISDIR(inode->i_mode)) {
40734175
int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
40744176

4075-
if (inode_only == LOG_INODE_EXISTS) {
4076-
max_key_type = BTRFS_INODE_EXTREF_KEY;
4077-
max_key.type = max_key_type;
4078-
}
4177+
if (inode_only == LOG_INODE_EXISTS)
4178+
max_key_type = BTRFS_XATTR_ITEM_KEY;
40794179
ret = drop_objectid_items(trans, log, path, ino, max_key_type);
40804180
} else {
40814181
if (inode_only == LOG_INODE_EXISTS) {
@@ -4100,7 +4200,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
41004200
if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
41014201
&BTRFS_I(inode)->runtime_flags)) {
41024202
if (inode_only == LOG_INODE_EXISTS) {
4103-
max_key.type = BTRFS_INODE_EXTREF_KEY;
4203+
max_key.type = BTRFS_XATTR_ITEM_KEY;
41044204
ret = drop_objectid_items(trans, log, path, ino,
41054205
max_key.type);
41064206
} else {
@@ -4111,17 +4211,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
41114211
ret = btrfs_truncate_inode_items(trans, log,
41124212
inode, 0, 0);
41134213
}
4114-
} else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
4115-
&BTRFS_I(inode)->runtime_flags) ||
4214+
} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4215+
&BTRFS_I(inode)->runtime_flags) ||
41164216
inode_only == LOG_INODE_EXISTS) {
4117-
if (inode_only == LOG_INODE_ALL) {
4118-
clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4119-
&BTRFS_I(inode)->runtime_flags);
4217+
if (inode_only == LOG_INODE_ALL)
41204218
fast_search = true;
4121-
max_key.type = BTRFS_XATTR_ITEM_KEY;
4122-
} else {
4123-
max_key.type = BTRFS_INODE_EXTREF_KEY;
4124-
}
4219+
max_key.type = BTRFS_XATTR_ITEM_KEY;
41254220
ret = drop_objectid_items(trans, log, path, ino,
41264221
max_key.type);
41274222
} else {

0 commit comments

Comments
 (0)