Skip to content

Commit d4682ba

Browse files
fdmananakdave
authored andcommitted
Btrfs: sync log after logging new name
When we add a new name for an inode which was logged in the current transaction, we update the inode in the log so that its new name and ancestors are added to the log. However when we do this we do not persist the log, so the changes remain in memory only, and as a consequence, any ancestors that were created in the current transaction are updated such that future calls to btrfs_inode_in_log() return true. This leads to a subsequent fsync against such new ancestor directories returning immediately, without persisting the log, therefore after a power failure the new ancestor directories do not exist, despite fsync being called against them explicitly. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/A $ mkdir /mnt/B $ mkdir /mnt/A/C $ touch /mnt/B/foo $ xfs_io -c "fsync" /mnt/B/foo $ ln /mnt/B/foo /mnt/A/C/foo $ xfs_io -c "fsync" /mnt/A <power failure> After the power failure, directory "A" does not exist, despite the explicit fsync on it. Instead of fixing this by changing the behaviour of the explicit fsync on directory "A" to persist the log instead of doing nothing, make the logging of the new file name (which happens when creating a hard link or renaming) persist the log. This approach not only is simpler, not requiring addition of new fields to the inode in memory structure, but also gives us the same behaviour as ext4, xfs and f2fs (possibly other filesystems too). A test case for fstests follows soon. Fixes: 12fcfd2 ("Btrfs: tree logging unlink/rename fixes") Reported-by: Vijay Chidambaram <[email protected]> Signed-off-by: Filipe Manana <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 8ecebf4 commit d4682ba

File tree

3 files changed

+131
-19
lines changed

3 files changed

+131
-19
lines changed

fs/btrfs/inode.c

Lines changed: 80 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6634,6 +6634,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
66346634
drop_inode = 1;
66356635
} else {
66366636
struct dentry *parent = dentry->d_parent;
6637+
int ret;
6638+
66376639
err = btrfs_update_inode(trans, root, inode);
66386640
if (err)
66396641
goto fail;
@@ -6647,7 +6649,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
66476649
goto fail;
66486650
}
66496651
d_instantiate(dentry, inode);
6650-
btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
6652+
ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
6653+
true, NULL);
6654+
if (ret == BTRFS_NEED_TRANS_COMMIT) {
6655+
err = btrfs_commit_transaction(trans);
6656+
trans = NULL;
6657+
}
66516658
}
66526659

66536660
fail:
@@ -9386,14 +9393,21 @@ static int btrfs_rename_exchange(struct inode *old_dir,
93869393
u64 new_idx = 0;
93879394
u64 root_objectid;
93889395
int ret;
9389-
int ret2;
93909396
bool root_log_pinned = false;
93919397
bool dest_log_pinned = false;
9398+
struct btrfs_log_ctx ctx_root;
9399+
struct btrfs_log_ctx ctx_dest;
9400+
bool sync_log_root = false;
9401+
bool sync_log_dest = false;
9402+
bool commit_transaction = false;
93929403

93939404
/* we only allow rename subvolume link between subvolumes */
93949405
if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
93959406
return -EXDEV;
93969407

9408+
btrfs_init_log_ctx(&ctx_root, old_inode);
9409+
btrfs_init_log_ctx(&ctx_dest, new_inode);
9410+
93979411
/* close the race window with snapshot create/destroy ioctl */
93989412
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
93999413
down_read(&fs_info->subvol_sem);
@@ -9540,15 +9554,29 @@ static int btrfs_rename_exchange(struct inode *old_dir,
95409554

95419555
if (root_log_pinned) {
95429556
parent = new_dentry->d_parent;
9543-
btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
9544-
parent);
9557+
ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
9558+
BTRFS_I(old_dir), parent,
9559+
false, &ctx_root);
9560+
if (ret == BTRFS_NEED_LOG_SYNC)
9561+
sync_log_root = true;
9562+
else if (ret == BTRFS_NEED_TRANS_COMMIT)
9563+
commit_transaction = true;
9564+
ret = 0;
95459565
btrfs_end_log_trans(root);
95469566
root_log_pinned = false;
95479567
}
95489568
if (dest_log_pinned) {
9549-
parent = old_dentry->d_parent;
9550-
btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
9551-
parent);
9569+
if (!commit_transaction) {
9570+
parent = old_dentry->d_parent;
9571+
ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
9572+
BTRFS_I(new_dir), parent,
9573+
false, &ctx_dest);
9574+
if (ret == BTRFS_NEED_LOG_SYNC)
9575+
sync_log_dest = true;
9576+
else if (ret == BTRFS_NEED_TRANS_COMMIT)
9577+
commit_transaction = true;
9578+
ret = 0;
9579+
}
95529580
btrfs_end_log_trans(dest);
95539581
dest_log_pinned = false;
95549582
}
@@ -9581,8 +9609,26 @@ static int btrfs_rename_exchange(struct inode *old_dir,
95819609
dest_log_pinned = false;
95829610
}
95839611
}
9584-
ret2 = btrfs_end_transaction(trans);
9585-
ret = ret ? ret : ret2;
9612+
if (!ret && sync_log_root && !commit_transaction) {
9613+
ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
9614+
&ctx_root);
9615+
if (ret)
9616+
commit_transaction = true;
9617+
}
9618+
if (!ret && sync_log_dest && !commit_transaction) {
9619+
ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
9620+
&ctx_dest);
9621+
if (ret)
9622+
commit_transaction = true;
9623+
}
9624+
if (commit_transaction) {
9625+
ret = btrfs_commit_transaction(trans);
9626+
} else {
9627+
int ret2;
9628+
9629+
ret2 = btrfs_end_transaction(trans);
9630+
ret = ret ? ret : ret2;
9631+
}
95869632
out_notrans:
95879633
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
95889634
up_read(&fs_info->subvol_sem);
@@ -9659,6 +9705,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
96599705
int ret;
96609706
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
96619707
bool log_pinned = false;
9708+
struct btrfs_log_ctx ctx;
9709+
bool sync_log = false;
9710+
bool commit_transaction = false;
96629711

96639712
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
96649713
return -EPERM;
@@ -9816,8 +9865,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
98169865
if (log_pinned) {
98179866
struct dentry *parent = new_dentry->d_parent;
98189867

9819-
btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
9820-
parent);
9868+
btrfs_init_log_ctx(&ctx, old_inode);
9869+
ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
9870+
BTRFS_I(old_dir), parent,
9871+
false, &ctx);
9872+
if (ret == BTRFS_NEED_LOG_SYNC)
9873+
sync_log = true;
9874+
else if (ret == BTRFS_NEED_TRANS_COMMIT)
9875+
commit_transaction = true;
9876+
ret = 0;
98219877
btrfs_end_log_trans(root);
98229878
log_pinned = false;
98239879
}
@@ -9854,7 +9910,19 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
98549910
btrfs_end_log_trans(root);
98559911
log_pinned = false;
98569912
}
9857-
btrfs_end_transaction(trans);
9913+
if (!ret && sync_log) {
9914+
ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
9915+
if (ret)
9916+
commit_transaction = true;
9917+
}
9918+
if (commit_transaction) {
9919+
ret = btrfs_commit_transaction(trans);
9920+
} else {
9921+
int ret2;
9922+
9923+
ret2 = btrfs_end_transaction(trans);
9924+
ret = ret ? ret : ret2;
9925+
}
98589926
out_notrans:
98599927
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
98609928
up_read(&fs_info->subvol_sem);

fs/btrfs/tree-log.c

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6025,14 +6025,25 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
60256025
* Call this after adding a new name for a file and it will properly
60266026
* update the log to reflect the new name.
60276027
*
6028-
* It will return zero if all goes well, and it will return 1 if a
6029-
* full transaction commit is required.
6028+
* @ctx can not be NULL when @sync_log is false, and should be NULL when it's
6029+
* true (because it's not used).
6030+
*
6031+
* Return value depends on whether @sync_log is true or false.
6032+
* When true: returns BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
6033+
* committed by the caller, and BTRFS_DONT_NEED_TRANS_COMMIT
6034+
* otherwise.
6035+
* When false: returns BTRFS_DONT_NEED_LOG_SYNC if the caller does not need to
6036+
* to sync the log, BTRFS_NEED_LOG_SYNC if it needs to sync the log,
6037+
* or BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
6038+
* committed (without attempting to sync the log).
60306039
*/
60316040
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
60326041
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
6033-
struct dentry *parent)
6042+
struct dentry *parent,
6043+
bool sync_log, struct btrfs_log_ctx *ctx)
60346044
{
60356045
struct btrfs_fs_info *fs_info = trans->fs_info;
6046+
int ret;
60366047

60376048
/*
60386049
* this will force the logging code to walk the dentry chain
@@ -6047,9 +6058,34 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
60476058
*/
60486059
if (inode->logged_trans <= fs_info->last_trans_committed &&
60496060
(!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
6050-
return 0;
6061+
return sync_log ? BTRFS_DONT_NEED_TRANS_COMMIT :
6062+
BTRFS_DONT_NEED_LOG_SYNC;
6063+
6064+
if (sync_log) {
6065+
struct btrfs_log_ctx ctx2;
6066+
6067+
btrfs_init_log_ctx(&ctx2, &inode->vfs_inode);
6068+
ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
6069+
LOG_INODE_EXISTS, &ctx2);
6070+
if (ret == BTRFS_NO_LOG_SYNC)
6071+
return BTRFS_DONT_NEED_TRANS_COMMIT;
6072+
else if (ret)
6073+
return BTRFS_NEED_TRANS_COMMIT;
6074+
6075+
ret = btrfs_sync_log(trans, inode->root, &ctx2);
6076+
if (ret)
6077+
return BTRFS_NEED_TRANS_COMMIT;
6078+
return BTRFS_DONT_NEED_TRANS_COMMIT;
6079+
}
6080+
6081+
ASSERT(ctx);
6082+
ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
6083+
LOG_INODE_EXISTS, ctx);
6084+
if (ret == BTRFS_NO_LOG_SYNC)
6085+
return BTRFS_DONT_NEED_LOG_SYNC;
6086+
else if (ret)
6087+
return BTRFS_NEED_TRANS_COMMIT;
60516088

6052-
return btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
6053-
LOG_INODE_EXISTS, NULL);
6089+
return BTRFS_NEED_LOG_SYNC;
60546090
}
60556091

fs/btrfs/tree-log.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,16 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
7171
int for_rename);
7272
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
7373
struct btrfs_inode *dir);
74+
/* Return values for btrfs_log_new_name() */
75+
enum {
76+
BTRFS_DONT_NEED_TRANS_COMMIT,
77+
BTRFS_NEED_TRANS_COMMIT,
78+
BTRFS_DONT_NEED_LOG_SYNC,
79+
BTRFS_NEED_LOG_SYNC,
80+
};
7481
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
7582
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
76-
struct dentry *parent);
83+
struct dentry *parent,
84+
bool sync_log, struct btrfs_log_ctx *ctx);
7785

7886
#endif

0 commit comments

Comments
 (0)