Skip to content

Commit cdd1fed

Browse files
Dan Fuhry authored and fdmanana committed
btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
Two new flags, RENAME_EXCHANGE and RENAME_WHITEOUT, provide for new behavior in the renameat2() syscall. This behavior is primarily used by overlayfs. This patch adds support for these flags to btrfs, enabling it to be used as a fully functional upper layer for overlayfs. RENAME_EXCHANGE support was written by Davide Italiano and originally submitted on 2 April 2015. Signed-off-by: Davide Italiano <[email protected]> Signed-off-by: Dan Fuhry <[email protected]> [ remove unlikely ] Signed-off-by: David Sterba <[email protected]> Signed-off-by: Filipe Manana <[email protected]>
1 parent c4aba95 commit cdd1fed

File tree

1 file changed

+257
-7
lines changed

1 file changed

+257
-7
lines changed

fs/btrfs/inode.c

Lines changed: 257 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9394,8 +9394,244 @@ static int btrfs_getattr(struct vfsmount *mnt,
93949394
return 0;
93959395
}
93969396

9397+
static int btrfs_rename_exchange(struct inode *old_dir,
9398+
struct dentry *old_dentry,
9399+
struct inode *new_dir,
9400+
struct dentry *new_dentry)
9401+
{
9402+
struct btrfs_trans_handle *trans;
9403+
struct btrfs_root *root = BTRFS_I(old_dir)->root;
9404+
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
9405+
struct inode *new_inode = new_dentry->d_inode;
9406+
struct inode *old_inode = old_dentry->d_inode;
9407+
struct timespec ctime = CURRENT_TIME;
9408+
struct dentry *parent;
9409+
u64 old_ino = btrfs_ino(old_inode);
9410+
u64 new_ino = btrfs_ino(new_inode);
9411+
u64 old_idx = 0;
9412+
u64 new_idx = 0;
9413+
u64 root_objectid;
9414+
int ret;
9415+
9416+
/* we only allow rename subvolume link between subvolumes */
9417+
if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
9418+
return -EXDEV;
9419+
9420+
/* close the race window with snapshot create/destroy ioctl */
9421+
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9422+
down_read(&root->fs_info->subvol_sem);
9423+
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
9424+
down_read(&dest->fs_info->subvol_sem);
9425+
9426+
/*
9427+
* We want to reserve the absolute worst case amount of items. So if
9428+
* both inodes are subvols and we need to unlink them then that would
9429+
* require 4 item modifications, but if they are both normal inodes it
9430+
* would require 5 item modifications, so we'll assume their normal
9431+
* inodes. So 5 * 2 is 10, plus 2 for the new links, so 12 total items
9432+
* should cover the worst case number of items we'll modify.
9433+
*/
9434+
trans = btrfs_start_transaction(root, 12);
9435+
if (IS_ERR(trans)) {
9436+
ret = PTR_ERR(trans);
9437+
goto out_notrans;
9438+
}
9439+
9440+
/*
9441+
* We need to find a free sequence number both in the source and
9442+
* in the destination directory for the exchange.
9443+
*/
9444+
ret = btrfs_set_inode_index(new_dir, &old_idx);
9445+
if (ret)
9446+
goto out_fail;
9447+
ret = btrfs_set_inode_index(old_dir, &new_idx);
9448+
if (ret)
9449+
goto out_fail;
9450+
9451+
BTRFS_I(old_inode)->dir_index = 0ULL;
9452+
BTRFS_I(new_inode)->dir_index = 0ULL;
9453+
9454+
/* Reference for the source. */
9455+
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
9456+
/* force full log commit if subvolume involved. */
9457+
btrfs_set_log_full_commit(root->fs_info, trans);
9458+
} else {
9459+
ret = btrfs_insert_inode_ref(trans, dest,
9460+
new_dentry->d_name.name,
9461+
new_dentry->d_name.len,
9462+
old_ino,
9463+
btrfs_ino(new_dir), old_idx);
9464+
if (ret)
9465+
goto out_fail;
9466+
btrfs_pin_log_trans(root);
9467+
}
9468+
9469+
/* And now for the dest. */
9470+
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
9471+
/* force full log commit if subvolume involved. */
9472+
btrfs_set_log_full_commit(dest->fs_info, trans);
9473+
} else {
9474+
ret = btrfs_insert_inode_ref(trans, root,
9475+
old_dentry->d_name.name,
9476+
old_dentry->d_name.len,
9477+
new_ino,
9478+
btrfs_ino(old_dir), new_idx);
9479+
if (ret)
9480+
goto out_fail;
9481+
btrfs_pin_log_trans(dest);
9482+
}
9483+
9484+
/* Update inode version and ctime/mtime. */
9485+
inode_inc_iversion(old_dir);
9486+
inode_inc_iversion(new_dir);
9487+
inode_inc_iversion(old_inode);
9488+
inode_inc_iversion(new_inode);
9489+
old_dir->i_ctime = old_dir->i_mtime = ctime;
9490+
new_dir->i_ctime = new_dir->i_mtime = ctime;
9491+
old_inode->i_ctime = ctime;
9492+
new_inode->i_ctime = ctime;
9493+
9494+
if (old_dentry->d_parent != new_dentry->d_parent) {
9495+
btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
9496+
btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
9497+
}
9498+
9499+
/* src is a subvolume */
9500+
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
9501+
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
9502+
ret = btrfs_unlink_subvol(trans, root, old_dir,
9503+
root_objectid,
9504+
old_dentry->d_name.name,
9505+
old_dentry->d_name.len);
9506+
} else { /* src is an inode */
9507+
ret = __btrfs_unlink_inode(trans, root, old_dir,
9508+
old_dentry->d_inode,
9509+
old_dentry->d_name.name,
9510+
old_dentry->d_name.len);
9511+
if (!ret)
9512+
ret = btrfs_update_inode(trans, root, old_inode);
9513+
}
9514+
if (ret) {
9515+
btrfs_abort_transaction(trans, root, ret);
9516+
goto out_fail;
9517+
}
9518+
9519+
/* dest is a subvolume */
9520+
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
9521+
root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
9522+
ret = btrfs_unlink_subvol(trans, dest, new_dir,
9523+
root_objectid,
9524+
new_dentry->d_name.name,
9525+
new_dentry->d_name.len);
9526+
} else { /* dest is an inode */
9527+
ret = __btrfs_unlink_inode(trans, dest, new_dir,
9528+
new_dentry->d_inode,
9529+
new_dentry->d_name.name,
9530+
new_dentry->d_name.len);
9531+
if (!ret)
9532+
ret = btrfs_update_inode(trans, dest, new_inode);
9533+
}
9534+
if (ret) {
9535+
btrfs_abort_transaction(trans, root, ret);
9536+
goto out_fail;
9537+
}
9538+
9539+
ret = btrfs_add_link(trans, new_dir, old_inode,
9540+
new_dentry->d_name.name,
9541+
new_dentry->d_name.len, 0, old_idx);
9542+
if (ret) {
9543+
btrfs_abort_transaction(trans, root, ret);
9544+
goto out_fail;
9545+
}
9546+
9547+
ret = btrfs_add_link(trans, old_dir, new_inode,
9548+
old_dentry->d_name.name,
9549+
old_dentry->d_name.len, 0, new_idx);
9550+
if (ret) {
9551+
btrfs_abort_transaction(trans, root, ret);
9552+
goto out_fail;
9553+
}
9554+
9555+
if (old_inode->i_nlink == 1)
9556+
BTRFS_I(old_inode)->dir_index = old_idx;
9557+
if (new_inode->i_nlink == 1)
9558+
BTRFS_I(new_inode)->dir_index = new_idx;
9559+
9560+
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
9561+
parent = new_dentry->d_parent;
9562+
btrfs_log_new_name(trans, old_inode, old_dir, parent);
9563+
btrfs_end_log_trans(root);
9564+
}
9565+
if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
9566+
parent = old_dentry->d_parent;
9567+
btrfs_log_new_name(trans, new_inode, new_dir, parent);
9568+
btrfs_end_log_trans(dest);
9569+
}
9570+
out_fail:
9571+
ret = btrfs_end_transaction(trans, root);
9572+
out_notrans:
9573+
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
9574+
up_read(&dest->fs_info->subvol_sem);
9575+
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9576+
up_read(&root->fs_info->subvol_sem);
9577+
9578+
return ret;
9579+
}
9580+
9581+
static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
9582+
struct btrfs_root *root,
9583+
struct inode *dir,
9584+
struct dentry *dentry)
9585+
{
9586+
int ret;
9587+
struct inode *inode;
9588+
u64 objectid;
9589+
u64 index;
9590+
9591+
ret = btrfs_find_free_ino(root, &objectid);
9592+
if (ret)
9593+
return ret;
9594+
9595+
inode = btrfs_new_inode(trans, root, dir,
9596+
dentry->d_name.name,
9597+
dentry->d_name.len,
9598+
btrfs_ino(dir),
9599+
objectid,
9600+
S_IFCHR | WHITEOUT_MODE,
9601+
&index);
9602+
9603+
if (IS_ERR(inode)) {
9604+
ret = PTR_ERR(inode);
9605+
return ret;
9606+
}
9607+
9608+
inode->i_op = &btrfs_special_inode_operations;
9609+
init_special_inode(inode, inode->i_mode,
9610+
WHITEOUT_DEV);
9611+
9612+
ret = btrfs_init_inode_security(trans, inode, dir,
9613+
&dentry->d_name);
9614+
if (ret)
9615+
return ret;
9616+
9617+
ret = btrfs_add_nondir(trans, dir, dentry,
9618+
inode, 0, index);
9619+
if (ret)
9620+
return ret;
9621+
9622+
ret = btrfs_update_inode(trans, root, inode);
9623+
if (ret)
9624+
return ret;
9625+
9626+
unlock_new_inode(inode);
9627+
iput(inode);
9628+
9629+
return 0;
9630+
}
9631+
93979632
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9398-
struct inode *new_dir, struct dentry *new_dentry)
9633+
struct inode *new_dir, struct dentry *new_dentry,
9634+
unsigned int flags)
93999635
{
94009636
struct btrfs_trans_handle *trans;
94019637
struct btrfs_root *root = BTRFS_I(old_dir)->root;
@@ -9457,15 +9693,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
94579693
* We want to reserve the absolute worst case amount of items. So if
94589694
* both inodes are subvols and we need to unlink them then that would
94599695
* require 4 item modifications, but if they are both normal inodes it
9460-
* would require 5 item modifications, so we'll assume their normal
9696+
* would require 5 item modifications, so we'll assume they are normal
94619697
* inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
94629698
* should cover the worst case number of items we'll modify.
94639699
*/
94649700
trans = btrfs_start_transaction(root, 11);
94659701
if (IS_ERR(trans)) {
9466-
ret = PTR_ERR(trans);
9467-
goto out_notrans;
9468-
}
9702+
ret = PTR_ERR(trans);
9703+
goto out_notrans;
9704+
}
94699705

94709706
if (dest != root)
94719707
btrfs_record_root_in_trans(trans, dest);
@@ -9561,6 +9797,16 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
95619797
btrfs_end_log_trans(root);
95629798
log_pinned = false;
95639799
}
9800+
9801+
if (flags & RENAME_WHITEOUT) {
9802+
ret = btrfs_whiteout_for_rename(trans, root, old_dir,
9803+
old_dentry);
9804+
9805+
if (ret) {
9806+
btrfs_abort_transaction(trans, root, ret);
9807+
goto out_fail;
9808+
}
9809+
}
95649810
out_fail:
95659811
/*
95669812
* If we have pinned the log and an error happened, we unpin tasks
@@ -9596,10 +9842,14 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
95969842
struct inode *new_dir, struct dentry *new_dentry,
95979843
unsigned int flags)
95989844
{
9599-
if (flags & ~RENAME_NOREPLACE)
9845+
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
96009846
return -EINVAL;
96019847

9602-
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
9848+
if (flags & RENAME_EXCHANGE)
9849+
return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
9850+
new_dentry);
9851+
9852+
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
96039853
}
96049854

96059855
static void btrfs_run_delalloc_work(struct btrfs_work *work)

0 commit comments

Comments
 (0)