Skip to content

Commit 318adf8

Browse files
committed
Merge tag 'for-5.2-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
"A few more fixes for bugs reported by users, fuzzing tools and regressions:
 - fix crashes in relocation:
    + resuming interrupted balance operation does not properly clean up orphan trees
    + with enabled qgroups, resuming needs to be more careful about block groups due to limited context when updating qgroups
 - fsync and logging fixes found by fuzzing
 - incremental send fixes for no-holes and clone
 - fix spin lock type used in timer function for zstd"
* tag 'for-5.2-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix race updating log root item during fsync
  Btrfs: fix wrong ctime and mtime of a directory after log replay
  Btrfs: fix fsync not persisting changed attributes of a directory
  btrfs: qgroup: Check bg while resuming relocation to avoid NULL pointer dereference
  btrfs: reloc: Also queue orphan reloc tree for cleanup to avoid BUG_ON()
  Btrfs: incremental send, fix emission of invalid clone operations
  Btrfs: incremental send, fix file corruption when no-holes feature is enabled
  btrfs: correct zstd workspace manager lock to use spin_lock_bh()
  btrfs: Ensure replaced device doesn't have pending chunk allocation
2 parents 8cb7104 + 06989c7 commit 318adf8

File tree

7 files changed

+128
-48
lines changed

7 files changed

+128
-48
lines changed

fs/btrfs/dev-replace.c

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -603,17 +603,33 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
603603
}
604604
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
605605

606-
trans = btrfs_start_transaction(root, 0);
607-
if (IS_ERR(trans)) {
608-
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
609-
return PTR_ERR(trans);
606+
/*
607+
* We have to use this loop approach because at this point src_device
608+
* has to be available for transaction commit to complete, yet new
609+
* chunks shouldn't be allocated on the device.
610+
*/
611+
while (1) {
612+
trans = btrfs_start_transaction(root, 0);
613+
if (IS_ERR(trans)) {
614+
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
615+
return PTR_ERR(trans);
616+
}
617+
ret = btrfs_commit_transaction(trans);
618+
WARN_ON(ret);
619+
620+
/* Prevent write_all_supers() during the finishing procedure */
621+
mutex_lock(&fs_info->fs_devices->device_list_mutex);
622+
/* Prevent new chunks being allocated on the source device */
623+
mutex_lock(&fs_info->chunk_mutex);
624+
625+
if (!list_empty(&src_device->post_commit_list)) {
626+
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
627+
mutex_unlock(&fs_info->chunk_mutex);
628+
} else {
629+
break;
630+
}
610631
}
611-
ret = btrfs_commit_transaction(trans);
612-
WARN_ON(ret);
613632

614-
/* keep away write_all_supers() during the finishing procedure */
615-
mutex_lock(&fs_info->fs_devices->device_list_mutex);
616-
mutex_lock(&fs_info->chunk_mutex);
617633
down_write(&dev_replace->rwsem);
618634
dev_replace->replace_state =
619635
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
@@ -662,7 +678,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
662678
btrfs_device_set_disk_total_bytes(tgt_device,
663679
src_device->disk_total_bytes);
664680
btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
665-
ASSERT(list_empty(&src_device->post_commit_list));
666681
tgt_device->commit_total_bytes = src_device->commit_total_bytes;
667682
tgt_device->commit_bytes_used = src_device->bytes_used;
668683

fs/btrfs/inode.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6433,8 +6433,18 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
64336433
btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
64346434
name_len * 2);
64356435
inode_inc_iversion(&parent_inode->vfs_inode);
6436-
parent_inode->vfs_inode.i_mtime = parent_inode->vfs_inode.i_ctime =
6437-
current_time(&parent_inode->vfs_inode);
6436+
/*
6437+
* If we are replaying a log tree, we do not want to update the mtime
6438+
* and ctime of the parent directory with the current time, since the
6439+
* log replay procedure is responsible for setting them to their correct
6440+
* values (the ones it had when the fsync was done).
6441+
*/
6442+
if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
6443+
struct timespec64 now = current_time(&parent_inode->vfs_inode);
6444+
6445+
parent_inode->vfs_inode.i_mtime = now;
6446+
parent_inode->vfs_inode.i_ctime = now;
6447+
}
64386448
ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
64396449
if (ret)
64406450
btrfs_abort_transaction(trans, ret);

fs/btrfs/qgroup.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3830,7 +3830,13 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
38303830
subvol_slot);
38313831
block->last_snapshot = last_snapshot;
38323832
block->level = level;
3833-
if (bg->flags & BTRFS_BLOCK_GROUP_DATA)
3833+
3834+
/*
3835+
* If we have bg == NULL, we're called from btrfs_recover_relocation(),
3836+
* no one else can modify tree blocks, thus the qgroup will not change
3837+
* no matter the value of trace_leaf.
3838+
*/
3839+
if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA)
38343840
block->trace_leaf = true;
38353841
else
38363842
block->trace_leaf = false;

fs/btrfs/relocation.c

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2177,22 +2177,30 @@ static int clean_dirty_subvols(struct reloc_control *rc)
21772177
struct btrfs_root *root;
21782178
struct btrfs_root *next;
21792179
int ret = 0;
2180+
int ret2;
21802181

21812182
list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
21822183
reloc_dirty_list) {
2183-
struct btrfs_root *reloc_root = root->reloc_root;
2184+
if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2185+
/* Merged subvolume, cleanup its reloc root */
2186+
struct btrfs_root *reloc_root = root->reloc_root;
21842187

2185-
clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
2186-
list_del_init(&root->reloc_dirty_list);
2187-
root->reloc_root = NULL;
2188-
if (reloc_root) {
2189-
int ret2;
2188+
clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
2189+
list_del_init(&root->reloc_dirty_list);
2190+
root->reloc_root = NULL;
2191+
if (reloc_root) {
21902192

2191-
ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1);
2193+
ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1);
2194+
if (ret2 < 0 && !ret)
2195+
ret = ret2;
2196+
}
2197+
btrfs_put_fs_root(root);
2198+
} else {
2199+
/* Orphan reloc tree, just clean it up */
2200+
ret2 = btrfs_drop_snapshot(root, NULL, 0, 1);
21922201
if (ret2 < 0 && !ret)
21932202
ret = ret2;
21942203
}
2195-
btrfs_put_fs_root(root);
21962204
}
21972205
return ret;
21982206
}
@@ -2480,6 +2488,9 @@ void merge_reloc_roots(struct reloc_control *rc)
24802488
}
24812489
} else {
24822490
list_del_init(&reloc_root->root_list);
2491+
/* Don't forget to queue this reloc root for cleanup */
2492+
list_add_tail(&reloc_root->reloc_dirty_list,
2493+
&rc->dirty_subvol_roots);
24832494
}
24842495
}
24852496

fs/btrfs/send.c

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4999,6 +4999,12 @@ static int send_hole(struct send_ctx *sctx, u64 end)
49994999
if (offset >= sctx->cur_inode_size)
50005000
return 0;
50015001

5002+
/*
5003+
* Don't go beyond the inode's i_size due to prealloc extents that start
5004+
* after the i_size.
5005+
*/
5006+
end = min_t(u64, end, sctx->cur_inode_size);
5007+
50025008
if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
50035009
return send_update_extent(sctx, offset, end - offset);
50045010

@@ -5218,10 +5224,50 @@ static int clone_range(struct send_ctx *sctx,
52185224
clone_len = min_t(u64, ext_len, len);
52195225

52205226
if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
5221-
clone_data_offset == data_offset)
5222-
ret = send_clone(sctx, offset, clone_len, clone_root);
5223-
else
5227+
clone_data_offset == data_offset) {
5228+
const u64 src_end = clone_root->offset + clone_len;
5229+
const u64 sectorsize = SZ_64K;
5230+
5231+
/*
5232+
* We can't clone the last block, when its size is not
5233+
* sector size aligned, into the middle of a file. If we
5234+
* do so, the receiver will get a failure (-EINVAL) when
5235+
* trying to clone or will silently corrupt the data in
5236+
* the destination file if it's on a kernel without the
5237+
* fix introduced by commit ac765f83f1397646
5238+
* ("Btrfs: fix data corruption due to cloning of eof
5239+
* block").
5240+
*
5241+
* So issue a clone of the aligned down range plus a
5242+
* regular write for the eof block, if we hit that case.
5243+
*
5244+
* Also, we use the maximum possible sector size, 64K,
5245+
* because we don't know what's the sector size of the
5246+
* filesystem that receives the stream, so we have to
5247+
* assume the largest possible sector size.
5248+
*/
5249+
if (src_end == clone_src_i_size &&
5250+
!IS_ALIGNED(src_end, sectorsize) &&
5251+
offset + clone_len < sctx->cur_inode_size) {
5252+
u64 slen;
5253+
5254+
slen = ALIGN_DOWN(src_end - clone_root->offset,
5255+
sectorsize);
5256+
if (slen > 0) {
5257+
ret = send_clone(sctx, offset, slen,
5258+
clone_root);
5259+
if (ret < 0)
5260+
goto out;
5261+
}
5262+
ret = send_extent_data(sctx, offset + slen,
5263+
clone_len - slen);
5264+
} else {
5265+
ret = send_clone(sctx, offset, clone_len,
5266+
clone_root);
5267+
}
5268+
} else {
52245269
ret = send_extent_data(sctx, offset, clone_len);
5270+
}
52255271

52265272
if (ret < 0)
52275273
goto out;

fs/btrfs/tree-log.c

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3109,6 +3109,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
31093109
root->log_transid++;
31103110
log->log_transid = root->log_transid;
31113111
root->log_start_pid = 0;
3112+
/*
3113+
* Update or create log root item under the root's log_mutex to prevent
3114+
* races with concurrent log syncs that can lead to failure to update
3115+
* log root item because it was not created yet.
3116+
*/
3117+
ret = update_log_root(trans, log);
31123118
/*
31133119
* IO has been started, blocks of the log tree have WRITTEN flag set
31143120
* in their headers. new modifications of the log will be written to
@@ -3128,8 +3134,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
31283134

31293135
mutex_unlock(&log_root_tree->log_mutex);
31303136

3131-
ret = update_log_root(trans, log);
3132-
31333137
mutex_lock(&log_root_tree->log_mutex);
31343138
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
31353139
/* atomic_dec_and_test implies a barrier */
@@ -5478,7 +5482,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
54785482
{
54795483
int ret = 0;
54805484
struct dentry *old_parent = NULL;
5481-
struct btrfs_inode *orig_inode = inode;
54825485

54835486
/*
54845487
* for regular files, if its inode is already on disk, we don't
@@ -5498,16 +5501,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
54985501
}
54995502

55005503
while (1) {
5501-
/*
5502-
* If we are logging a directory then we start with our inode,
5503-
* not our parent's inode, so we need to skip setting the
5504-
* logged_trans so that further down in the log code we don't
5505-
* think this inode has already been logged.
5506-
*/
5507-
if (inode != orig_inode)
5508-
inode->logged_trans = trans->transid;
5509-
smp_mb();
5510-
55115504
if (btrfs_must_commit_transaction(trans, inode)) {
55125505
ret = 1;
55135506
break;
@@ -6384,7 +6377,6 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
63846377
* if this directory was already logged any new
63856378
* names for this file/dir will get recorded
63866379
*/
6387-
smp_mb();
63886380
if (dir->logged_trans == trans->transid)
63896381
return;
63906382

fs/btrfs/zstd.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,10 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
105105
unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
106106
struct list_head *pos, *next;
107107

108-
spin_lock(&wsm.lock);
108+
spin_lock_bh(&wsm.lock);
109109

110110
if (list_empty(&wsm.lru_list)) {
111-
spin_unlock(&wsm.lock);
111+
spin_unlock_bh(&wsm.lock);
112112
return;
113113
}
114114

@@ -137,7 +137,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
137137
if (!list_empty(&wsm.lru_list))
138138
mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
139139

140-
spin_unlock(&wsm.lock);
140+
spin_unlock_bh(&wsm.lock);
141141
}
142142

143143
/*
@@ -198,7 +198,7 @@ static void zstd_cleanup_workspace_manager(void)
198198
struct workspace *workspace;
199199
int i;
200200

201-
spin_lock(&wsm.lock);
201+
spin_lock_bh(&wsm.lock);
202202
for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
203203
while (!list_empty(&wsm.idle_ws[i])) {
204204
workspace = container_of(wsm.idle_ws[i].next,
@@ -208,7 +208,7 @@ static void zstd_cleanup_workspace_manager(void)
208208
zstd_free_workspace(&workspace->list);
209209
}
210210
}
211-
spin_unlock(&wsm.lock);
211+
spin_unlock_bh(&wsm.lock);
212212

213213
del_timer_sync(&wsm.timer);
214214
}
@@ -230,7 +230,7 @@ static struct list_head *zstd_find_workspace(unsigned int level)
230230
struct workspace *workspace;
231231
int i = level - 1;
232232

233-
spin_lock(&wsm.lock);
233+
spin_lock_bh(&wsm.lock);
234234
for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
235235
if (!list_empty(&wsm.idle_ws[i])) {
236236
ws = wsm.idle_ws[i].next;
@@ -242,11 +242,11 @@ static struct list_head *zstd_find_workspace(unsigned int level)
242242
list_del(&workspace->lru_list);
243243
if (list_empty(&wsm.idle_ws[i]))
244244
clear_bit(i, &wsm.active_map);
245-
spin_unlock(&wsm.lock);
245+
spin_unlock_bh(&wsm.lock);
246246
return ws;
247247
}
248248
}
249-
spin_unlock(&wsm.lock);
249+
spin_unlock_bh(&wsm.lock);
250250

251251
return NULL;
252252
}
@@ -305,7 +305,7 @@ static void zstd_put_workspace(struct list_head *ws)
305305
{
306306
struct workspace *workspace = list_to_workspace(ws);
307307

308-
spin_lock(&wsm.lock);
308+
spin_lock_bh(&wsm.lock);
309309

310310
/* A node is only taken off the lru if we are the corresponding level */
311311
if (workspace->req_level == workspace->level) {
@@ -325,7 +325,7 @@ static void zstd_put_workspace(struct list_head *ws)
325325
list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
326326
workspace->req_level = 0;
327327

328-
spin_unlock(&wsm.lock);
328+
spin_unlock_bh(&wsm.lock);
329329

330330
if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
331331
cond_wake_up(&wsm.wait);

0 commit comments

Comments
 (0)