Skip to content

Commit 499f377

Browse files
jeffmahoney authored and masoncl committed
btrfs: iterate over unused chunk space in FITRIM
Since we now clean up block groups automatically as they become empty, iterating over block groups is no longer sufficient to discard unused space. This patch iterates over the unused chunk space and discards any regions that are unallocated, regardless of whether they were ever used. This is a change for btrfs but is consistent with other file systems. We do this in a transactionless manner since the discard process can take a substantial amount of time and a transaction would need to be started before the acquisition of the device list lock. That would mean a transaction would be held open across /all/ of the discards collectively. In order to prevent other threads from allocating or freeing chunks, we hold the chunks lock across the search and discard calls. We release it between searches to allow the file system to perform more-or-less normally. Since the running transaction can commit and disappear while we're using the transaction pointer, we take a reference to it and release it after the search. This is safe since it would happen normally at the end of the transaction commit after any locks are released anyway. We also take the commit_root_sem to protect against a transaction starting and committing while we're running. Signed-off-by: Jeff Mahoney <jeffm@suse.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Tested-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
1 parent 8655786 commit 499f377

File tree

3 files changed

+143
-24
lines changed

3 files changed

+143
-24
lines changed

fs/btrfs/extent-tree.c

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10135,10 +10135,99 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
1013510135
return unpin_extent_range(root, start, end, false);
1013610136
}
1013710137

10138+
/*
10139+
* It used to be that old block groups would be left around forever.
10140+
* Iterating over them would be enough to trim unused space. Since we
10141+
* now automatically remove them, we also need to iterate over unallocated
10142+
* space.
10143+
*
10144+
* We don't want a transaction for this since the discard may take a
10145+
* substantial amount of time. We don't require that a transaction be
10146+
* running, but we do need to take a running transaction into account
10147+
* to ensure that we're not discarding chunks that were released in
10148+
* the current transaction.
10149+
*
10150+
* Holding the chunks lock will prevent other threads from allocating
10151+
* or releasing chunks, but it won't prevent a running transaction
10152+
* from committing and releasing the memory that the pending chunks
10153+
* list head uses. For that, we need to take a reference to the
10154+
* transaction.
10155+
*/
10156+
static int btrfs_trim_free_extents(struct btrfs_device *device,
10157+
u64 minlen, u64 *trimmed)
10158+
{
10159+
u64 start = 0, len = 0;
10160+
int ret;
10161+
10162+
*trimmed = 0;
10163+
10164+
/* Not writeable = nothing to do. */
10165+
if (!device->writeable)
10166+
return 0;
10167+
10168+
/* No free space = nothing to do. */
10169+
if (device->total_bytes <= device->bytes_used)
10170+
return 0;
10171+
10172+
ret = 0;
10173+
10174+
while (1) {
10175+
struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
10176+
struct btrfs_transaction *trans;
10177+
u64 bytes;
10178+
10179+
ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
10180+
if (ret)
10181+
return ret;
10182+
10183+
down_read(&fs_info->commit_root_sem);
10184+
10185+
spin_lock(&fs_info->trans_lock);
10186+
trans = fs_info->running_transaction;
10187+
if (trans)
10188+
atomic_inc(&trans->use_count);
10189+
spin_unlock(&fs_info->trans_lock);
10190+
10191+
ret = find_free_dev_extent_start(trans, device, minlen, start,
10192+
&start, &len);
10193+
if (trans)
10194+
btrfs_put_transaction(trans);
10195+
10196+
if (ret) {
10197+
up_read(&fs_info->commit_root_sem);
10198+
mutex_unlock(&fs_info->chunk_mutex);
10199+
if (ret == -ENOSPC)
10200+
ret = 0;
10201+
break;
10202+
}
10203+
10204+
ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
10205+
up_read(&fs_info->commit_root_sem);
10206+
mutex_unlock(&fs_info->chunk_mutex);
10207+
10208+
if (ret)
10209+
break;
10210+
10211+
start += len;
10212+
*trimmed += bytes;
10213+
10214+
if (fatal_signal_pending(current)) {
10215+
ret = -ERESTARTSYS;
10216+
break;
10217+
}
10218+
10219+
cond_resched();
10220+
}
10221+
10222+
return ret;
10223+
}
10224+
1013810225
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
1013910226
{
1014010227
struct btrfs_fs_info *fs_info = root->fs_info;
1014110228
struct btrfs_block_group_cache *cache = NULL;
10229+
struct btrfs_device *device;
10230+
struct list_head *devices;
1014210231
u64 group_trimmed;
1014310232
u64 start;
1014410233
u64 end;
@@ -10193,6 +10282,18 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
1019310282
cache = next_block_group(fs_info->tree_root, cache);
1019410283
}
1019510284

10285+
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
10286+
devices = &root->fs_info->fs_devices->alloc_list;
10287+
list_for_each_entry(device, devices, dev_alloc_list) {
10288+
ret = btrfs_trim_free_extents(device, range->minlen,
10289+
&group_trimmed);
10290+
if (ret)
10291+
break;
10292+
10293+
trimmed += group_trimmed;
10294+
}
10295+
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
10296+
1019610297
range->len = trimmed;
1019710298
return ret;
1019810299
}

fs/btrfs/volumes.c

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,15 +1116,18 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
11161116
return ret;
11171117
}
11181118

1119-
static int contains_pending_extent(struct btrfs_trans_handle *trans,
1119+
static int contains_pending_extent(struct btrfs_transaction *transaction,
11201120
struct btrfs_device *device,
11211121
u64 *start, u64 len)
11221122
{
1123+
struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
11231124
struct extent_map *em;
1124-
struct list_head *search_list = &trans->transaction->pending_chunks;
1125+
struct list_head *search_list = &fs_info->pinned_chunks;
11251126
int ret = 0;
11261127
u64 physical_start = *start;
11271128

1129+
if (transaction)
1130+
search_list = &transaction->pending_chunks;
11281131
again:
11291132
list_for_each_entry(em, search_list, list) {
11301133
struct map_lookup *map;
@@ -1159,8 +1162,8 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
11591162
}
11601163
}
11611164
}
1162-
if (search_list == &trans->transaction->pending_chunks) {
1163-
search_list = &trans->root->fs_info->pinned_chunks;
1165+
if (search_list != &fs_info->pinned_chunks) {
1166+
search_list = &fs_info->pinned_chunks;
11641167
goto again;
11651168
}
11661169

@@ -1169,12 +1172,13 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
11691172

11701173

11711174
/*
1172-
* find_free_dev_extent - find free space in the specified device
1173-
* @device: the device which we search the free space in
1174-
* @num_bytes: the size of the free space that we need
1175-
* @start: store the start of the free space.
1176-
* @len: the size of the free space. that we find, or the size of the max
1177-
* free space if we don't find suitable free space
1175+
* find_free_dev_extent_start - find free space in the specified device
1176+
* @device: the device which we search the free space in
1177+
* @num_bytes: the size of the free space that we need
1178+
* @search_start: the position from which to begin the search
1179+
* @start: store the start of the free space.
1180+
* @len: the size of the free space. that we find, or the size
1181+
* of the max free space if we don't find suitable free space
11781182
*
11791183
* this uses a pretty simple search, the expectation is that it is
11801184
* called very infrequently and that a given device has a small number
@@ -1188,9 +1192,9 @@ static int contains_pending_extent(struct btrfs_trans_handle *trans,
11881192
* But if we don't find suitable free space, it is used to store the size of
11891193
* the max free space.
11901194
*/
1191-
int find_free_dev_extent(struct btrfs_trans_handle *trans,
1192-
struct btrfs_device *device, u64 num_bytes,
1193-
u64 *start, u64 *len)
1195+
int find_free_dev_extent_start(struct btrfs_transaction *transaction,
1196+
struct btrfs_device *device, u64 num_bytes,
1197+
u64 search_start, u64 *start, u64 *len)
11941198
{
11951199
struct btrfs_key key;
11961200
struct btrfs_root *root = device->dev_root;
@@ -1200,19 +1204,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
12001204
u64 max_hole_start;
12011205
u64 max_hole_size;
12021206
u64 extent_end;
1203-
u64 search_start;
12041207
u64 search_end = device->total_bytes;
12051208
int ret;
12061209
int slot;
12071210
struct extent_buffer *l;
12081211

1209-
/* FIXME use last free of some kind */
1210-
1211-
/* we don't want to overwrite the superblock on the drive,
1212-
* so we make sure to start at an offset of at least 1MB
1213-
*/
1214-
search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
1215-
12161212
path = btrfs_alloc_path();
12171213
if (!path)
12181214
return -ENOMEM;
@@ -1273,7 +1269,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
12731269
* Have to check before we set max_hole_start, otherwise
12741270
* we could end up sending back this offset anyway.
12751271
*/
1276-
if (contains_pending_extent(trans, device,
1272+
if (contains_pending_extent(transaction, device,
12771273
&search_start,
12781274
hole_size)) {
12791275
if (key.offset >= search_start) {
@@ -1322,7 +1318,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
13221318
if (search_end > search_start) {
13231319
hole_size = search_end - search_start;
13241320

1325-
if (contains_pending_extent(trans, device, &search_start,
1321+
if (contains_pending_extent(transaction, device, &search_start,
13261322
hole_size)) {
13271323
btrfs_release_path(path);
13281324
goto again;
@@ -1348,6 +1344,24 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
13481344
return ret;
13491345
}
13501346

1347+
/*
 * find_free_dev_extent - allocator entry point for the free space search
 *
 * Picks the offset at which the search begins and hands everything else
 * to find_free_dev_extent_start(), using the transaction that backs
 * @trans.  The return value and the @start/@len outputs are exactly
 * those of find_free_dev_extent_start().
 */
int find_free_dev_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_device *device, u64 num_bytes,
			 u64 *start, u64 *len)
{
	struct btrfs_fs_info *info = device->dev_root->fs_info;
	u64 first_offset;

	/* FIXME use last free of some kind */

	/*
	 * we don't want to overwrite the superblock on the drive,
	 * so we make sure to start at an offset of at least 1MB
	 */
	first_offset = info->alloc_start;
	if (first_offset < 1024ull * 1024)
		first_offset = 1024ull * 1024;

	return find_free_dev_extent_start(trans->transaction, device,
					  num_bytes, first_offset, start,
					  len);
}
1364+
13511365
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
13521366
struct btrfs_device *device,
13531367
u64 start, u64 *dev_extent_len)
@@ -4200,7 +4214,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
42004214
u64 start = new_size;
42014215
u64 len = old_size - new_size;
42024216

4203-
if (contains_pending_extent(trans, device, &start, len)) {
4217+
if (contains_pending_extent(trans->transaction, device,
4218+
&start, len)) {
42044219
unlock_chunks(root);
42054220
checked_pending_chunks = true;
42064221
failed = 0;

fs/btrfs/volumes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,9 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
455455
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
456456
int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
457457
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
458+
int find_free_dev_extent_start(struct btrfs_transaction *transaction,
459+
struct btrfs_device *device, u64 num_bytes,
460+
u64 search_start, u64 *start, u64 *max_avail);
458461
int find_free_dev_extent(struct btrfs_trans_handle *trans,
459462
struct btrfs_device *device, u64 num_bytes,
460463
u64 *start, u64 *max_avail);

0 commit comments

Comments
 (0)