Skip to content

Commit 97420be

Browse files
fdmanana authored and kdave committed
btrfs: use sector numbers as keys for the dirty extents xarray
We are using the logical address ("bytenr") of an extent as the key for qgroup records in the dirty extents xarray. This is a problem because the xarrays use "unsigned long" for keys/indices, meaning that on a 32 bits platform any extent starting at or beyond 4G is truncated, which is a too low limitation as virtually everyone is using storage with more than 4G of space. This means a "bytenr" of 4G gets truncated to 0, and so does 8G and 16G for example, resulting in incorrect qgroup accounting. Fix this by using sector numbers as keys instead, that is, using keys that match the logical address right shifted by fs_info->sectorsize_bits, which is what we do for the fs_info->buffer_radix that tracks extent buffers (radix trees also use an "unsigned long" type for keys). This also makes the index space more dense which helps optimize the xarray (as mentioned at Documentation/core-api/xarray.rst). Fixes: 3cce39a ("btrfs: qgroup: use xarray to track dirty extents in transaction") Reviewed-by: Qu Wenruo <[email protected]> Signed-off-by: Filipe Manana <[email protected]> Reviewed-by: David Sterba <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent e761be2 commit 97420be

File tree

3 files changed

+33
-13
lines changed

3 files changed

+33
-13
lines changed

fs/btrfs/delayed-ref.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
849849
struct btrfs_qgroup_extent_record *qrecord,
850850
int action, bool *qrecord_inserted_ret)
851851
{
852+
struct btrfs_fs_info *fs_info = trans->fs_info;
852853
struct btrfs_delayed_ref_head *existing;
853854
struct btrfs_delayed_ref_root *delayed_refs;
854855
bool qrecord_inserted = false;
@@ -859,11 +860,11 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
859860
if (qrecord) {
860861
int ret;
861862

862-
ret = btrfs_qgroup_trace_extent_nolock(trans->fs_info,
863-
delayed_refs, qrecord);
863+
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord);
864864
if (ret) {
865865
/* Clean up if insertion fails or item exists. */
866-
xa_release(&delayed_refs->dirty_extents, qrecord->bytenr);
866+
xa_release(&delayed_refs->dirty_extents,
867+
qrecord->bytenr >> fs_info->sectorsize_bits);
867868
/* Caller responsible for freeing qrecord on error. */
868869
if (ret < 0)
869870
return ERR_PTR(ret);
@@ -873,7 +874,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
873874
}
874875
}
875876

876-
trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
877+
trace_add_delayed_ref_head(fs_info, head_ref, action);
877878

878879
existing = htree_insert(&delayed_refs->href_root,
879880
&head_ref->href_node);
@@ -895,8 +896,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
895896
if (head_ref->is_data && head_ref->ref_mod < 0) {
896897
delayed_refs->pending_csums += head_ref->num_bytes;
897898
trans->delayed_ref_csum_deletions +=
898-
btrfs_csum_bytes_to_leaves(trans->fs_info,
899-
head_ref->num_bytes);
899+
btrfs_csum_bytes_to_leaves(fs_info, head_ref->num_bytes);
900900
}
901901
delayed_refs->num_heads++;
902902
delayed_refs->num_heads_ready++;
@@ -1030,7 +1030,8 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
10301030
goto free_head_ref;
10311031
}
10321032
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents,
1033-
generic_ref->bytenr, GFP_NOFS)) {
1033+
generic_ref->bytenr >> fs_info->sectorsize_bits,
1034+
GFP_NOFS)) {
10341035
ret = -ENOMEM;
10351036
goto free_record;
10361037
}

fs/btrfs/delayed-ref.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,15 @@ struct btrfs_delayed_ref_root {
202202
/* head ref rbtree */
203203
struct rb_root_cached href_root;
204204

205-
/* Track dirty extent records. */
205+
/*
206+
* Track dirty extent records.
207+
* The keys correspond to the logical address of the extent ("bytenr")
208+
* right shifted by fs_info->sectorsize_bits. This is both to get a more
209+
* dense index space (optimizes xarray structure) and because indexes in
210+
* xarrays are of "unsigned long" type, meaning they are 32 bits wide on
211+
* 32 bits platforms, limiting the extent range to 4G which is too low
212+
* and makes it unusable (truncated index values) on 32 bits platforms.
213+
*/
206214
struct xarray dirty_extents;
207215

208216
/* this spin lock protects the rbtree and the entries inside */

fs/btrfs/qgroup.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2005,16 +2005,26 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
20052005
struct btrfs_qgroup_extent_record *record)
20062006
{
20072007
struct btrfs_qgroup_extent_record *existing, *ret;
2008-
unsigned long bytenr = record->bytenr;
2008+
const unsigned long index = (record->bytenr >> fs_info->sectorsize_bits);
20092009

20102010
if (!btrfs_qgroup_full_accounting(fs_info))
20112011
return 1;
20122012

2013+
#if BITS_PER_LONG == 32
2014+
if (record->bytenr >= MAX_LFS_FILESIZE) {
2015+
btrfs_err_rl(fs_info,
2016+
"qgroup record for extent at %llu is beyond 32bit page cache and xarray index limit",
2017+
record->bytenr);
2018+
btrfs_err_32bit_limit(fs_info);
2019+
return -EOVERFLOW;
2020+
}
2021+
#endif
2022+
20132023
lockdep_assert_held(&delayed_refs->lock);
20142024
trace_btrfs_qgroup_trace_extent(fs_info, record);
20152025

20162026
xa_lock(&delayed_refs->dirty_extents);
2017-
existing = xa_load(&delayed_refs->dirty_extents, bytenr);
2027+
existing = xa_load(&delayed_refs->dirty_extents, index);
20182028
if (existing) {
20192029
if (record->data_rsv && !existing->data_rsv) {
20202030
existing->data_rsv = record->data_rsv;
@@ -2024,7 +2034,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
20242034
return 1;
20252035
}
20262036

2027-
ret = __xa_store(&delayed_refs->dirty_extents, record->bytenr, record, GFP_ATOMIC);
2037+
ret = __xa_store(&delayed_refs->dirty_extents, index, record, GFP_ATOMIC);
20282038
xa_unlock(&delayed_refs->dirty_extents);
20292039
if (xa_is_err(ret)) {
20302040
qgroup_mark_inconsistent(fs_info);
@@ -2129,6 +2139,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
21292139
struct btrfs_fs_info *fs_info = trans->fs_info;
21302140
struct btrfs_qgroup_extent_record *record;
21312141
struct btrfs_delayed_ref_root *delayed_refs;
2142+
const unsigned long index = (bytenr >> fs_info->sectorsize_bits);
21322143
int ret;
21332144

21342145
if (!btrfs_qgroup_full_accounting(fs_info) || bytenr == 0 || num_bytes == 0)
@@ -2137,7 +2148,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
21372148
if (!record)
21382149
return -ENOMEM;
21392150

2140-
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, bytenr, GFP_NOFS)) {
2151+
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, index, GFP_NOFS)) {
21412152
kfree(record);
21422153
return -ENOMEM;
21432154
}
@@ -2152,7 +2163,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
21522163
spin_unlock(&delayed_refs->lock);
21532164
if (ret) {
21542165
/* Clean up if insertion fails or item exists. */
2155-
xa_release(&delayed_refs->dirty_extents, record->bytenr);
2166+
xa_release(&delayed_refs->dirty_extents, index);
21562167
kfree(record);
21572168
return 0;
21582169
}

0 commit comments

Comments (0)