Skip to content

Commit 7dc66ab

Browse files
fdmanana authored and kdave committed
btrfs: use a dedicated data structure for chunk maps
Currently we abuse the extent_map structure for two purposes: 1) To actually represent extents for inodes; 2) To represent chunk mappings. This is odd and has several disadvantages: 1) To create a chunk map, we need to do two memory allocations: one for an extent_map structure and another one for a map_lookup structure, so more potential for an allocation failure and more complicated code to manage and link two structures; 2) For a chunk map we actually only use 3 fields (24 bytes) of the respective extent map structure: the 'start' field to have the logical start address of the chunk, the 'len' field to have the chunk's size, and the 'orig_block_len' field to contain the chunk's stripe size. Besides wasting memory, it's also odd and not intuitive at all to have the stripe size in a field named 'orig_block_len'. We are also using 'block_len' of the extent_map structure to contain the chunk size, so we have 2 fields for the same value, 'len' and 'block_len', which is pointless; 3) When an extent map is associated with a chunk mapping, we set the bit EXTENT_FLAG_FS_MAPPING on its flags and then make its member named 'map_lookup' point to the associated map_lookup structure. This means that for an extent map associated with an inode extent, we are not using this 'map_lookup' pointer, so we waste 8 bytes (on a 64-bit platform); 4) Extent maps associated with a chunk mapping are never merged or split so it's pointless to use the existing extent map infrastructure. So add a dedicated data structure named 'btrfs_chunk_map' to represent chunk mappings. This is basically the existing map_lookup structure with some extra fields: 1) 'start' to contain the chunk logical address; 2) 'chunk_len' to contain the chunk's length; 3) 'stripe_size' for the stripe size; 4) 'rb_node' for insertion into an rb tree; 5) 'refs' for reference counting. This way we do a single memory allocation for chunk mappings and we don't waste memory for them with unused/unnecessary fields from an extent_map. 
We also save 8 bytes from the extent_map structure by removing the 'map_lookup' pointer, so the size of struct extent_map is reduced from 144 bytes down to 136 bytes, and we can now have 30 extent maps per 4K page instead of 28. Reviewed-by: Josef Bacik <[email protected]> Signed-off-by: Filipe Manana <[email protected]> Reviewed-by: David Sterba <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent ebb0bec commit 7dc66ab

File tree

17 files changed

+506
-494
lines changed

17 files changed

+506
-494
lines changed

fs/btrfs/block-group.c

Lines changed: 68 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
168168
cache);
169169

170170
kfree(cache->free_space_ctl);
171-
kfree(cache->physical_map);
171+
btrfs_free_chunk_map(cache->physical_map);
172172
kfree(cache);
173173
}
174174
}
@@ -1047,7 +1047,7 @@ static int remove_block_group_item(struct btrfs_trans_handle *trans,
10471047
}
10481048

10491049
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
1050-
u64 group_start, struct extent_map *em)
1050+
struct btrfs_chunk_map *map)
10511051
{
10521052
struct btrfs_fs_info *fs_info = trans->fs_info;
10531053
struct btrfs_path *path;
@@ -1059,10 +1059,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10591059
int index;
10601060
int factor;
10611061
struct btrfs_caching_control *caching_ctl = NULL;
1062-
bool remove_em;
1062+
bool remove_map;
10631063
bool remove_rsv = false;
10641064

1065-
block_group = btrfs_lookup_block_group(fs_info, group_start);
1065+
block_group = btrfs_lookup_block_group(fs_info, map->start);
10661066
BUG_ON(!block_group);
10671067
BUG_ON(!block_group->ro);
10681068

@@ -1252,7 +1252,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
12521252
* entries because we already removed them all when we called
12531253
* btrfs_remove_free_space_cache().
12541254
*
1255-
* And we must not remove the extent map from the fs_info->mapping_tree
1255+
* And we must not remove the chunk map from the fs_info->mapping_tree
12561256
* to prevent the same logical address range and physical device space
12571257
* ranges from being reused for a new block group. This is needed to
12581258
* avoid races with trimming and scrub.
@@ -1268,19 +1268,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
12681268
* in place until the extents have been discarded completely when
12691269
* the transaction commit has completed.
12701270
*/
1271-
remove_em = (atomic_read(&block_group->frozen) == 0);
1271+
remove_map = (atomic_read(&block_group->frozen) == 0);
12721272
spin_unlock(&block_group->lock);
12731273

1274-
if (remove_em) {
1275-
struct extent_map_tree *em_tree;
1276-
1277-
em_tree = &fs_info->mapping_tree;
1278-
write_lock(&em_tree->lock);
1279-
remove_extent_mapping(em_tree, em);
1280-
write_unlock(&em_tree->lock);
1281-
/* once for the tree */
1282-
free_extent_map(em);
1283-
}
1274+
if (remove_map)
1275+
btrfs_remove_chunk_map(fs_info, map);
12841276

12851277
out:
12861278
/* Once for the lookup reference */
@@ -1295,16 +1287,12 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
12951287
struct btrfs_fs_info *fs_info, const u64 chunk_offset)
12961288
{
12971289
struct btrfs_root *root = btrfs_block_group_root(fs_info);
1298-
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
1299-
struct extent_map *em;
1300-
struct map_lookup *map;
1290+
struct btrfs_chunk_map *map;
13011291
unsigned int num_items;
13021292

1303-
read_lock(&em_tree->lock);
1304-
em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1305-
read_unlock(&em_tree->lock);
1306-
ASSERT(em != NULL);
1307-
ASSERT(em->start == chunk_offset);
1293+
map = btrfs_find_chunk_map(fs_info, chunk_offset, 1);
1294+
ASSERT(map != NULL);
1295+
ASSERT(map->start == chunk_offset);
13081296

13091297
/*
13101298
* We need to reserve 3 + N units from the metadata space info in order
@@ -1325,9 +1313,8 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
13251313
* more device items and remove one chunk item), but this is done at
13261314
* btrfs_remove_chunk() through a call to check_system_chunk().
13271315
*/
1328-
map = em->map_lookup;
13291316
num_items = 3 + map->num_stripes;
1330-
free_extent_map(em);
1317+
btrfs_free_chunk_map(map);
13311318

13321319
return btrfs_start_transaction_fallback_global_rsv(root, num_items);
13331320
}
@@ -1928,8 +1915,7 @@ void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
19281915
static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
19291916
struct btrfs_path *path)
19301917
{
1931-
struct extent_map_tree *em_tree;
1932-
struct extent_map *em;
1918+
struct btrfs_chunk_map *map;
19331919
struct btrfs_block_group_item bg;
19341920
struct extent_buffer *leaf;
19351921
int slot;
@@ -1939,40 +1925,37 @@ static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
19391925
slot = path->slots[0];
19401926
leaf = path->nodes[0];
19411927

1942-
em_tree = &fs_info->mapping_tree;
1943-
read_lock(&em_tree->lock);
1944-
em = lookup_extent_mapping(em_tree, key->objectid, key->offset);
1945-
read_unlock(&em_tree->lock);
1946-
if (!em) {
1928+
map = btrfs_find_chunk_map(fs_info, key->objectid, key->offset);
1929+
if (!map) {
19471930
btrfs_err(fs_info,
19481931
"logical %llu len %llu found bg but no related chunk",
19491932
key->objectid, key->offset);
19501933
return -ENOENT;
19511934
}
19521935

1953-
if (em->start != key->objectid || em->len != key->offset) {
1936+
if (map->start != key->objectid || map->chunk_len != key->offset) {
19541937
btrfs_err(fs_info,
19551938
"block group %llu len %llu mismatch with chunk %llu len %llu",
1956-
key->objectid, key->offset, em->start, em->len);
1939+
key->objectid, key->offset, map->start, map->chunk_len);
19571940
ret = -EUCLEAN;
1958-
goto out_free_em;
1941+
goto out_free_map;
19591942
}
19601943

19611944
read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot),
19621945
sizeof(bg));
19631946
flags = btrfs_stack_block_group_flags(&bg) &
19641947
BTRFS_BLOCK_GROUP_TYPE_MASK;
19651948

1966-
if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
1949+
if (flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
19671950
btrfs_err(fs_info,
19681951
"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
19691952
key->objectid, key->offset, flags,
1970-
(BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type));
1953+
(BTRFS_BLOCK_GROUP_TYPE_MASK & map->type));
19711954
ret = -EUCLEAN;
19721955
}
19731956

1974-
out_free_em:
1975-
free_extent_map(em);
1957+
out_free_map:
1958+
btrfs_free_chunk_map(map);
19761959
return ret;
19771960
}
19781961

@@ -2025,23 +2008,21 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
20252008
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
20262009
u64 physical, u64 **logical, int *naddrs, int *stripe_len)
20272010
{
2028-
struct extent_map *em;
2029-
struct map_lookup *map;
2011+
struct btrfs_chunk_map *map;
20302012
u64 *buf;
20312013
u64 bytenr;
20322014
u64 data_stripe_length;
20332015
u64 io_stripe_size;
20342016
int i, nr = 0;
20352017
int ret = 0;
20362018

2037-
em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
2038-
if (IS_ERR(em))
2019+
map = btrfs_get_chunk_map(fs_info, chunk_start, 1);
2020+
if (IS_ERR(map))
20392021
return -EIO;
20402022

2041-
map = em->map_lookup;
2042-
data_stripe_length = em->orig_block_len;
2023+
data_stripe_length = map->stripe_size;
20432024
io_stripe_size = BTRFS_STRIPE_LEN;
2044-
chunk_start = em->start;
2025+
chunk_start = map->start;
20452026

20462027
/* For RAID5/6 adjust to a full IO stripe length */
20472028
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
@@ -2095,7 +2076,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
20952076
*naddrs = nr;
20962077
*stripe_len = io_stripe_size;
20972078
out:
2098-
free_extent_map(em);
2079+
btrfs_free_chunk_map(map);
20992080
return ret;
21002081
}
21012082

@@ -2200,49 +2181,47 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
22002181
*/
22012182
static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
22022183
{
2203-
struct extent_map_tree *map_tree = &fs_info->mapping_tree;
2204-
struct extent_map *em;
2205-
struct btrfs_block_group *bg;
22062184
u64 start = 0;
22072185
int ret = 0;
22082186

22092187
while (1) {
2210-
read_lock(&map_tree->lock);
2188+
struct btrfs_chunk_map *map;
2189+
struct btrfs_block_group *bg;
2190+
22112191
/*
2212-
* lookup_extent_mapping will return the first extent map
2213-
* intersecting the range, so setting @len to 1 is enough to
2192+
* btrfs_find_chunk_map() will return the first chunk map
2193+
* intersecting the range, so setting @length to 1 is enough to
22142194
* get the first chunk.
22152195
*/
2216-
em = lookup_extent_mapping(map_tree, start, 1);
2217-
read_unlock(&map_tree->lock);
2218-
if (!em)
2196+
map = btrfs_find_chunk_map(fs_info, start, 1);
2197+
if (!map)
22192198
break;
22202199

2221-
bg = btrfs_lookup_block_group(fs_info, em->start);
2200+
bg = btrfs_lookup_block_group(fs_info, map->start);
22222201
if (!bg) {
22232202
btrfs_err(fs_info,
22242203
"chunk start=%llu len=%llu doesn't have corresponding block group",
2225-
em->start, em->len);
2204+
map->start, map->chunk_len);
22262205
ret = -EUCLEAN;
2227-
free_extent_map(em);
2206+
btrfs_free_chunk_map(map);
22282207
break;
22292208
}
2230-
if (bg->start != em->start || bg->length != em->len ||
2209+
if (bg->start != map->start || bg->length != map->chunk_len ||
22312210
(bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
2232-
(em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
2211+
(map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
22332212
btrfs_err(fs_info,
22342213
"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
2235-
em->start, em->len,
2236-
em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
2214+
map->start, map->chunk_len,
2215+
map->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
22372216
bg->start, bg->length,
22382217
bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
22392218
ret = -EUCLEAN;
2240-
free_extent_map(em);
2219+
btrfs_free_chunk_map(map);
22412220
btrfs_put_block_group(bg);
22422221
break;
22432222
}
2244-
start = em->start + em->len;
2245-
free_extent_map(em);
2223+
start = map->start + map->chunk_len;
2224+
btrfs_free_chunk_map(map);
22462225
btrfs_put_block_group(bg);
22472226
}
22482227
return ret;
@@ -2370,28 +2349,25 @@ static int read_one_block_group(struct btrfs_fs_info *info,
23702349

23712350
static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
23722351
{
2373-
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
23742352
struct rb_node *node;
23752353
int ret = 0;
23762354

2377-
for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
2378-
struct extent_map *em;
2379-
struct map_lookup *map;
2355+
for (node = rb_first_cached(&fs_info->mapping_tree); node; node = rb_next(node)) {
2356+
struct btrfs_chunk_map *map;
23802357
struct btrfs_block_group *bg;
23812358

2382-
em = rb_entry(node, struct extent_map, rb_node);
2383-
map = em->map_lookup;
2384-
bg = btrfs_create_block_group_cache(fs_info, em->start);
2359+
map = rb_entry(node, struct btrfs_chunk_map, rb_node);
2360+
bg = btrfs_create_block_group_cache(fs_info, map->start);
23852361
if (!bg) {
23862362
ret = -ENOMEM;
23872363
break;
23882364
}
23892365

23902366
/* Fill dummy cache as FULL */
2391-
bg->length = em->len;
2367+
bg->length = map->chunk_len;
23922368
bg->flags = map->type;
23932369
bg->cached = BTRFS_CACHE_FINISHED;
2394-
bg->used = em->len;
2370+
bg->used = map->chunk_len;
23952371
bg->flags = map->type;
23962372
ret = btrfs_add_block_group_cache(fs_info, bg);
23972373
/*
@@ -2619,19 +2595,17 @@ static int insert_dev_extents(struct btrfs_trans_handle *trans,
26192595
{
26202596
struct btrfs_fs_info *fs_info = trans->fs_info;
26212597
struct btrfs_device *device;
2622-
struct extent_map *em;
2623-
struct map_lookup *map;
2598+
struct btrfs_chunk_map *map;
26242599
u64 dev_offset;
26252600
u64 stripe_size;
26262601
int i;
26272602
int ret = 0;
26282603

2629-
em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
2630-
if (IS_ERR(em))
2631-
return PTR_ERR(em);
2604+
map = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
2605+
if (IS_ERR(map))
2606+
return PTR_ERR(map);
26322607

2633-
map = em->map_lookup;
2634-
stripe_size = em->orig_block_len;
2608+
stripe_size = map->stripe_size;
26352609

26362610
/*
26372611
* Take the device list mutex to prevent races with the final phase of
@@ -2654,7 +2628,7 @@ static int insert_dev_extents(struct btrfs_trans_handle *trans,
26542628
}
26552629
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
26562630

2657-
free_extent_map(em);
2631+
btrfs_free_chunk_map(map);
26582632
return ret;
26592633
}
26602634

@@ -4407,8 +4381,6 @@ void btrfs_freeze_block_group(struct btrfs_block_group *cache)
44074381
void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
44084382
{
44094383
struct btrfs_fs_info *fs_info = block_group->fs_info;
4410-
struct extent_map_tree *em_tree;
4411-
struct extent_map *em;
44124384
bool cleanup;
44134385

44144386
spin_lock(&block_group->lock);
@@ -4417,17 +4389,16 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
44174389
spin_unlock(&block_group->lock);
44184390

44194391
if (cleanup) {
4420-
em_tree = &fs_info->mapping_tree;
4421-
write_lock(&em_tree->lock);
4422-
em = lookup_extent_mapping(em_tree, block_group->start,
4423-
1);
4424-
BUG_ON(!em); /* logic error, can't happen */
4425-
remove_extent_mapping(em_tree, em);
4426-
write_unlock(&em_tree->lock);
4427-
4428-
/* once for us and once for the tree */
4429-
free_extent_map(em);
4430-
free_extent_map(em);
4392+
struct btrfs_chunk_map *map;
4393+
4394+
map = btrfs_find_chunk_map(fs_info, block_group->start, 1);
4395+
/* Logic error, can't happen. */
4396+
ASSERT(map);
4397+
4398+
btrfs_remove_chunk_map(fs_info, map);
4399+
4400+
/* Once for our lookup reference. */
4401+
btrfs_free_chunk_map(map);
44314402

44324403
/*
44334404
* We may have left one free space entry and other possible

fs/btrfs/block-group.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
#include "free-space-cache.h"
77

8+
struct btrfs_chunk_map;
9+
810
enum btrfs_disk_cache_state {
911
BTRFS_DC_WRITTEN,
1012
BTRFS_DC_ERROR,
@@ -243,7 +245,7 @@ struct btrfs_block_group {
243245
u64 zone_unusable;
244246
u64 zone_capacity;
245247
u64 meta_write_pointer;
246-
struct map_lookup *physical_map;
248+
struct btrfs_chunk_map *physical_map;
247249
struct list_head active_bg_list;
248250
struct work_struct zone_finish_work;
249251
struct extent_buffer *last_eb;
@@ -297,7 +299,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
297299
struct btrfs_fs_info *fs_info,
298300
const u64 chunk_offset);
299301
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
300-
u64 group_start, struct extent_map *em);
302+
struct btrfs_chunk_map *map);
301303
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
302304
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
303305
void btrfs_reclaim_bgs_work(struct work_struct *work);

0 commit comments

Comments
 (0)