 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/sched/mm.h>
 #include "ctree.h"
 #include "volumes.h"
 #include "zoned.h"
 #include "rcu-string.h"
 #include "disk-io.h"
+#include "block-group.h"
 
 /* Maximum number of zones to report per blkdev_report_zones() call */
 #define BTRFS_REPORT_NR_ZONES 4096
+/* Invalid allocation pointer value for missing devices */
+#define WP_MISSING_DEV ((u64)-1)
+/* Pseudo write pointer value for conventional zone */
+#define WP_CONVENTIONAL ((u64)-2)
 
 /* Number of superblock log zones */
 #define BTRFS_NR_SB_LOG_ZONES 2
@@ -920,3 +926,148 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
 
 	return 0;
 }
+
+int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache)
+{
+	struct btrfs_fs_info *fs_info = cache->fs_info;
+	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
+	struct extent_map *em;
+	struct map_lookup *map;
+	struct btrfs_device *device;
+	u64 logical = cache->start;
+	u64 length = cache->length;
+	u64 physical = 0;
+	int ret;
+	int i;
+	unsigned int nofs_flag;
+	u64 *alloc_offsets = NULL;
+	u32 num_sequential = 0, num_conventional = 0;
+
+	if (!btrfs_is_zoned(fs_info))
+		return 0;
+
+	/* Sanity check */
+	if (!IS_ALIGNED(length, fs_info->zone_size)) {
+		btrfs_err(fs_info,
+		"zoned: block group %llu len %llu unaligned to zone size %llu",
+			  logical, length, fs_info->zone_size);
+		return -EIO;
+	}
+
+	/* Get the chunk mapping */
+	read_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, logical, length);
+	read_unlock(&em_tree->lock);
+
+	if (!em)
+		return -EINVAL;
+
+	map = em->map_lookup;
+
+	alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS);
+	if (!alloc_offsets) {
+		free_extent_map(em);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < map->num_stripes; i++) {
+		bool is_sequential;
+		struct blk_zone zone;
+
+		device = map->stripes[i].dev;
+		physical = map->stripes[i].physical;
+
+		if (device->bdev == NULL) {
+			alloc_offsets[i] = WP_MISSING_DEV;
+			continue;
+		}
+
+		is_sequential = btrfs_dev_is_sequential(device, physical);
+		if (is_sequential)
+			num_sequential++;
+		else
+			num_conventional++;
+
+		if (!is_sequential) {
+			alloc_offsets[i] = WP_CONVENTIONAL;
+			continue;
+		}
+
+		/*
+		 * This zone will be used for allocation, so mark this zone
+		 * non-empty.
+		 */
+		btrfs_dev_clear_zone_empty(device, physical);
+
+		/*
+		 * The group is mapped to a sequential zone. Get the zone write
+		 * pointer to determine the allocation offset within the zone.
+		 */
+		WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size));
+		nofs_flag = memalloc_nofs_save();
+		ret = btrfs_get_dev_zone(device, physical, &zone);
+		memalloc_nofs_restore(nofs_flag);
+		if (ret == -EIO || ret == -EOPNOTSUPP) {
+			ret = 0;
+			alloc_offsets[i] = WP_MISSING_DEV;
+			continue;
+		} else if (ret) {
+			goto out;
+		}
+
+		switch (zone.cond) {
+		case BLK_ZONE_COND_OFFLINE:
+		case BLK_ZONE_COND_READONLY:
+			btrfs_err(fs_info,
+		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
+				  physical >> device->zone_info->zone_size_shift,
+				  rcu_str_deref(device->name), device->devid);
+			alloc_offsets[i] = WP_MISSING_DEV;
+			break;
+		case BLK_ZONE_COND_EMPTY:
+			alloc_offsets[i] = 0;
+			break;
+		case BLK_ZONE_COND_FULL:
+			alloc_offsets[i] = fs_info->zone_size;
+			break;
+		default:
+			/* Partially used zone */
+			alloc_offsets[i] =
+				((zone.wp - zone.start) << SECTOR_SHIFT);
+			break;
+		}
+	}
+
+	if (num_conventional > 0) {
+		/*
+		 * Since conventional zones do not have a write pointer, we
+		 * cannot determine alloc_offset from the pointer
+		 */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+	case 0: /* single */
+		cache->alloc_offset = alloc_offsets[0];
+		break;
+	case BTRFS_BLOCK_GROUP_DUP:
+	case BTRFS_BLOCK_GROUP_RAID1:
+	case BTRFS_BLOCK_GROUP_RAID0:
+	case BTRFS_BLOCK_GROUP_RAID10:
+	case BTRFS_BLOCK_GROUP_RAID5:
+	case BTRFS_BLOCK_GROUP_RAID6:
+		/* non-single profiles are not supported yet */
+	default:
+		btrfs_err(fs_info, "zoned: profile %s not yet supported",
+			  btrfs_bg_type_to_raid_name(map->type));
+		ret = -EINVAL;
+		goto out;
+	}
+
+out:
+	kfree(alloc_offsets);
+	free_extent_map(em);
+
+	return ret;
+}
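
For readers following the write-pointer arithmetic in the "Partially used zone" case above: the zone report expresses both the zone start and the write pointer in 512-byte sectors, so the in-zone allocation offset in bytes is (wp - start) << SECTOR_SHIFT. The stand-alone user-space sketch below reproduces just that conversion with made-up zone geometry; it is illustrative only and not part of the patch.

/*
 * Illustrative sketch (not part of the patch): how a reported zone's write
 * pointer maps to the in-zone allocation offset. Zone geometry is hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9	/* 512-byte sectors, as in the kernel */

int main(void)
{
	uint64_t zone_start = 0x80000;		/* zone start, in sectors (hypothetical) */
	uint64_t zone_wp = zone_start + 2048;	/* write pointer: 2048 sectors = 1 MiB written */

	/* Same computation as the "Partially used zone" case in the patch */
	uint64_t alloc_offset = (zone_wp - zone_start) << SECTOR_SHIFT;

	printf("alloc_offset = %llu bytes\n", (unsigned long long)alloc_offset);
	return 0;
}

Note that the empty and full zone conditions are handled explicitly in the patch (offset 0 and fs_info->zone_size respectively) rather than being derived from the reported write pointer.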