Skip to content

Commit 6a0cb1b

Browse files
hreinecke authored and axboe committed
block: Implement support for zoned block devices
Implement zoned block device zone information reporting and reset. Zone information is reported as struct blk_zone. This implementation does not differentiate between host-aware and host-managed device models and is valid for both. Two functions are provided: blkdev_report_zones for discovering the zone configuration of a zoned block device, and blkdev_reset_zones for resetting the write pointer of sequential zones. The helper functions blk_queue_zone_size and bdev_zone_size are also provided for, as the names suggest, obtaining the zone size (in 512B sectors) of the zones of the device. Signed-off-by: Hannes Reinecke <[email protected]> [Damien: * Removed the zone cache * Implement report zones operation based on earlier proposal by Shaun Tancheff <[email protected]>] Signed-off-by: Damien Le Moal <[email protected]> Reviewed-by: Christoph Hellwig <[email protected]> Reviewed-by: Martin K. Petersen <[email protected]> Reviewed-by: Shaun Tancheff <[email protected]> Tested-by: Shaun Tancheff <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 2d25344 commit 6a0cb1b

File tree

6 files changed

+401
-0
lines changed

6 files changed

+401
-0
lines changed

block/Kconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ config BLK_DEV_INTEGRITY
8989
T10/SCSI Data Integrity Field or the T13/ATA External Path
9090
Protection. If in doubt, say N.
9191

92+
config BLK_DEV_ZONED
93+
bool "Zoned block device support"
94+
---help---
95+
Block layer zoned block device support. This option enables
96+
support for ZAC/ZBC host-managed and host-aware zoned block devices.
97+
98+
Say yes here if you have a ZAC or ZBC storage device.
99+
92100
config BLK_DEV_THROTTLING
93101
bool "Block layer bio throttling support"
94102
depends on BLK_CGROUP=y

block/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
2323
obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
2424
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
2525
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
26+
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o

block/blk-zoned.c

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
/*
2+
* Zoned block device handling
3+
*
4+
* Copyright (c) 2015, Hannes Reinecke
5+
* Copyright (c) 2015, SUSE Linux GmbH
6+
*
7+
* Copyright (c) 2016, Damien Le Moal
8+
* Copyright (c) 2016, Western Digital
9+
*/
10+
11+
#include <linux/kernel.h>
12+
#include <linux/module.h>
13+
#include <linux/rbtree.h>
14+
#include <linux/blkdev.h>
15+
16+
/*
 * Round @sector down to a multiple of the queue zone size.
 * The mask arithmetic is only meaningful when the zone size returned by
 * blk_queue_zone_size() is a power of two.
 */
static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	unsigned int zone_sectors = blk_queue_zone_size(q);
	sector_t low_bits = zone_sectors - 1;

	return sector & ~low_bits;
}
23+
24+
/*
25+
* Check that a zone report belongs to the partition.
26+
* If yes, fix its start sector and write pointer, copy it in the
27+
* zone information array and return true. Return false otherwise.
28+
*/
29+
static bool blkdev_report_zone(struct block_device *bdev,
30+
struct blk_zone *rep,
31+
struct blk_zone *zone)
32+
{
33+
sector_t offset = get_start_sect(bdev);
34+
35+
if (rep->start < offset)
36+
return false;
37+
38+
rep->start -= offset;
39+
if (rep->start + rep->len > bdev->bd_part->nr_sects)
40+
return false;
41+
42+
if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
43+
rep->wp = rep->start + rep->len;
44+
else
45+
rep->wp -= offset;
46+
memcpy(zone, rep, sizeof(struct blk_zone));
47+
48+
return true;
49+
}
50+
51+
/**
52+
* blkdev_report_zones - Get zones information
53+
* @bdev: Target block device
54+
* @sector: Sector from which to report zones
55+
* @zones: Array of zone structures where to return the zones information
56+
* @nr_zones: Number of zone structures in the zone array
57+
* @gfp_mask: Memory allocation flags (for bio_alloc)
58+
*
59+
* Description:
60+
* Get zone information starting from the zone containing @sector.
61+
* The number of zone information reported may be less than the number
62+
* requested by @nr_zones. The number of zones actually reported is
63+
* returned in @nr_zones.
64+
*/
65+
int blkdev_report_zones(struct block_device *bdev,
66+
sector_t sector,
67+
struct blk_zone *zones,
68+
unsigned int *nr_zones,
69+
gfp_t gfp_mask)
70+
{
71+
struct request_queue *q = bdev_get_queue(bdev);
72+
struct blk_zone_report_hdr *hdr;
73+
unsigned int nrz = *nr_zones;
74+
struct page *page;
75+
unsigned int nr_rep;
76+
size_t rep_bytes;
77+
unsigned int nr_pages;
78+
struct bio *bio;
79+
struct bio_vec *bv;
80+
unsigned int i, n, nz;
81+
unsigned int ofst;
82+
void *addr;
83+
int ret = 0;
84+
85+
if (!q)
86+
return -ENXIO;
87+
88+
if (!blk_queue_is_zoned(q))
89+
return -EOPNOTSUPP;
90+
91+
if (!nrz)
92+
return 0;
93+
94+
if (sector > bdev->bd_part->nr_sects) {
95+
*nr_zones = 0;
96+
return 0;
97+
}
98+
99+
/*
100+
* The zone report has a header. So make room for it in the
101+
* payload. Also make sure that the report fits in a single BIO
102+
* that will not be split down the stack.
103+
*/
104+
rep_bytes = sizeof(struct blk_zone_report_hdr) +
105+
sizeof(struct blk_zone) * nrz;
106+
rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
107+
if (rep_bytes > (queue_max_sectors(q) << 9))
108+
rep_bytes = queue_max_sectors(q) << 9;
109+
110+
nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
111+
rep_bytes >> PAGE_SHIFT);
112+
nr_pages = min_t(unsigned int, nr_pages,
113+
queue_max_segments(q));
114+
115+
bio = bio_alloc(gfp_mask, nr_pages);
116+
if (!bio)
117+
return -ENOMEM;
118+
119+
bio->bi_bdev = bdev;
120+
bio->bi_iter.bi_sector = blk_zone_start(q, sector);
121+
bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
122+
123+
for (i = 0; i < nr_pages; i++) {
124+
page = alloc_page(gfp_mask);
125+
if (!page) {
126+
ret = -ENOMEM;
127+
goto out;
128+
}
129+
if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
130+
__free_page(page);
131+
break;
132+
}
133+
}
134+
135+
if (i == 0)
136+
ret = -ENOMEM;
137+
else
138+
ret = submit_bio_wait(bio);
139+
if (ret)
140+
goto out;
141+
142+
/*
143+
* Process the report result: skip the header and go through the
144+
* reported zones to fixup and fixup the zone information for
145+
* partitions. At the same time, return the zone information into
146+
* the zone array.
147+
*/
148+
n = 0;
149+
nz = 0;
150+
nr_rep = 0;
151+
bio_for_each_segment_all(bv, bio, i) {
152+
153+
if (!bv->bv_page)
154+
break;
155+
156+
addr = kmap_atomic(bv->bv_page);
157+
158+
/* Get header in the first page */
159+
ofst = 0;
160+
if (!nr_rep) {
161+
hdr = (struct blk_zone_report_hdr *) addr;
162+
nr_rep = hdr->nr_zones;
163+
ofst = sizeof(struct blk_zone_report_hdr);
164+
}
165+
166+
/* Fixup and report zones */
167+
while (ofst < bv->bv_len &&
168+
n < nr_rep && nz < nrz) {
169+
if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
170+
nz++;
171+
ofst += sizeof(struct blk_zone);
172+
n++;
173+
}
174+
175+
kunmap_atomic(addr);
176+
177+
if (n >= nr_rep || nz >= nrz)
178+
break;
179+
180+
}
181+
182+
out:
183+
bio_for_each_segment_all(bv, bio, i)
184+
__free_page(bv->bv_page);
185+
bio_put(bio);
186+
187+
if (ret == 0)
188+
*nr_zones = nz;
189+
190+
return ret;
191+
}
192+
EXPORT_SYMBOL_GPL(blkdev_report_zones);
193+
194+
/**
195+
* blkdev_reset_zones - Reset zones write pointer
196+
* @bdev: Target block device
197+
* @sector: Start sector of the first zone to reset
198+
* @nr_sectors: Number of sectors, at least the length of one zone
199+
* @gfp_mask: Memory allocation flags (for bio_alloc)
200+
*
201+
* Description:
202+
* Reset the write pointer of the zones contained in the range
203+
* @sector..@sector+@nr_sectors. Specifying the entire disk sector range
204+
* is valid, but the specified range should not contain conventional zones.
205+
*/
206+
int blkdev_reset_zones(struct block_device *bdev,
207+
sector_t sector, sector_t nr_sectors,
208+
gfp_t gfp_mask)
209+
{
210+
struct request_queue *q = bdev_get_queue(bdev);
211+
sector_t zone_sectors;
212+
sector_t end_sector = sector + nr_sectors;
213+
struct bio *bio;
214+
int ret;
215+
216+
if (!q)
217+
return -ENXIO;
218+
219+
if (!blk_queue_is_zoned(q))
220+
return -EOPNOTSUPP;
221+
222+
if (end_sector > bdev->bd_part->nr_sects)
223+
/* Out of range */
224+
return -EINVAL;
225+
226+
/* Check alignment (handle eventual smaller last zone) */
227+
zone_sectors = blk_queue_zone_size(q);
228+
if (sector & (zone_sectors - 1))
229+
return -EINVAL;
230+
231+
if ((nr_sectors & (zone_sectors - 1)) &&
232+
end_sector != bdev->bd_part->nr_sects)
233+
return -EINVAL;
234+
235+
while (sector < end_sector) {
236+
237+
bio = bio_alloc(gfp_mask, 0);
238+
bio->bi_iter.bi_sector = sector;
239+
bio->bi_bdev = bdev;
240+
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
241+
242+
ret = submit_bio_wait(bio);
243+
bio_put(bio);
244+
245+
if (ret)
246+
return ret;
247+
248+
sector += zone_sectors;
249+
250+
/* This may take a while, so be nice to others */
251+
cond_resched();
252+
253+
}
254+
255+
return 0;
256+
}
257+
EXPORT_SYMBOL_GPL(blkdev_reset_zones);

include/linux/blkdev.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/rcupdate.h>
2525
#include <linux/percpu-refcount.h>
2626
#include <linux/scatterlist.h>
27+
#include <linux/blkzoned.h>
2728

2829
struct module;
2930
struct scsi_ioctl_command;
@@ -302,6 +303,21 @@ struct queue_limits {
302303
enum blk_zoned_model zoned;
303304
};
304305

306+
#ifdef CONFIG_BLK_DEV_ZONED

/*
 * Header located at the very beginning of a zone report payload.
 * blkdev_report_zones() reads @nr_zones from the first page of the report
 * and then expects that many struct blk_zone entries right after the
 * header.
 */
struct blk_zone_report_hdr {
	unsigned int	nr_zones;	/* number of zone entries following */
	u8		padding[60];	/* NOTE(review): presumably sizes the
					 * header like one struct blk_zone -
					 * confirm against blkzoned.h
					 */
};

/* Report up to *@nr_zones zones starting from the zone containing @sector. */
extern int blkdev_report_zones(struct block_device *bdev,
			       sector_t sector, struct blk_zone *zones,
			       unsigned int *nr_zones, gfp_t gfp_mask);
/* Reset the write pointer of the zones in @sector..@sector+@nr_sectors. */
extern int blkdev_reset_zones(struct block_device *bdev, sector_t sector,
			      sector_t nr_sectors, gfp_t gfp_mask);

#endif /* CONFIG_BLK_DEV_ZONED */
320+
305321
struct request_queue {
306322
/*
307323
* Together with queue_head for cacheline sharing
@@ -654,6 +670,11 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
654670
}
655671
}
656672

673+
static inline unsigned int blk_queue_zone_size(struct request_queue *q)
674+
{
675+
return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
676+
}
677+
657678
/*
658679
* We regard a request as sync, if either a read or a sync write
659680
*/
@@ -1401,6 +1422,16 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
14011422
return false;
14021423
}
14031424

1425+
/*
 * Zone size of the queue backing @bdev, as given by blk_queue_zone_size().
 * Returns 0 when @bdev has no request queue.
 */
static inline unsigned int bdev_zone_size(struct block_device *bdev)
{
	struct request_queue *queue = bdev_get_queue(bdev);

	return queue ? blk_queue_zone_size(queue) : 0;
}
1434+
14041435
static inline int queue_dma_alignment(struct request_queue *q)
14051436
{
14061437
return q ? q->dma_alignment : 511;

include/uapi/linux/Kbuild

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ header-y += bfs_fs.h
7070
header-y += binfmts.h
7171
header-y += blkpg.h
7272
header-y += blktrace_api.h
73+
header-y += blkzoned.h
7374
header-y += bpf_common.h
7475
header-y += bpf_perf_event.h
7576
header-y += bpf.h

0 commit comments

Comments
 (0)