Skip to content

Commit b091ac6

Browse files
damien-lemoalaxboe
authored andcommitted
sd_zbc: Fix report zones buffer allocation
During disk scan and revalidation done with sd_revalidate(), the zones of a zoned disk are checked using the helper function blk_revalidate_disk_zones() if a configuration change is detected (change in the number of zones or zone size). The function blk_revalidate_disk_zones() issues report_zones calls that are very large, that is, to obtain zone information for all zones of the disk with a single command. The size of the report zones command buffer necessary for such large request generally is lower than the disk max_hw_sectors and KMALLOC_MAX_SIZE (4MB) and succeeds on boot (no memory fragmentation), but often fail at run time (e.g. hot-plug event). This causes the disk revalidation to fail and the disk capacity to be changed to 0. This problem can be avoided by using vmalloc() instead of kmalloc() for the buffer allocation. To limit the amount of memory to be allocated, this patch also introduces the arbitrary SD_ZBC_REPORT_MAX_ZONES maximum number of zones to report with a single report zones command. This limit may be lowered further to satisfy the disk max_hw_sectors limit. Finally, to ensure that the vmalloc-ed buffer can always be mapped in a request, the buffer size is further limited to at most queue_max_segments() pages, allowing successful mapping of the buffer even in the worst case scenario where none of the buffer pages are contiguous. Fixes: 515ce60 ("scsi: sd_zbc: Fix sd_zbc_report_zones() buffer allocation") Fixes: e76239a ("block: add a report_zones method") Cc: [email protected] Reviewed-by: Christoph Hellwig <[email protected]> Reviewed-by: Martin K. Petersen <[email protected]> Signed-off-by: Damien Le Moal <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent bd976e5 commit b091ac6

File tree

1 file changed

+75
-29
lines changed

1 file changed

+75
-29
lines changed

drivers/scsi/sd_zbc.c

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
*/
1010

1111
#include <linux/blkdev.h>
12+
#include <linux/vmalloc.h>
13+
#include <linux/sched/mm.h>
1214

1315
#include <asm/unaligned.h>
1416

@@ -50,7 +52,7 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
5052
/**
5153
* sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
5254
* @sdkp: The target disk
53-
* @buf: Buffer to use for the reply
55+
* @buf: vmalloc-ed buffer to use for the reply
5456
* @buflen: the buffer size
5557
* @lba: Start LBA of the report
5658
* @partial: Do partial report
@@ -79,7 +81,6 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
7981
put_unaligned_be32(buflen, &cmd[10]);
8082
if (partial)
8183
cmd[14] = ZBC_REPORT_ZONE_PARTIAL;
82-
memset(buf, 0, buflen);
8384

8485
result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
8586
buf, buflen, &sshdr,
@@ -103,6 +104,53 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
103104
return 0;
104105
}
105106

107+
/*
108+
* Maximum number of zones to get with one report zones command.
109+
*/
110+
#define SD_ZBC_REPORT_MAX_ZONES 8192U
111+
112+
/**
113+
* Allocate a buffer for report zones reply.
114+
* @sdkp: The target disk
115+
* @nr_zones: Maximum number of zones to report
116+
* @buflen: Size of the buffer allocated
117+
*
118+
* Try to allocate a reply buffer for the number of requested zones.
119+
* The size of the buffer allocated may be smaller than requested to
120+
* satify the device constraint (max_hw_sectors, max_segments, etc).
121+
*
122+
* Return the address of the allocated buffer and update @buflen with
123+
* the size of the allocated buffer.
124+
*/
125+
static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
126+
unsigned int nr_zones, size_t *buflen)
127+
{
128+
struct request_queue *q = sdkp->disk->queue;
129+
size_t bufsize;
130+
void *buf;
131+
132+
/*
133+
* Report zone buffer size should be at most 64B times the number of
134+
* zones requested plus the 64B reply header, but should be at least
135+
* SECTOR_SIZE for ATA devices.
136+
* Make sure that this size does not exceed the hardware capabilities.
137+
* Furthermore, since the report zone command cannot be split, make
138+
* sure that the allocated buffer can always be mapped by limiting the
139+
* number of pages allocated to the HBA max segments limit.
140+
*/
141+
nr_zones = min(nr_zones, SD_ZBC_REPORT_MAX_ZONES);
142+
bufsize = roundup((nr_zones + 1) * 64, 512);
143+
bufsize = min_t(size_t, bufsize,
144+
queue_max_hw_sectors(q) << SECTOR_SHIFT);
145+
bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
146+
147+
buf = vzalloc(bufsize);
148+
if (buf)
149+
*buflen = bufsize;
150+
151+
return buf;
152+
}
153+
106154
/**
107155
* sd_zbc_report_zones - Disk report zones operation.
108156
* @disk: The target disk
@@ -116,30 +164,23 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
116164
struct blk_zone *zones, unsigned int *nr_zones)
117165
{
118166
struct scsi_disk *sdkp = scsi_disk(disk);
119-
unsigned int i, buflen, nrz = *nr_zones;
167+
unsigned int i, nrz = *nr_zones;
120168
unsigned char *buf;
121-
size_t offset = 0;
169+
size_t buflen = 0, offset = 0;
122170
int ret = 0;
123171

124172
if (!sd_is_zoned(sdkp))
125173
/* Not a zoned device */
126174
return -EOPNOTSUPP;
127175

128-
/*
129-
* Get a reply buffer for the number of requested zones plus a header,
130-
* without exceeding the device maximum command size. For ATA disks,
131-
* buffers must be aligned to 512B.
132-
*/
133-
buflen = min(queue_max_hw_sectors(disk->queue) << 9,
134-
roundup((nrz + 1) * 64, 512));
135-
buf = kmalloc(buflen, GFP_KERNEL);
176+
buf = sd_zbc_alloc_report_buffer(sdkp, nrz, &buflen);
136177
if (!buf)
137178
return -ENOMEM;
138179

139180
ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
140181
sectors_to_logical(sdkp->device, sector), true);
141182
if (ret)
142-
goto out_free_buf;
183+
goto out;
143184

144185
nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64);
145186
for (i = 0; i < nrz; i++) {
@@ -150,8 +191,8 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
150191

151192
*nr_zones = nrz;
152193

153-
out_free_buf:
154-
kfree(buf);
194+
out:
195+
kvfree(buf);
155196

156197
return ret;
157198
}
@@ -285,8 +326,6 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
285326
return 0;
286327
}
287328

288-
#define SD_ZBC_BUF_SIZE 131072U
289-
290329
/**
291330
* sd_zbc_check_zones - Check the device capacity and zone sizes
292331
* @sdkp: Target disk
@@ -302,22 +341,28 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
302341
*/
303342
static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
304343
{
344+
size_t bufsize, buflen;
345+
unsigned int noio_flag;
305346
u64 zone_blocks = 0;
306347
sector_t max_lba, block = 0;
307348
unsigned char *buf;
308349
unsigned char *rec;
309-
unsigned int buf_len;
310-
unsigned int list_length;
311350
int ret;
312351
u8 same;
313352

353+
/* Do all memory allocations as if GFP_NOIO was specified */
354+
noio_flag = memalloc_noio_save();
355+
314356
/* Get a buffer */
315-
buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
316-
if (!buf)
317-
return -ENOMEM;
357+
buf = sd_zbc_alloc_report_buffer(sdkp, SD_ZBC_REPORT_MAX_ZONES,
358+
&bufsize);
359+
if (!buf) {
360+
ret = -ENOMEM;
361+
goto out;
362+
}
318363

319364
/* Do a report zone to get max_lba and the same field */
320-
ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false);
365+
ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, 0, false);
321366
if (ret)
322367
goto out_free;
323368

@@ -353,12 +398,12 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
353398
do {
354399

355400
/* Parse REPORT ZONES header */
356-
list_length = get_unaligned_be32(&buf[0]) + 64;
401+
buflen = min_t(size_t, get_unaligned_be32(&buf[0]) + 64,
402+
bufsize);
357403
rec = buf + 64;
358-
buf_len = min(list_length, SD_ZBC_BUF_SIZE);
359404

360405
/* Parse zone descriptors */
361-
while (rec < buf + buf_len) {
406+
while (rec < buf + buflen) {
362407
u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
363408

364409
if (zone_blocks == 0) {
@@ -374,8 +419,8 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
374419
}
375420

376421
if (block < sdkp->capacity) {
377-
ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
378-
block, true);
422+
ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, block,
423+
true);
379424
if (ret)
380425
goto out_free;
381426
}
@@ -406,7 +451,8 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
406451
}
407452

408453
out_free:
409-
kfree(buf);
454+
memalloc_noio_restore(noio_flag);
455+
kvfree(buf);
410456

411457
return ret;
412458
}

0 commit comments

Comments
 (0)