Skip to content

Commit ccce20f

Browse files
KAGA-KOKOmartinkpetersen
authored andcommitted
scsi: sd_zbc: Avoid that resetting a zone fails sporadically
Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is called from sd_probe_async() and since sd_revalidate_disk() calls sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called concurrently with blkdev_report_zones() and/or blkdev_reset_zones(). That can cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets q->nr_zones to zero before restoring it to the actual value, even if no drive characteristics have changed. Avoid that this can happen by making the following changes: - Protect the code that updates zone information with blk_queue_enter() and blk_queue_exit(). - Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that these functions do not modify struct scsi_disk before all zone information has been obtained. Note: since commit 055f6e1 ("block: Make q_usage_counter also track legacy requests"; kernel v4.15) the request queue freezing mechanism also affects legacy request queues. Fixes: 89d9475 ("sd: Implement support for ZBC devices") Signed-off-by: Bart Van Assche <[email protected]> Cc: Jens Axboe <[email protected]> Cc: Damien Le Moal <[email protected]> Cc: Christoph Hellwig <[email protected]> Cc: Hannes Reinecke <[email protected]> Cc: [email protected] # v4.16 Reviewed-by: Damien Le Moal <[email protected]> Signed-off-by: Martin K. Petersen <[email protected]>
1 parent 505aa4b commit ccce20f

File tree

2 files changed

+87
-58
lines changed

2 files changed

+87
-58
lines changed

drivers/scsi/sd_zbc.c

Lines changed: 82 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
400400
*
401401
* Check that all zones of the device are equal. The last zone can however
402402
* be smaller. The zone size must also be a power of two number of LBAs.
403+
*
404+
* Returns the zone size in bytes upon success or an error code upon failure.
403405
*/
404-
static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
406+
static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
405407
{
406408
u64 zone_blocks = 0;
407409
sector_t block = 0;
@@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
412414
int ret;
413415
u8 same;
414416

415-
sdkp->zone_blocks = 0;
416-
417417
/* Get a buffer */
418418
buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
419419
if (!buf)
@@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
445445

446446
/* Parse zone descriptors */
447447
while (rec < buf + buf_len) {
448-
zone_blocks = get_unaligned_be64(&rec[8]);
449-
if (sdkp->zone_blocks == 0) {
450-
sdkp->zone_blocks = zone_blocks;
451-
} else if (zone_blocks != sdkp->zone_blocks &&
452-
(block + zone_blocks < sdkp->capacity
453-
|| zone_blocks > sdkp->zone_blocks)) {
454-
zone_blocks = 0;
448+
u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
449+
450+
if (zone_blocks == 0) {
451+
zone_blocks = this_zone_blocks;
452+
} else if (this_zone_blocks != zone_blocks &&
453+
(block + this_zone_blocks < sdkp->capacity
454+
|| this_zone_blocks > zone_blocks)) {
455+
this_zone_blocks = 0;
455456
goto out;
456457
}
457-
block += zone_blocks;
458+
block += this_zone_blocks;
458459
rec += 64;
459460
}
460461

@@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
467468

468469
} while (block < sdkp->capacity);
469470

470-
zone_blocks = sdkp->zone_blocks;
471-
472471
out:
473472
if (!zone_blocks) {
474473
if (sdkp->first_scan)
@@ -488,8 +487,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
488487
"Zone size too large\n");
489488
ret = -ENODEV;
490489
} else {
491-
sdkp->zone_blocks = zone_blocks;
492-
sdkp->zone_shift = ilog2(zone_blocks);
490+
ret = zone_blocks;
493491
}
494492

495493
out_free:
@@ -500,22 +498,22 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
500498

501499
/**
502500
* sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
503-
* @sdkp: The disk of the bitmap
501+
* @nr_zones: Number of zones to allocate space for.
502+
* @numa_node: NUMA node to allocate the memory from.
504503
*/
505-
static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
504+
static inline unsigned long *
505+
sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
506506
{
507-
struct request_queue *q = sdkp->disk->queue;
508-
509-
return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
510-
* sizeof(unsigned long),
511-
GFP_KERNEL, q->node);
507+
return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long),
508+
GFP_KERNEL, numa_node);
512509
}
513510

514511
/**
515512
* sd_zbc_get_seq_zones - Parse report zones reply to identify sequential zones
516513
* @sdkp: disk used
517514
* @buf: report reply buffer
518515
* @buflen: length of @buf
516+
* @zone_shift: logarithm base 2 of the number of blocks in a zone
519517
* @seq_zones_bitmap: bitmap of sequential zones to set
520518
*
521519
* Parse reported zone descriptors in @buf to identify sequential zones and
@@ -525,7 +523,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
525523
* Return the LBA after the last zone reported.
526524
*/
527525
static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
528-
unsigned int buflen,
526+
unsigned int buflen, u32 zone_shift,
529527
unsigned long *seq_zones_bitmap)
530528
{
531529
sector_t lba, next_lba = sdkp->capacity;
@@ -544,7 +542,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
544542
if (type != ZBC_ZONE_TYPE_CONV &&
545543
cond != ZBC_ZONE_COND_READONLY &&
546544
cond != ZBC_ZONE_COND_OFFLINE)
547-
set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap);
545+
set_bit(lba >> zone_shift, seq_zones_bitmap);
548546
next_lba = lba + get_unaligned_be64(&rec[8]);
549547
rec += 64;
550548
}
@@ -553,22 +551,26 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
553551
}
554552

555553
/**
556-
* sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap.
554+
* sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
557555
* @sdkp: target disk
556+
* @zone_shift: logarithm base 2 of the number of blocks in a zone
557+
* @nr_zones: number of zones to set up a seq zone bitmap for
558558
*
559559
* Allocate a zone bitmap and initialize it by identifying sequential zones.
560560
*/
561-
static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
561+
static unsigned long *
562+
sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
563+
u32 nr_zones)
562564
{
563565
struct request_queue *q = sdkp->disk->queue;
564566
unsigned long *seq_zones_bitmap;
565567
sector_t lba = 0;
566568
unsigned char *buf;
567569
int ret = -ENOMEM;
568570

569-
seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp);
571+
seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
570572
if (!seq_zones_bitmap)
571-
return -ENOMEM;
573+
return ERR_PTR(-ENOMEM);
572574

573575
buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
574576
if (!buf)
@@ -579,7 +581,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
579581
if (ret)
580582
goto out;
581583
lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
582-
seq_zones_bitmap);
584+
zone_shift, seq_zones_bitmap);
583585
}
584586

585587
if (lba != sdkp->capacity) {
@@ -591,12 +593,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
591593
kfree(buf);
592594
if (ret) {
593595
kfree(seq_zones_bitmap);
594-
return ret;
596+
return ERR_PTR(ret);
595597
}
596-
597-
q->seq_zones_bitmap = seq_zones_bitmap;
598-
599-
return 0;
598+
return seq_zones_bitmap;
600599
}
601600

602601
static void sd_zbc_cleanup(struct scsi_disk *sdkp)
@@ -612,44 +611,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp)
612611
q->nr_zones = 0;
613612
}
614613

615-
static int sd_zbc_setup(struct scsi_disk *sdkp)
614+
static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
616615
{
617616
struct request_queue *q = sdkp->disk->queue;
617+
u32 zone_shift = ilog2(zone_blocks);
618+
u32 nr_zones;
618619
int ret;
619620

620-
/* READ16/WRITE16 is mandatory for ZBC disks */
621-
sdkp->device->use_16_for_rw = 1;
622-
sdkp->device->use_10_for_rw = 0;
623-
624621
/* chunk_sectors indicates the zone size */
625-
blk_queue_chunk_sectors(sdkp->disk->queue,
626-
logical_to_sectors(sdkp->device, sdkp->zone_blocks));
627-
sdkp->nr_zones =
628-
round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift;
622+
blk_queue_chunk_sectors(q,
623+
logical_to_sectors(sdkp->device, zone_blocks));
624+
nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
629625

630626
/*
631627
* Initialize the device request queue information if the number
632628
* of zones changed.
633629
*/
634-
if (sdkp->nr_zones != q->nr_zones) {
635-
636-
sd_zbc_cleanup(sdkp);
637-
638-
q->nr_zones = sdkp->nr_zones;
639-
if (sdkp->nr_zones) {
640-
q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
641-
if (!q->seq_zones_wlock) {
630+
if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
631+
unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
632+
size_t zone_bitmap_size;
633+
634+
if (nr_zones) {
635+
seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
636+
q->node);
637+
if (!seq_zones_wlock) {
642638
ret = -ENOMEM;
643639
goto err;
644640
}
645641

646-
ret = sd_zbc_setup_seq_zones_bitmap(sdkp);
647-
if (ret) {
648-
sd_zbc_cleanup(sdkp);
642+
seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
643+
zone_shift, nr_zones);
644+
if (IS_ERR(seq_zones_bitmap)) {
645+
ret = PTR_ERR(seq_zones_bitmap);
646+
kfree(seq_zones_wlock);
649647
goto err;
650648
}
651649
}
652-
650+
zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
651+
sizeof(unsigned long);
652+
blk_mq_freeze_queue(q);
653+
if (q->nr_zones != nr_zones) {
654+
/* READ16/WRITE16 is mandatory for ZBC disks */
655+
sdkp->device->use_16_for_rw = 1;
656+
sdkp->device->use_10_for_rw = 0;
657+
658+
sdkp->zone_blocks = zone_blocks;
659+
sdkp->zone_shift = zone_shift;
660+
sdkp->nr_zones = nr_zones;
661+
q->nr_zones = nr_zones;
662+
swap(q->seq_zones_wlock, seq_zones_wlock);
663+
swap(q->seq_zones_bitmap, seq_zones_bitmap);
664+
} else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
665+
zone_bitmap_size) != 0) {
666+
memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
667+
zone_bitmap_size);
668+
}
669+
blk_mq_unfreeze_queue(q);
670+
kfree(seq_zones_wlock);
671+
kfree(seq_zones_bitmap);
653672
}
654673

655674
return 0;
@@ -661,6 +680,7 @@ static int sd_zbc_setup(struct scsi_disk *sdkp)
661680

662681
int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
663682
{
683+
int64_t zone_blocks;
664684
int ret;
665685

666686
if (!sd_is_zoned(sdkp))
@@ -697,12 +717,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
697717
* Check zone size: only devices with a constant zone size (except
698718
* an eventual last runt zone) that is a power of 2 are supported.
699719
*/
700-
ret = sd_zbc_check_zone_size(sdkp);
701-
if (ret)
720+
zone_blocks = sd_zbc_check_zone_size(sdkp);
721+
ret = -EFBIG;
722+
if (zone_blocks != (u32)zone_blocks)
723+
goto err;
724+
ret = zone_blocks;
725+
if (ret < 0)
702726
goto err;
703727

704728
/* The drive satisfies the kernel restrictions: set it up */
705-
ret = sd_zbc_setup(sdkp);
729+
ret = sd_zbc_setup(sdkp, zone_blocks);
706730
if (ret)
707731
goto err;
708732

include/linux/blkdev.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,11 @@ struct request_queue {
605605
* initialized by the low level device driver (e.g. scsi/sd.c).
606606
* Stacking drivers (device mappers) may or may not initialize
607607
* these fields.
608+
*
609+
* Reads of this information must be protected with blk_queue_enter() /
610+
* blk_queue_exit(). Modifying this information is only allowed while
611+
* no requests are being processed. See also blk_mq_freeze_queue() and
612+
* blk_mq_unfreeze_queue().
608613
*/
609614
unsigned int nr_zones;
610615
unsigned long *seq_zones_bitmap;

0 commit comments

Comments
 (0)