Skip to content

Commit 498f665

Browse files
KAGA-KOKO authored and axboe committed
block: Fix a race between the cgroup code and request queue initialization
Initialize the request queue lock earlier such that the following race can no longer occur: blk_init_queue_node() blkcg_print_blkgs() blk_alloc_queue_node (1) q->queue_lock = &q->__queue_lock (2) blkcg_init_queue(q) (3) spin_lock_irq(blkg->q->queue_lock) (4) q->queue_lock = lock (5) spin_unlock_irq(blkg->q->queue_lock) (6) (1) allocate an uninitialized queue; (2) initialize queue_lock to its default internal lock; (3) initialize blkcg part of request queue, which will create blkg and then insert it to blkg_list; (4) traverse blkg_list and find the created blkg, and then take its queue lock, here it is the default *internal lock*; (5) *race window*, now queue_lock is overridden with *driver specified lock*; (6) now unlock *driver specified lock*, not the locked *internal lock*, unlock balance breaks. The changes in this patch are as follows: - Move the .queue_lock initialization from blk_init_queue_node() into blk_alloc_queue_node(). - Only override the .queue_lock pointer for legacy queues because it is not useful for blk-mq queues to override this pointer. - For all all block drivers that initialize .queue_lock explicitly, change the blk_alloc_queue() call in the driver into a blk_alloc_queue_node() call and remove the explicit .queue_lock initialization. Additionally, initialize the spin lock that will be used as queue lock earlier if necessary. Reported-by: Joseph Qi <[email protected]> Signed-off-by: Bart Van Assche <[email protected]> Reviewed-by: Joseph Qi <[email protected]> Cc: Christoph Hellwig <[email protected]> Cc: Philipp Reisner <[email protected]> Cc: Ulf Hansson <[email protected]> Cc: Kees Cook <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 5ee0524 commit 498f665

File tree

3 files changed

+20
-14
lines changed

3 files changed

+20
-14
lines changed

block/blk-core.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -888,6 +888,19 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
888888
kblockd_schedule_work(&q->timeout_work);
889889
}
890890

891+
/**
892+
* blk_alloc_queue_node - allocate a request queue
893+
* @gfp_mask: memory allocation flags
894+
* @node_id: NUMA node to allocate memory from
895+
* @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
896+
* serialize calls to the legacy .request_fn() callback. Ignored for
897+
* blk-mq request queues.
898+
*
899+
* Note: pass the queue lock as the third argument to this function instead of
900+
* setting the queue lock pointer explicitly to avoid triggering a sporadic
901+
* crash in the blkcg code. This function namely calls blkcg_init_queue() and
902+
* the queue lock pointer must be set before blkcg_init_queue() is called.
903+
*/
891904
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
892905
spinlock_t *lock)
893906
{
@@ -940,11 +953,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
940953
mutex_init(&q->sysfs_lock);
941954
spin_lock_init(&q->__queue_lock);
942955

943-
/*
944-
* By default initialize queue_lock to internal lock and driver can
945-
* override it later if need be.
946-
*/
947-
q->queue_lock = &q->__queue_lock;
956+
if (!q->mq_ops)
957+
q->queue_lock = lock ? : &q->__queue_lock;
948958

949959
/*
950960
* A queue starts its life with bypass turned on to avoid
@@ -1031,13 +1041,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
10311041
{
10321042
struct request_queue *q;
10331043

1034-
q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL);
1044+
q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
10351045
if (!q)
10361046
return NULL;
10371047

10381048
q->request_fn = rfn;
1039-
if (lock)
1040-
q->queue_lock = lock;
10411049
if (blk_init_allocated_queue(q) < 0) {
10421050
blk_cleanup_queue(q);
10431051
return NULL;

drivers/block/drbd/drbd_main.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
28162816

28172817
drbd_init_set_defaults(device);
28182818

2819-
q = blk_alloc_queue(GFP_KERNEL);
2819+
q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
28202820
if (!q)
28212821
goto out_no_q;
28222822
device->rq_queue = q;
@@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
28482848
/* Setting the max_hw_sectors to an odd value of 8kibyte here
28492849
This triggers a max_bio_size message upon first attach or connect */
28502850
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
2851-
q->queue_lock = &resource->req_lock;
28522851

28532852
device->md_io.page = alloc_page(GFP_KERNEL);
28542853
if (!device->md_io.page)

drivers/block/umem.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
888888
card->Active = -1; /* no page is active */
889889
card->bio = NULL;
890890
card->biotail = &card->bio;
891+
spin_lock_init(&card->lock);
891892

892-
card->queue = blk_alloc_queue(GFP_KERNEL);
893+
card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE,
894+
&card->lock);
893895
if (!card->queue)
894896
goto failed_alloc;
895897

896898
blk_queue_make_request(card->queue, mm_make_request);
897-
card->queue->queue_lock = &card->lock;
898899
card->queue->queuedata = card;
899900

900901
tasklet_init(&card->tasklet, process_page, (unsigned long)card);
@@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
968969
dev_printk(KERN_INFO, &card->dev->dev,
969970
"Window size %d bytes, IRQ %d\n", data, dev->irq);
970971

971-
spin_lock_init(&card->lock);
972-
973972
pci_set_drvdata(dev, card);
974973

975974
if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */

0 commit comments

Comments
 (0)