Skip to content

Commit ea1ee5f

Browse files
committed
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer fixes from Jens Axboe: "A final set of fixes for 4.3. It is (again) bigger than I would have liked, but it's all been through the testing mill and has been carefully reviewed by multiple parties. Each fix is either a regression fix for this cycle, or is marked stable. You can scold me at KS. The pull request contains: - Three simple fixes for NVMe, fixing regressions since 4.3. From Arnd, Christoph, and Keith. - A single xen-blkfront fix from Cathy, fixing a NULL dereference if an error is returned through the state change callback. - Fixup for some bad/sloppy code in nbd that got introduced earlier in this cycle. From Markus Pargmann. - A blk-mq tagset use-after-free fix from Junichi. - A backing device lifetime fix from Tejun, fixing a crash. - And finally, a set of regression/stable fixes for cgroup writeback from Tejun" * 'for-linus' of git://git.kernel.dk/linux-block: writeback: remove broken rbtree_postorder_for_each_entry_safe() usage in cgwb_bdi_destroy() NVMe: Fix memory leak on retried commands block: don't release bdi while request_queue has live references nvme: use an integer value to Linux errno values blk-mq: fix use-after-free in blk_mq_free_tag_set() nvme: fix 32-bit build warning writeback: fix incorrect calculation of available memory for memcg domains writeback: memcg dirty_throttle_control should be initialized with wb->memcg_completions writeback: bdi_writeback iteration must not skip dying ones writeback: fix bdi_writeback iteration in wakeup_dirtytime_writeback() writeback: laptop_mode_timer_fn() needs rcu_read_lock() around bdi_writeback iteration nbd: Add locking for tasks xen-blkfront: check for null drvdata in blkback_changed (XenbusStateClosing)
2 parents ef594c4 + e27c5b9 commit ea1ee5f

File tree

14 files changed

+167
-144
lines changed

14 files changed

+167
-144
lines changed

block/blk-core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,7 @@ void blk_cleanup_queue(struct request_queue *q)
576576
q->queue_lock = &q->__queue_lock;
577577
spin_unlock_irq(lock);
578578

579-
bdi_destroy(&q->backing_dev_info);
579+
bdi_unregister(&q->backing_dev_info);
580580

581581
/* @q is and will stay empty, shutdown and put */
582582
blk_put_queue(q);

block/blk-mq-tag.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
641641
{
642642
bt_free(&tags->bitmap_tags);
643643
bt_free(&tags->breserved_tags);
644+
free_cpumask_var(tags->cpumask);
644645
kfree(tags);
645646
}
646647

block/blk-mq.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2296,10 +2296,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
22962296
int i;
22972297

22982298
for (i = 0; i < set->nr_hw_queues; i++) {
2299-
if (set->tags[i]) {
2299+
if (set->tags[i])
23002300
blk_mq_free_rq_map(set, set->tags[i], i);
2301-
free_cpumask_var(set->tags[i]->cpumask);
2302-
}
23032301
}
23042302

23052303
kfree(set->tags);

block/blk-sysfs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,7 @@ static void blk_release_queue(struct kobject *kobj)
540540
struct request_queue *q =
541541
container_of(kobj, struct request_queue, kobj);
542542

543+
bdi_exit(&q->backing_dev_info);
543544
blkcg_exit_queue(q);
544545

545546
if (q->elevator) {

drivers/block/nbd.c

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct nbd_device {
6060
bool disconnect; /* a disconnect has been requested by user */
6161

6262
struct timer_list timeout_timer;
63+
spinlock_t tasks_lock;
6364
struct task_struct *task_recv;
6465
struct task_struct *task_send;
6566

@@ -140,21 +141,23 @@ static void sock_shutdown(struct nbd_device *nbd)
140141
static void nbd_xmit_timeout(unsigned long arg)
141142
{
142143
struct nbd_device *nbd = (struct nbd_device *)arg;
143-
struct task_struct *task;
144+
unsigned long flags;
144145

145146
if (list_empty(&nbd->queue_head))
146147
return;
147148

148149
nbd->disconnect = true;
149150

150-
task = READ_ONCE(nbd->task_recv);
151-
if (task)
152-
force_sig(SIGKILL, task);
151+
spin_lock_irqsave(&nbd->tasks_lock, flags);
152+
153+
if (nbd->task_recv)
154+
force_sig(SIGKILL, nbd->task_recv);
153155

154-
task = READ_ONCE(nbd->task_send);
155-
if (task)
156+
if (nbd->task_send)
156157
force_sig(SIGKILL, nbd->task_send);
157158

159+
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
160+
158161
dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
159162
}
160163

@@ -403,17 +406,24 @@ static int nbd_thread_recv(struct nbd_device *nbd)
403406
{
404407
struct request *req;
405408
int ret;
409+
unsigned long flags;
406410

407411
BUG_ON(nbd->magic != NBD_MAGIC);
408412

409413
sk_set_memalloc(nbd->sock->sk);
410414

415+
spin_lock_irqsave(&nbd->tasks_lock, flags);
411416
nbd->task_recv = current;
417+
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
412418

413419
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
414420
if (ret) {
415421
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
422+
423+
spin_lock_irqsave(&nbd->tasks_lock, flags);
416424
nbd->task_recv = NULL;
425+
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
426+
417427
return ret;
418428
}
419429

@@ -429,7 +439,9 @@ static int nbd_thread_recv(struct nbd_device *nbd)
429439

430440
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
431441

442+
spin_lock_irqsave(&nbd->tasks_lock, flags);
432443
nbd->task_recv = NULL;
444+
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
433445

434446
if (signal_pending(current)) {
435447
siginfo_t info;
@@ -534,8 +546,11 @@ static int nbd_thread_send(void *data)
534546
{
535547
struct nbd_device *nbd = data;
536548
struct request *req;
549+
unsigned long flags;
537550

551+
spin_lock_irqsave(&nbd->tasks_lock, flags);
538552
nbd->task_send = current;
553+
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
539554

540555
set_user_nice(current, MIN_NICE);
541556
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
@@ -572,7 +587,15 @@ static int nbd_thread_send(void *data)
572587
nbd_handle_req(nbd, req);
573588
}
574589

590+
spin_lock_irqsave(&nbd->tasks_lock, flags);
575591
nbd->task_send = NULL;
592+
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
593+
594+
/* Clear maybe pending signals */
595+
if (signal_pending(current)) {
596+
siginfo_t info;
597+
dequeue_signal_lock(current, &current->blocked, &info);
598+
}
576599

577600
return 0;
578601
}
@@ -1052,6 +1075,7 @@ static int __init nbd_init(void)
10521075
nbd_dev[i].magic = NBD_MAGIC;
10531076
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
10541077
spin_lock_init(&nbd_dev[i].queue_lock);
1078+
spin_lock_init(&nbd_dev[i].tasks_lock);
10551079
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
10561080
mutex_init(&nbd_dev[i].tx_lock);
10571081
init_timer(&nbd_dev[i].timeout_timer);

drivers/block/nvme-core.c

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -603,27 +603,31 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
603603
struct nvme_iod *iod = ctx;
604604
struct request *req = iod_get_private(iod);
605605
struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
606-
607606
u16 status = le16_to_cpup(&cqe->status) >> 1;
607+
bool requeue = false;
608+
int error = 0;
608609

609610
if (unlikely(status)) {
610611
if (!(status & NVME_SC_DNR || blk_noretry_request(req))
611612
&& (jiffies - req->start_time) < req->timeout) {
612613
unsigned long flags;
613614

615+
requeue = true;
614616
blk_mq_requeue_request(req);
615617
spin_lock_irqsave(req->q->queue_lock, flags);
616618
if (!blk_queue_stopped(req->q))
617619
blk_mq_kick_requeue_list(req->q);
618620
spin_unlock_irqrestore(req->q->queue_lock, flags);
619-
return;
621+
goto release_iod;
620622
}
621623

622624
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
623625
if (cmd_rq->ctx == CMD_CTX_CANCELLED)
624-
status = -EINTR;
626+
error = -EINTR;
627+
else
628+
error = status;
625629
} else {
626-
status = nvme_error_status(status);
630+
error = nvme_error_status(status);
627631
}
628632
}
629633

@@ -635,8 +639,9 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
635639
if (cmd_rq->aborted)
636640
dev_warn(nvmeq->dev->dev,
637641
"completing aborted command with status:%04x\n",
638-
status);
642+
error);
639643

644+
release_iod:
640645
if (iod->nents) {
641646
dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
642647
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
@@ -649,7 +654,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
649654
}
650655
nvme_free_iod(nvmeq->dev, iod);
651656

652-
blk_mq_complete_request(req, status);
657+
if (likely(!requeue))
658+
blk_mq_complete_request(req, error);
653659
}
654660

655661
/* length is in bytes. gfp flags indicates whether we may sleep. */
@@ -1804,7 +1810,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
18041810

18051811
length = (io.nblocks + 1) << ns->lba_shift;
18061812
meta_len = (io.nblocks + 1) * ns->ms;
1807-
metadata = (void __user *)(unsigned long)io.metadata;
1813+
metadata = (void __user *)(uintptr_t)io.metadata;
18081814
write = io.opcode & 1;
18091815

18101816
if (ns->ext) {
@@ -1844,7 +1850,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
18441850
c.rw.metadata = cpu_to_le64(meta_dma);
18451851

18461852
status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
1847-
(void __user *)io.addr, length, NULL, 0);
1853+
(void __user *)(uintptr_t)io.addr, length, NULL, 0);
18481854
unmap:
18491855
if (meta) {
18501856
if (status == NVME_SC_SUCCESS && !write) {
@@ -1886,7 +1892,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
18861892
timeout = msecs_to_jiffies(cmd.timeout_ms);
18871893

18881894
status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
1889-
NULL, (void __user *)cmd.addr, cmd.data_len,
1895+
NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
18901896
&cmd.result, timeout);
18911897
if (status >= 0) {
18921898
if (put_user(cmd.result, &ucmd->result))

drivers/block/xen-blkfront.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1956,7 +1956,8 @@ static void blkback_changed(struct xenbus_device *dev,
19561956
break;
19571957
/* Missed the backend's Closing state -- fallthrough */
19581958
case XenbusStateClosing:
1959-
blkfront_closing(info);
1959+
if (info)
1960+
blkfront_closing(info);
19601961
break;
19611962
}
19621963
}

fs/fs-writeback.c

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -778,19 +778,24 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
778778
struct wb_writeback_work *base_work,
779779
bool skip_if_busy)
780780
{
781-
int next_memcg_id = 0;
782-
struct bdi_writeback *wb;
783-
struct wb_iter iter;
781+
struct bdi_writeback *last_wb = NULL;
782+
struct bdi_writeback *wb = list_entry_rcu(&bdi->wb_list,
783+
struct bdi_writeback, bdi_node);
784784

785785
might_sleep();
786786
restart:
787787
rcu_read_lock();
788-
bdi_for_each_wb(wb, bdi, &iter, next_memcg_id) {
788+
list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
789789
DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
790790
struct wb_writeback_work fallback_work;
791791
struct wb_writeback_work *work;
792792
long nr_pages;
793793

794+
if (last_wb) {
795+
wb_put(last_wb);
796+
last_wb = NULL;
797+
}
798+
794799
/* SYNC_ALL writes out I_DIRTY_TIME too */
795800
if (!wb_has_dirty_io(wb) &&
796801
(base_work->sync_mode == WB_SYNC_NONE ||
@@ -819,12 +824,22 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
819824

820825
wb_queue_work(wb, work);
821826

822-
next_memcg_id = wb->memcg_css->id + 1;
827+
/*
828+
* Pin @wb so that it stays on @bdi->wb_list. This allows
829+
* continuing iteration from @wb after dropping and
830+
* regrabbing rcu read lock.
831+
*/
832+
wb_get(wb);
833+
last_wb = wb;
834+
823835
rcu_read_unlock();
824836
wb_wait_for_completion(bdi, &fallback_work_done);
825837
goto restart;
826838
}
827839
rcu_read_unlock();
840+
841+
if (last_wb)
842+
wb_put(last_wb);
828843
}
829844

830845
#else /* CONFIG_CGROUP_WRITEBACK */
@@ -1857,12 +1872,11 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
18571872
rcu_read_lock();
18581873
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
18591874
struct bdi_writeback *wb;
1860-
struct wb_iter iter;
18611875

18621876
if (!bdi_has_dirty_io(bdi))
18631877
continue;
18641878

1865-
bdi_for_each_wb(wb, bdi, &iter, 0)
1879+
list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
18661880
wb_start_writeback(wb, wb_split_bdi_pages(wb, nr_pages),
18671881
false, reason);
18681882
}
@@ -1894,11 +1908,10 @@ static void wakeup_dirtytime_writeback(struct work_struct *w)
18941908
rcu_read_lock();
18951909
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
18961910
struct bdi_writeback *wb;
1897-
struct wb_iter iter;
18981911

1899-
bdi_for_each_wb(wb, bdi, &iter, 0)
1900-
if (!list_empty(&bdi->wb.b_dirty_time))
1901-
wb_wakeup(&bdi->wb);
1912+
list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
1913+
if (!list_empty(&wb->b_dirty_time))
1914+
wb_wakeup(wb);
19021915
}
19031916
rcu_read_unlock();
19041917
schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);

include/linux/backing-dev-defs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ struct bdi_writeback {
116116
struct list_head work_list;
117117
struct delayed_work dwork; /* work item used for writeback */
118118

119+
struct list_head bdi_node; /* anchored at bdi->wb_list */
120+
119121
#ifdef CONFIG_CGROUP_WRITEBACK
120122
struct percpu_ref refcnt; /* used only for !root wb's */
121123
struct fprop_local_percpu memcg_completions;
@@ -150,6 +152,7 @@ struct backing_dev_info {
150152
atomic_long_t tot_write_bandwidth;
151153

152154
struct bdi_writeback wb; /* the root writeback info for this bdi */
155+
struct list_head wb_list; /* list of all wbs */
153156
#ifdef CONFIG_CGROUP_WRITEBACK
154157
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
155158
struct rb_root cgwb_congested_tree; /* their congested states */

0 commit comments

Comments
 (0)