Skip to content

Commit 7b05bf7

Browse files
committed
Revert "block/mq-deadline: Prioritize high-priority requests"
This reverts commit fb92603.

Zhen reports that this commit slows down mq-deadline on a 128 thread box, going from 258K IOPS to 170-180K. My testing shows that Optane gen2 IOPS goes from 2.3M IOPS to 1.2M IOPS on a 64 thread box.

Looking in detail at the code, the main culprit here is needing to sum percpu counters in the dispatch hot path, leading to very high CPU utilization there. To make matters worse, the code currently needs to sum 2 percpu counters, and it does so in the most naive way of iterating possible CPUs _twice_.

Since we're close to release, revert this commit and we can re-do it with regular per-priority counters instead for the 5.15 kernel.

Link: https://lore.kernel.org/linux-block/[email protected]/
Reported-by: Zhen Lei <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
1 parent b6d2b05 commit 7b05bf7

File tree

1 file changed

+5
-37
lines changed

1 file changed

+5
-37
lines changed

block/mq-deadline.c

Lines changed: 5 additions & 37 deletions
Original file line number · Diff line number · Diff line change
@@ -31,11 +31,6 @@
3131
*/
3232
static const int read_expire = HZ / 2; /* max time before a read is submitted. */
3333
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
34-
/*
35-
* Time after which to dispatch lower priority requests even if higher
36-
* priority requests are pending.
37-
*/
38-
static const int aging_expire = 10 * HZ;
3934
static const int writes_starved = 2; /* max times reads can starve a write */
4035
static const int fifo_batch = 16; /* # of sequential requests treated as one
4136
by the above parameters. For throughput. */
@@ -103,7 +98,6 @@ struct deadline_data {
10398
int writes_starved;
10499
int front_merges;
105100
u32 async_depth;
106-
int aging_expire;
107101

108102
spinlock_t lock;
109103
spinlock_t zone_lock;
@@ -369,11 +363,10 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
369363

370364
/*
371365
* deadline_dispatch_requests selects the best request according to
372-
* read/write expire, fifo_batch, etc and with a start time <= @latest.
366+
* read/write expire, fifo_batch, etc
373367
*/
374368
static struct request *__dd_dispatch_request(struct deadline_data *dd,
375-
struct dd_per_prio *per_prio,
376-
u64 latest_start_ns)
369+
struct dd_per_prio *per_prio)
377370
{
378371
struct request *rq, *next_rq;
379372
enum dd_data_dir data_dir;
@@ -385,8 +378,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
385378
if (!list_empty(&per_prio->dispatch)) {
386379
rq = list_first_entry(&per_prio->dispatch, struct request,
387380
queuelist);
388-
if (rq->start_time_ns > latest_start_ns)
389-
return NULL;
390381
list_del_init(&rq->queuelist);
391382
goto done;
392383
}
@@ -464,8 +455,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
464455
dd->batching = 0;
465456

466457
dispatch_request:
467-
if (rq->start_time_ns > latest_start_ns)
468-
return NULL;
469458
/*
470459
* rq is the selected appropriate request.
471460
*/
@@ -494,32 +483,15 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
494483
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
495484
{
496485
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
497-
const u64 now_ns = ktime_get_ns();
498-
struct request *rq = NULL;
486+
struct request *rq;
499487
enum dd_prio prio;
500488

501489
spin_lock(&dd->lock);
502-
/*
503-
* Start with dispatching requests whose deadline expired more than
504-
* aging_expire jiffies ago.
505-
*/
506-
for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
507-
rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns -
508-
jiffies_to_nsecs(dd->aging_expire));
509-
if (rq)
510-
goto unlock;
511-
}
512-
/*
513-
* Next, dispatch requests in priority order. Ignore lower priority
514-
* requests if any higher priority requests are pending.
515-
*/
516490
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
517-
rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns);
518-
if (rq || dd_queued(dd, prio))
491+
rq = __dd_dispatch_request(dd, &dd->per_prio[prio]);
492+
if (rq)
519493
break;
520494
}
521-
522-
unlock:
523495
spin_unlock(&dd->lock);
524496

525497
return rq;
@@ -620,7 +592,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
620592
dd->front_merges = 1;
621593
dd->last_dir = DD_WRITE;
622594
dd->fifo_batch = fifo_batch;
623-
dd->aging_expire = aging_expire;
624595
spin_lock_init(&dd->lock);
625596
spin_lock_init(&dd->zone_lock);
626597

@@ -842,7 +813,6 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
842813
#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
843814
SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
844815
SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
845-
SHOW_JIFFIES(deadline_aging_expire_show, dd->aging_expire);
846816
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
847817
SHOW_INT(deadline_front_merges_show, dd->front_merges);
848818
SHOW_INT(deadline_async_depth_show, dd->front_merges);   [NOTE(review): this context line appears to contain a copy-paste error in the source at this commit — the async_depth show attribute reads dd->front_merges instead of dd->async_depth; verify against the later mainline fix to the async_depth sysfs interface.]
@@ -872,7 +842,6 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
872842
STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
873843
STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
874844
STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
875-
STORE_JIFFIES(deadline_aging_expire_store, &dd->aging_expire, 0, INT_MAX);
876845
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
877846
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
878847
STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);   [NOTE(review): matching copy-paste error — the async_depth store attribute writes &dd->front_merges instead of &dd->async_depth; verify against the later mainline fix.]
@@ -891,7 +860,6 @@ static struct elv_fs_entry deadline_attrs[] = {
891860
DD_ATTR(front_merges),
892861
DD_ATTR(async_depth),
893862
DD_ATTR(fifo_batch),
894-
DD_ATTR(aging_expire),
895863
__ATTR_NULL
896864
};
897865

0 commit comments

Comments (0)