Commit c288d9c

Merge tag 'for-5.14/io_uring-2021-06-30' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:

 - Multi-queue iopoll improvement (Fam)
 - Allow configurable io-wq CPU masks (me)
 - renameat/linkat tightening (me)
 - poll re-arm improvement (Olivier)
 - SQPOLL race fix (Olivier)
 - Cancelation unification (Pavel)
 - SQPOLL cleanups (Pavel)
 - Enable file backed buffers for shmem/memfd (Pavel)
 - A ton of cleanups and performance improvements (Pavel)
 - Followup and misc fixes (Colin, Fam, Hao, Olivier)

* tag 'for-5.14/io_uring-2021-06-30' of git://git.kernel.dk/linux-block: (83 commits)
  io_uring: code clean for kiocb_done()
  io_uring: spin in iopoll() only when reqs are in a single queue
  io_uring: pre-initialise some of req fields
  io_uring: refactor io_submit_flush_completions
  io_uring: optimise hot path restricted checks
  io_uring: remove not needed PF_EXITING check
  io_uring: mainstream sqpoll task_work running
  io_uring: refactor io_arm_poll_handler()
  io_uring: reduce latency by reissueing the operation
  io_uring: add IOPOLL and reserved field checks to IORING_OP_UNLINKAT
  io_uring: add IOPOLL and reserved field checks to IORING_OP_RENAMEAT
  io_uring: refactor io_openat2()
  io_uring: simplify struct io_uring_sqe layout
  io_uring: update sqe layout build checks
  io_uring: fix code style problems
  io_uring: refactor io_sq_thread()
  io_uring: don't change sqpoll creds if not needed
  io_uring: Create define to modify a SQPOLL parameter
  io_uring: Fix race condition when sqp thread goes to sleep
  io_uring: improve in tctx_task_work() resubmission
  ...
2 parents 911a299 + e149bd7 commit c288d9c
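Of the items above, the configurable io-wq CPU masks are what the diff below implements on the io-wq side: each per-node io_wqe now carries its own cpu_mask, and the new io_wq_cpu_affinity() export lets a caller replace it (a NULL mask restores the per-node default). A minimal sketch of an in-kernel caller; restrict_io_wq_to_cpu() is a hypothetical helper, not part of this series:

/* Hypothetical helper, for illustration only: pin all io-wq workers of
 * @wq to one CPU via the new io_wq_cpu_affinity() export. Assumes
 * "io-wq.h" and the usual cpumask/gfp headers are in scope. */
static int restrict_io_wq_to_cpu(struct io_wq *wq, unsigned int cpu)
{
	cpumask_var_t mask;
	int ret;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(mask);
	cpumask_set_cpu(cpu, mask);
	ret = io_wq_cpu_affinity(wq, mask);	/* copies @mask into every wqe->cpu_mask */
	free_cpumask_var(mask);

	/* Passing NULL instead resets each node to cpumask_of_node(). */
	return ret;
}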

File tree

5 files changed: 874 additions & 690 deletions

fs/io-wq.c

Lines changed: 71 additions & 32 deletions
@@ -9,8 +9,6 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/sched/signal.h>
-#include <linux/mm.h>
-#include <linux/sched/mm.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/rculist_nulls.h>
@@ -96,28 +94,29 @@ struct io_wqe {
 
 	struct io_wq *wq;
 	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
+
+	cpumask_var_t cpu_mask;
 };
 
 /*
  * Per io_wq state
  */
 struct io_wq {
-	struct io_wqe **wqes;
 	unsigned long state;
 
 	free_work_fn *free_work;
 	io_wq_work_fn *do_work;
 
 	struct io_wq_hash *hash;
 
-	refcount_t refs;
-
 	atomic_t worker_refs;
 	struct completion worker_done;
 
 	struct hlist_node cpuhp_node;
 
 	struct task_struct *task;
+
+	struct io_wqe *wqes[];
 };
 
 static enum cpuhp_state io_wq_online;
@@ -241,7 +240,8 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 	 * Most likely an attempt to queue unbounded work on an io_wq that
 	 * wasn't setup with any unbounded workers.
 	 */
-	WARN_ON_ONCE(!acct->max_workers);
+	if (unlikely(!acct->max_workers))
+		pr_warn_once("io-wq is not configured for unbound workers");
 
 	rcu_read_lock();
 	ret = io_wqe_activate_free_worker(wqe);
@@ -560,17 +560,13 @@ static int io_wqe_worker(void *data)
 		if (ret)
 			continue;
 		/* timed out, exit unless we're the fixed worker */
-		if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
-		    !(worker->flags & IO_WORKER_F_FIXED))
+		if (!(worker->flags & IO_WORKER_F_FIXED))
 			break;
 	}
 
 	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
 		raw_spin_lock_irq(&wqe->lock);
-		if (!wq_list_empty(&wqe->work_list))
-			io_worker_handle_work(worker);
-		else
-			raw_spin_unlock_irq(&wqe->lock);
+		io_worker_handle_work(worker);
 	}
 
 	io_worker_exit(worker);
@@ -645,7 +641,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 
 	tsk->pf_io_worker = worker;
 	worker->task = tsk;
-	set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
+	set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
 	tsk->flags |= PF_NO_SETAFFINITY;
 
 	raw_spin_lock_irq(&wqe->lock);
@@ -901,23 +897,20 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
-	int ret = -ENOMEM, node;
+	int ret, node;
 	struct io_wq *wq;
 
 	if (WARN_ON_ONCE(!data->free_work || !data->do_work))
 		return ERR_PTR(-EINVAL);
+	if (WARN_ON_ONCE(!bounded))
+		return ERR_PTR(-EINVAL);
 
-	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+	wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL);
 	if (!wq)
 		return ERR_PTR(-ENOMEM);
-
-	wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
-	if (!wq->wqes)
-		goto err_wq;
-
 	ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
 	if (ret)
-		goto err_wqes;
+		goto err_wq;
 
 	refcount_inc(&data->hash->refs);
 	wq->hash = data->hash;
@@ -934,6 +927,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 		wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
 		if (!wqe)
 			goto err;
+		if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL))
+			goto err;
+		cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
 		wq->wqes[node] = wqe;
 		wqe->node = alloc_node;
 		wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
@@ -953,17 +949,18 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	}
 
 	wq->task = get_task_struct(data->task);
-	refcount_set(&wq->refs, 1);
 	atomic_set(&wq->worker_refs, 1);
 	init_completion(&wq->worker_done);
 	return wq;
 err:
 	io_wq_put_hash(data->hash);
 	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
-	for_each_node(node)
+	for_each_node(node) {
+		if (!wq->wqes[node])
+			continue;
+		free_cpumask_var(wq->wqes[node]->cpu_mask);
 		kfree(wq->wqes[node]);
-err_wqes:
-	kfree(wq->wqes);
+	}
err_wq:
 	kfree(wq);
 	return ERR_PTR(ret);
@@ -1033,10 +1030,10 @@ static void io_wq_destroy(struct io_wq *wq)
 			.cancel_all	= true,
 		};
 		io_wqe_cancel_pending_work(wqe, &match);
+		free_cpumask_var(wqe->cpu_mask);
 		kfree(wqe);
 	}
 	io_wq_put_hash(wq->hash);
-	kfree(wq->wqes);
 	kfree(wq);
 }
 
@@ -1045,25 +1042,67 @@ void io_wq_put_and_exit(struct io_wq *wq)
 	WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
 
 	io_wq_exit_workers(wq);
-	if (refcount_dec_and_test(&wq->refs))
-		io_wq_destroy(wq);
+	io_wq_destroy(wq);
 }
 
+struct online_data {
+	unsigned int cpu;
+	bool online;
+};
+
 static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
 {
-	set_cpus_allowed_ptr(worker->task, cpumask_of_node(worker->wqe->node));
+	struct online_data *od = data;
 
+	if (od->online)
+		cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask);
+	else
+		cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask);
 	return false;
 }
 
+static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online)
+{
+	struct online_data od = {
+		.cpu = cpu,
+		.online = online
+	};
+	int i;
+
+	rcu_read_lock();
+	for_each_node(i)
+		io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od);
+	rcu_read_unlock();
+	return 0;
+}
+
 static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
 	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+
+	return __io_wq_cpu_online(wq, cpu, true);
+}
+
+static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+
+	return __io_wq_cpu_online(wq, cpu, false);
+}
+
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
+{
 	int i;
 
 	rcu_read_lock();
-	for_each_node(i)
-		io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
+	for_each_node(i) {
+		struct io_wqe *wqe = wq->wqes[i];
+
+		if (mask)
+			cpumask_copy(wqe->cpu_mask, mask);
+		else
+			cpumask_copy(wqe->cpu_mask, cpumask_of_node(i));
+	}
 	rcu_read_unlock();
 	return 0;
 }
@@ -1073,7 +1112,7 @@ static __init int io_wq_init(void)
 	int ret;
 
 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
-					io_wq_cpu_online, NULL);
+					io_wq_cpu_online, io_wq_cpu_offline);
 	if (ret < 0)
 		return ret;
 	io_wq_online = ret;
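The io_wq_init() hunk above also wires an offline callback into the CPU-hotplug multi-instance state that io-wq already registered, so a worker's mask drops a CPU when it goes offline and regains it when it comes back. The general cpuhp multi-instance pattern, sketched with a hypothetical my_subsys in place of io_wq (same shape: one dynamic state, one hlist_node per object):

/* Sketch only; the my_* names are hypothetical. Each object embeds an
 * hlist_node and registers it against a dynamically allocated state. */
static enum cpuhp_state my_online_state;

struct my_subsys {
	struct hlist_node cpuhp_node;
	/* ... per-object state ... */
};

static int my_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct my_subsys *s = hlist_entry_safe(node, struct my_subsys, cpuhp_node);

	pr_debug("%p: cpu %u online\n", s, cpu);	/* e.g. re-add @cpu to a tracked mask */
	return 0;
}

static int my_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct my_subsys *s = hlist_entry_safe(node, struct my_subsys, cpuhp_node);

	pr_debug("%p: cpu %u offline\n", s, cpu);	/* e.g. clear @cpu from a tracked mask */
	return 0;
}

static __init int my_subsys_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "my_subsys/online",
				      my_cpu_online, my_cpu_offline);
	if (ret < 0)
		return ret;
	my_online_state = ret;
	return 0;
}

/* Per-object registration mirrors io_wq_create()/io_wq_destroy():
 *	cpuhp_state_add_instance_nocalls(my_online_state, &s->cpuhp_node);
 *	cpuhp_state_remove_instance_nocalls(my_online_state, &s->cpuhp_node);
 */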

fs/io-wq.h

Lines changed: 2 additions & 1 deletion
@@ -87,7 +87,6 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
 	struct io_wq_work_node list;
-	const struct cred *creds;
 	unsigned flags;
 };
 
@@ -128,6 +127,8 @@ void io_wq_put_and_exit(struct io_wq *wq);
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
 	return work->flags & IO_WQ_WORK_HASHED;
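For context on the neighboring declarations: work items that must not run concurrently against the same resource are hashed with io_wq_hash_work() before io_wq_enqueue(), and io-wq then serializes items that share a hash key. A small sketch; struct my_req and the choice of the inode as the key are hypothetical:

/* Hypothetical embedding of io_wq_work: serialize writes that target
 * the same inode by hashing on the inode pointer before enqueueing. */
struct my_req {
	struct io_wq_work work;		/* io-wq handoff handle */
	struct inode *inode;
};

static void my_queue_write(struct io_wq *wq, struct my_req *req)
{
	io_wq_hash_work(&req->work, req->inode);	/* same key => runs serially */
	io_wq_enqueue(wq, &req->work);
}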
