Skip to content

Commit 896f8d2

Browse files
committed
Merge tag 'for-5.6/io_uring-vfs-2020-01-29' of git://git.kernel.dk/linux-block
Pull io_uring updates from Jens Axboe:

 - Support for various new opcodes (fallocate, openat, close, statx, fadvise, madvise, openat2, non-vectored read/write, send/recv, and epoll_ctl)
 - Faster ring quiesce for fileset updates
 - Optimizations for overflow condition checking
 - Support for max-sized clamping
 - Support for probing what opcodes are supported
 - Support for io-wq backend sharing between "sibling" rings
 - Support for registering personalities
 - Lots of little fixes and improvements

* tag 'for-5.6/io_uring-vfs-2020-01-29' of git://git.kernel.dk/linux-block: (64 commits)
  io_uring: add support for epoll_ctl(2)
  eventpoll: support non-blocking do_epoll_ctl() calls
  eventpoll: abstract out epoll_ctl() handler
  io_uring: fix linked command file table usage
  io_uring: support using a registered personality for commands
  io_uring: allow registering credentials
  io_uring: add io-wq workqueue sharing
  io-wq: allow grabbing existing io-wq
  io_uring/io-wq: don't use static creds/mm assignments
  io-wq: make the io_wq ref counted
  io_uring: fix refcounting with batched allocations at OOM
  io_uring: add comment for drain_next
  io_uring: don't attempt to copy iovec for READ/WRITE
  io_uring: honor IOSQE_ASYNC for linked reqs
  io_uring: prep req when do IOSQE_ASYNC
  io_uring: use labeled array init in io_op_defs
  io_uring: optimise sqe-to-req flags translation
  io_uring: remove REQ_F_IO_DRAINED
  io_uring: file switch work needs to get flushed on exit
  io_uring: hide uring_fd in ctx
  ...
2 parents 33c84e8 + 3e4827b commit 896f8d2

File tree

15 files changed

+2218
-583
lines changed

15 files changed

+2218
-583
lines changed

drivers/android/binder.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2249,10 +2249,12 @@ static void binder_deferred_fd_close(int fd)
22492249
return;
22502250
init_task_work(&twcb->twork, binder_do_fd_close);
22512251
__close_fd_get_file(fd, &twcb->file);
2252-
if (twcb->file)
2252+
if (twcb->file) {
2253+
filp_close(twcb->file, current->files);
22532254
task_work_add(current, &twcb->twork, true);
2254-
else
2255+
} else {
22552256
kfree(twcb);
2257+
}
22562258
}
22572259

22582260
static void binder_transaction_buffer_release(struct binder_proc *proc,

fs/eventpoll.c

Lines changed: 56 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -354,12 +354,6 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
354354
return container_of(p, struct ep_pqueue, pt)->epi;
355355
}
356356

357-
/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
358-
static inline int ep_op_has_event(int op)
359-
{
360-
return op != EPOLL_CTL_DEL;
361-
}
362-
363357
/* Initialize the poll safe wake up structure */
364358
static void ep_nested_calls_init(struct nested_calls *ncalls)
365359
{
@@ -2074,27 +2068,28 @@ SYSCALL_DEFINE1(epoll_create, int, size)
20742068
return do_epoll_create(0);
20752069
}
20762070

2077-
/*
2078-
* The following function implements the controller interface for
2079-
* the eventpoll file that enables the insertion/removal/change of
2080-
* file descriptors inside the interest set.
2081-
*/
2082-
SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
2083-
struct epoll_event __user *, event)
2071+
static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
2072+
bool nonblock)
2073+
{
2074+
if (!nonblock) {
2075+
mutex_lock_nested(mutex, depth);
2076+
return 0;
2077+
}
2078+
if (mutex_trylock(mutex))
2079+
return 0;
2080+
return -EAGAIN;
2081+
}
2082+
2083+
int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
2084+
bool nonblock)
20842085
{
20852086
int error;
20862087
int full_check = 0;
20872088
struct fd f, tf;
20882089
struct eventpoll *ep;
20892090
struct epitem *epi;
2090-
struct epoll_event epds;
20912091
struct eventpoll *tep = NULL;
20922092

2093-
error = -EFAULT;
2094-
if (ep_op_has_event(op) &&
2095-
copy_from_user(&epds, event, sizeof(struct epoll_event)))
2096-
goto error_return;
2097-
20982093
error = -EBADF;
20992094
f = fdget(epfd);
21002095
if (!f.file)
@@ -2112,7 +2107,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
21122107

21132108
/* Check if EPOLLWAKEUP is allowed */
21142109
if (ep_op_has_event(op))
2115-
ep_take_care_of_epollwakeup(&epds);
2110+
ep_take_care_of_epollwakeup(epds);
21162111

21172112
/*
21182113
* We have to check that the file structure underneath the file descriptor
@@ -2128,11 +2123,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
21282123
* so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
21292124
* Also, we do not currently support nested exclusive wakeups.
21302125
*/
2131-
if (ep_op_has_event(op) && (epds.events & EPOLLEXCLUSIVE)) {
2126+
if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) {
21322127
if (op == EPOLL_CTL_MOD)
21332128
goto error_tgt_fput;
21342129
if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
2135-
(epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
2130+
(epds->events & ~EPOLLEXCLUSIVE_OK_BITS)))
21362131
goto error_tgt_fput;
21372132
}
21382133

@@ -2157,13 +2152,17 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
21572152
* deep wakeup paths from forming in parallel through multiple
21582153
* EPOLL_CTL_ADD operations.
21592154
*/
2160-
mutex_lock_nested(&ep->mtx, 0);
2155+
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
2156+
if (error)
2157+
goto error_tgt_fput;
21612158
if (op == EPOLL_CTL_ADD) {
21622159
if (!list_empty(&f.file->f_ep_links) ||
21632160
is_file_epoll(tf.file)) {
2164-
full_check = 1;
21652161
mutex_unlock(&ep->mtx);
2166-
mutex_lock(&epmutex);
2162+
error = epoll_mutex_lock(&epmutex, 0, nonblock);
2163+
if (error)
2164+
goto error_tgt_fput;
2165+
full_check = 1;
21672166
if (is_file_epoll(tf.file)) {
21682167
error = -ELOOP;
21692168
if (ep_loop_check(ep, tf.file) != 0) {
@@ -2173,10 +2172,19 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
21732172
} else
21742173
list_add(&tf.file->f_tfile_llink,
21752174
&tfile_check_list);
2176-
mutex_lock_nested(&ep->mtx, 0);
2175+
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
2176+
if (error) {
2177+
out_del:
2178+
list_del(&tf.file->f_tfile_llink);
2179+
goto error_tgt_fput;
2180+
}
21772181
if (is_file_epoll(tf.file)) {
21782182
tep = tf.file->private_data;
2179-
mutex_lock_nested(&tep->mtx, 1);
2183+
error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
2184+
if (error) {
2185+
mutex_unlock(&ep->mtx);
2186+
goto out_del;
2187+
}
21802188
}
21812189
}
21822190
}
@@ -2192,8 +2200,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
21922200
switch (op) {
21932201
case EPOLL_CTL_ADD:
21942202
if (!epi) {
2195-
epds.events |= EPOLLERR | EPOLLHUP;
2196-
error = ep_insert(ep, &epds, tf.file, fd, full_check);
2203+
epds->events |= EPOLLERR | EPOLLHUP;
2204+
error = ep_insert(ep, epds, tf.file, fd, full_check);
21972205
} else
21982206
error = -EEXIST;
21992207
if (full_check)
@@ -2208,8 +2216,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
22082216
case EPOLL_CTL_MOD:
22092217
if (epi) {
22102218
if (!(epi->event.events & EPOLLEXCLUSIVE)) {
2211-
epds.events |= EPOLLERR | EPOLLHUP;
2212-
error = ep_modify(ep, epi, &epds);
2219+
epds->events |= EPOLLERR | EPOLLHUP;
2220+
error = ep_modify(ep, epi, epds);
22132221
}
22142222
} else
22152223
error = -ENOENT;
@@ -2231,6 +2239,23 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
22312239
return error;
22322240
}
22332241

2242+
/*
2243+
* The following function implements the controller interface for
2244+
* the eventpoll file that enables the insertion/removal/change of
2245+
* file descriptors inside the interest set.
2246+
*/
2247+
SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
2248+
struct epoll_event __user *, event)
2249+
{
2250+
struct epoll_event epds;
2251+
2252+
if (ep_op_has_event(op) &&
2253+
copy_from_user(&epds, event, sizeof(struct epoll_event)))
2254+
return -EFAULT;
2255+
2256+
return do_epoll_ctl(epfd, op, fd, &epds, false);
2257+
}
2258+
22342259
/*
22352260
* Implement the event wait interface for the eventpoll file. It is the kernel
22362261
* part of the user space epoll_wait(2).

fs/file.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,9 @@ int __close_fd(struct files_struct *files, unsigned fd)
642642
EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
643643

644644
/*
645-
* variant of __close_fd that gets a ref on the file for later fput
645+
* variant of __close_fd that gets a ref on the file for later fput.
646+
* The caller must ensure that filp_close() is called on the file, and then
647+
* an fput().
646648
*/
647649
int __close_fd_get_file(unsigned int fd, struct file **res)
648650
{
@@ -662,7 +664,7 @@ int __close_fd_get_file(unsigned int fd, struct file **res)
662664
spin_unlock(&files->file_lock);
663665
get_file(file);
664666
*res = file;
665-
return filp_close(file, files);
667+
return 0;
666668

667669
out_unlock:
668670
spin_unlock(&files->file_lock);

fs/internal.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ extern struct file *do_filp_open(int dfd, struct filename *pathname,
124124
const struct open_flags *op);
125125
extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
126126
const char *, const struct open_flags *);
127+
extern struct open_how build_open_how(int flags, umode_t mode);
128+
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
127129

128130
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
129131
long do_faccessat(int dfd, const char __user *filename, int mode);
@@ -182,3 +184,9 @@ extern const struct dentry_operations ns_dentry_operations;
182184

183185
/* direct-io.c: */
184186
int sb_init_dio_done_wq(struct super_block *sb);
187+
188+
/*
189+
* fs/stat.c:
190+
*/
191+
unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags, int flags);
192+
int cp_statx(const struct kstat *stat, struct statx __user *buffer);

0 commit comments

Comments
 (0)