Skip to content

Commit 8a81252

Browse files
edumazet authored and Al Viro committed
fs/file.c: don't acquire files->file_lock in fd_install()
Mateusz Guzik reported: Currently obtaining a new file descriptor results in locking fdtable twice - once in order to reserve a slot and a second time to fill it. Holding the spinlock in __fd_install() is needed in case a resize is done, or to prevent a resize. Mateusz provided an RFC patch and a micro benchmark: http://people.redhat.com/~mguzik/pipebench.c A resize is an unlikely operation in a process lifetime, as table size is at least doubled at every resize. We can use RCU instead of the spinlock. __fd_install() must wait if a resize is in progress. The resize must block new __fd_install() callers from starting, and wait until ongoing installs have finished (synchronize_sched()). A resize should be attempted by a single thread so as not to waste resources. The rcu_sched variant is used, as __fd_install() and expand_fdtable() run from process context. It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R) CPU E5-2696 v2 @ 2.50GHz. Signed-off-by: Eric Dumazet <[email protected]> Reported-by: Mateusz Guzik <[email protected]> Acked-by: Mateusz Guzik <[email protected]> Tested-by: Mateusz Guzik <[email protected]> Signed-off-by: Al Viro <[email protected]>
1 parent 1af95de commit 8a81252

File tree

3 files changed

+55
-19
lines changed

3 files changed

+55
-19
lines changed

Documentation/filesystems/porting

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,3 +500,7 @@ in your dentry operations instead.
500500
dentry, it does not get nameidata at all and it gets called only when cookie
501501
is non-NULL. Note that link body isn't available anymore, so if you need it,
502502
store it as cookie.
503+
--
504+
[mandatory]
505+
__fd_install() & fd_install() can now sleep. Callers should not
506+
hold a spinlock or other resources that do not allow a schedule.

fs/file.c

Lines changed: 48 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr)
147147

148148
spin_unlock(&files->file_lock);
149149
new_fdt = alloc_fdtable(nr);
150+
151+
/* make sure all __fd_install() have seen resize_in_progress
152+
* or have finished their rcu_read_lock_sched() section.
153+
*/
154+
if (atomic_read(&files->count) > 1)
155+
synchronize_sched();
156+
150157
spin_lock(&files->file_lock);
151158
if (!new_fdt)
152159
return -ENOMEM;
@@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr)
158165
__free_fdtable(new_fdt);
159166
return -EMFILE;
160167
}
161-
/*
162-
* Check again since another task may have expanded the fd table while
163-
* we dropped the lock
164-
*/
165168
cur_fdt = files_fdtable(files);
166-
if (nr >= cur_fdt->max_fds) {
167-
/* Continue as planned */
168-
copy_fdtable(new_fdt, cur_fdt);
169-
rcu_assign_pointer(files->fdt, new_fdt);
170-
if (cur_fdt != &files->fdtab)
171-
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
172-
} else {
173-
/* Somebody else expanded, so undo our attempt */
174-
__free_fdtable(new_fdt);
175-
}
169+
BUG_ON(nr < cur_fdt->max_fds);
170+
copy_fdtable(new_fdt, cur_fdt);
171+
rcu_assign_pointer(files->fdt, new_fdt);
172+
if (cur_fdt != &files->fdtab)
173+
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
174+
/* coupled with smp_rmb() in __fd_install() */
175+
smp_wmb();
176176
return 1;
177177
}
178178

@@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr)
185185
* The files->file_lock should be held on entry, and will be held on exit.
186186
*/
187187
static int expand_files(struct files_struct *files, int nr)
188+
__releases(files->file_lock)
189+
__acquires(files->file_lock)
188190
{
189191
struct fdtable *fdt;
192+
int expanded = 0;
190193

194+
repeat:
191195
fdt = files_fdtable(files);
192196

193197
/* Do we need to expand? */
194198
if (nr < fdt->max_fds)
195-
return 0;
199+
return expanded;
196200

197201
/* Can we expand? */
198202
if (nr >= sysctl_nr_open)
199203
return -EMFILE;
200204

205+
if (unlikely(files->resize_in_progress)) {
206+
spin_unlock(&files->file_lock);
207+
expanded = 1;
208+
wait_event(files->resize_wait, !files->resize_in_progress);
209+
spin_lock(&files->file_lock);
210+
goto repeat;
211+
}
212+
201213
/* All good, so we try */
202-
return expand_fdtable(files, nr);
214+
files->resize_in_progress = true;
215+
expanded = expand_fdtable(files, nr);
216+
files->resize_in_progress = false;
217+
218+
wake_up_all(&files->resize_wait);
219+
return expanded;
203220
}
204221

205222
static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
@@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
256273
atomic_set(&newf->count, 1);
257274

258275
spin_lock_init(&newf->file_lock);
276+
newf->resize_in_progress = false;
277+
init_waitqueue_head(&newf->resize_wait);
259278
newf->next_fd = 0;
260279
new_fdt = &newf->fdtab;
261280
new_fdt->max_fds = NR_OPEN_DEFAULT;
@@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd,
553572
struct file *file)
554573
{
555574
struct fdtable *fdt;
556-
spin_lock(&files->file_lock);
557-
fdt = files_fdtable(files);
575+
576+
might_sleep();
577+
rcu_read_lock_sched();
578+
579+
while (unlikely(files->resize_in_progress)) {
580+
rcu_read_unlock_sched();
581+
wait_event(files->resize_wait, !files->resize_in_progress);
582+
rcu_read_lock_sched();
583+
}
584+
/* coupled with smp_wmb() in expand_fdtable() */
585+
smp_rmb();
586+
fdt = rcu_dereference_sched(files->fdt);
558587
BUG_ON(fdt->fd[fd] != NULL);
559588
rcu_assign_pointer(fdt->fd[fd], file);
560-
spin_unlock(&files->file_lock);
589+
rcu_read_unlock_sched();
561590
}
562591

563592
void fd_install(unsigned int fd, struct file *file)

include/linux/fdtable.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ struct files_struct {
4747
* read mostly part
4848
*/
4949
atomic_t count;
50+
bool resize_in_progress;
51+
wait_queue_head_t resize_wait;
52+
5053
struct fdtable __rcu *fdt;
5154
struct fdtable fdtab;
5255
/*

0 commit comments

Comments
 (0)