Skip to content

Commit d6cffbb

Browse files
koct9i and ebiederm
authored and committed
proc/sysctl: prune stale dentries during unregistering
Currently unregistering a sysctl table does not prune its dentries. Stale dentries can slow down sysctl operations significantly. For example, the command: # for i in {1..100000} ; do unshare -n -- sysctl -a &> /dev/null ; done creates millions of stale dentries around the sysctls of the loopback interface: # sysctl fs.dentry-state fs.dentry-state = 25812579 24724135 45 0 0 0 All of them have matching names, thus lookup has to scan through the whole hash chain and call d_compare (proc_sys_compare), which checks them under the system-wide spinlock (sysctl_lock). # time sysctl -a > /dev/null real 1m12.806s user 0m0.016s sys 1m12.400s Currently only the memory reclaimer can remove this garbage. But without significant memory pressure this never happens. This patch collects sysctl inodes into a list on the sysctl table header and prunes all their dentries once that table unregisters. Konstantin Khlebnikov <[email protected]> writes: > On 10.02.2017 10:47, Al Viro wrote: >> how about >> the matching stats *after* that patch? > > dcache size doesn't grow endlessly, so stats are fine > > # sysctl fs.dentry-state > fs.dentry-state = 92712 58376 45 0 0 0 > > # time sysctl -a &>/dev/null > > real 0m0.013s > user 0m0.004s > sys 0m0.008s Signed-off-by: Konstantin Khlebnikov <[email protected]> Suggested-by: Al Viro <[email protected]> Signed-off-by: Eric W. Biederman <[email protected]>
1 parent 1064f87 commit d6cffbb

File tree

4 files changed

+51
-19
lines changed

4 files changed

+51
-19
lines changed

fs/proc/inode.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,11 @@ static void proc_evict_inode(struct inode *inode)
4343
de = PDE(inode);
4444
if (de)
4545
pde_put(de);
46+
4647
head = PROC_I(inode)->sysctl;
4748
if (head) {
4849
RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
49-
sysctl_head_put(head);
50+
proc_sys_evict_inode(inode, head);
5051
}
5152
}
5253

fs/proc/internal.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ struct proc_inode {
6565
struct proc_dir_entry *pde;
6666
struct ctl_table_header *sysctl;
6767
struct ctl_table *sysctl_entry;
68+
struct list_head sysctl_inodes;
6869
const struct proc_ns_operations *ns_ops;
6970
struct inode vfs_inode;
7071
};
@@ -237,10 +238,12 @@ extern void proc_thread_self_init(void);
237238
*/
238239
#ifdef CONFIG_PROC_SYSCTL
239240
extern int proc_sys_init(void);
240-
extern void sysctl_head_put(struct ctl_table_header *);
241+
extern void proc_sys_evict_inode(struct inode *inode,
242+
struct ctl_table_header *head);
241243
#else
242244
static inline void proc_sys_init(void) { }
243-
static inline void sysctl_head_put(struct ctl_table_header *head) { }
245+
static inline void proc_sys_evict_inode(struct inode *inode,
246+
struct ctl_table_header *head) { }
244247
#endif
245248

246249
/*

fs/proc/proc_sysctl.c

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ static void init_header(struct ctl_table_header *head,
190190
head->set = set;
191191
head->parent = NULL;
192192
head->node = node;
193+
INIT_LIST_HEAD(&head->inodes);
193194
if (node) {
194195
struct ctl_table *entry;
195196
for (entry = table; entry->procname; entry++, node++)
@@ -259,6 +260,29 @@ static void unuse_table(struct ctl_table_header *p)
259260
complete(p->unregistering);
260261
}
261262

263+
/*
 * Prune the dentry aliases of every inode linked on head->inodes.
 *
 * called under sysctl_lock. The lock is released around each iput() and
 * d_prune_aliases() call and retaken before the list walk continues, so
 * it is held again when this function returns.
 */
264+
static void proc_sys_prune_dcache(struct ctl_table_header *head)
265+
{
266+
struct inode *inode, *prev = NULL;
267+
struct proc_inode *ei;
268+
269+
list_for_each_entry(ei, &head->inodes, sysctl_inodes) {
270+
/* entries for which igrab() returns NULL are skipped */
inode = igrab(&ei->vfs_inode);
271+
if (inode) {
272+
spin_unlock(&sysctl_lock);
273+
/*
 * The reference from the previous iteration is only dropped here,
 * after the next inode has been grabbed and the lock released.
 * NOTE(review): presumably holding the current inode's reference
 * across the unlocked window keeps its entry on the list (the
 * list_del() is done from the evict path) -- confirm.
 */
iput(prev);
274+
prev = inode;
275+
d_prune_aliases(inode);
276+
spin_lock(&sysctl_lock);
277+
}
278+
}
279+
/* drop the reference left over from the last grabbed inode, unlocked */
if (prev) {
280+
spin_unlock(&sysctl_lock);
281+
iput(prev);
282+
spin_lock(&sysctl_lock);
283+
}
284+
}
285+
262286
/* called under sysctl_lock, will reacquire if has to wait */
263287
static void start_unregistering(struct ctl_table_header *p)
264288
{
@@ -277,28 +301,18 @@ static void start_unregistering(struct ctl_table_header *p)
277301
/* anything non-NULL; we'll never dereference it */
278302
p->unregistering = ERR_PTR(-EINVAL);
279303
}
304+
/*
305+
* Prune dentries for unregistered sysctls: namespaced sysctls
306+
* can have duplicate names and contaminate dcache very badly.
307+
*/
308+
proc_sys_prune_dcache(p);
280309
/*
281310
* do not remove from the list until nobody holds it; walking the
282311
* list in do_sysctl() relies on that.
283312
*/
284313
erase_header(p);
285314
}
286315

287-
static void sysctl_head_get(struct ctl_table_header *head)
288-
{
289-
spin_lock(&sysctl_lock);
290-
head->count++;
291-
spin_unlock(&sysctl_lock);
292-
}
293-
294-
void sysctl_head_put(struct ctl_table_header *head)
295-
{
296-
spin_lock(&sysctl_lock);
297-
if (!--head->count)
298-
kfree_rcu(head, rcu);
299-
spin_unlock(&sysctl_lock);
300-
}
301-
302316
static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
303317
{
304318
BUG_ON(!head);
@@ -440,11 +454,15 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
440454

441455
inode->i_ino = get_next_ino();
442456

443-
sysctl_head_get(head);
444457
ei = PROC_I(inode);
445458
ei->sysctl = head;
446459
ei->sysctl_entry = table;
447460

461+
spin_lock(&sysctl_lock);
462+
list_add(&ei->sysctl_inodes, &head->inodes);
463+
head->count++;
464+
spin_unlock(&sysctl_lock);
465+
448466
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
449467
inode->i_mode = table->mode;
450468
if (!S_ISDIR(table->mode)) {
@@ -466,6 +484,15 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
466484
return inode;
467485
}
468486

487+
/*
 * Detach an inode from its sysctl table header.
 *
 * Takes sysctl_lock, unlinks the inode's proc_inode from the header's
 * inode list and decrements head->count; when the count reaches zero the
 * header is released with kfree_rcu().
 *
 * @inode: inode whose proc_inode is linked via sysctl_inodes
 * @head:  sysctl table header the inode was attached to
 */
void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
488+
{
489+
spin_lock(&sysctl_lock);
490+
list_del(&PROC_I(inode)->sysctl_inodes);
491+
/* last reference gone: free the header through kfree_rcu() */
if (!--head->count)
492+
kfree_rcu(head, rcu);
493+
spin_unlock(&sysctl_lock);
494+
}
495+
469496
static struct ctl_table_header *grab_header(struct inode *inode)
470497
{
471498
struct ctl_table_header *head = PROC_I(inode)->sysctl;

include/linux/sysctl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ struct ctl_table_header
143143
struct ctl_table_set *set;
144144
struct ctl_dir *parent;
145145
struct ctl_node *node;
146+
struct list_head inodes; /* head for proc_inode->sysctl_inodes */
146147
};
147148

148149
struct ctl_dir {

0 commit comments

Comments
 (0)