Skip to content

Commit 9bdebc2

Browse files
author
Al Viro
committed
Teach shrink_dcache_parent() to cope with mixed-filesystem shrink lists
Currently, running into a shrink list that contains dentries from different filesystems can cause several unpleasant things for shrink_dcache_parent() and for umount(2). The first problem is that there's a window during shrink_dentry_list() between __dentry_kill() taking a victim out and the reference to its parent being dropped. During that window the parent looks like a genuine busy dentry. shrink_dcache_parent() (or, worse yet, shrink_dcache_for_umount()) coming at that time will see no eviction candidates and no indication that it needs to wait for some shrink_dentry_list() to proceed further. That applies to any shrink list that might intersect with the subtree we are trying to shrink; the only reason it does not blow up on umount(2) in the mainline is that we unregister the memory shrinker before hitting shrink_dcache_for_umount(). Another problem happens if something in a mixed-filesystem shrink list gets stuck in e.g. iput(), causing umount of an unrelated fs to spin waiting for the stuck shrinker to get around to our dentries. Solution: 1) have shrink_dentry_list() decrement the parent's refcount and make sure it's on a shrink list (ours unless it already had been on some other) before calling __dentry_kill(). That eliminates the window when shrink_dcache_parent() would've blown past the entire subtree without noticing anything with zero refcount not on shrink lists. 2) when shrink_dcache_parent() has found no eviction candidates, but some dentries are still sitting on shrink lists, rather than repeating the scan in hope that shrinkers have progressed, scan looking for something on shrink lists with zero refcount. If such a thing is found, grab rcu_read_lock() and stop the scan, with the caller locking it for eviction, dropping out of RCU and doing __dentry_kill(), with the same treatment for the parent as shrink_dentry_list() would do. Note that right now mixed-filesystem shrink lists do not occur, so this is not a mainline bug.
However, there's a bunch of uses for such beasts (e.g. the "try and evict everything we can out of given page" patches; there are potential uses in mount-related code, considerably simplifying the life in fs/namespace.c, etc.) Signed-off-by: Al Viro <[email protected]>
1 parent e4e5990 commit 9bdebc2

File tree

2 files changed

+85
-17
lines changed

2 files changed

+85
-17
lines changed

fs/dcache.c

Lines changed: 83 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,32 @@ void dput(struct dentry *dentry)
861861
}
862862
EXPORT_SYMBOL(dput);
863863

864+
/*
 * Drop one reference on @dentry and, if that leaves it unreferenced and
 * not already claimed by some shrink list, queue it on @list.
 *
 * The caller must hold dentry->d_lock (see the __must_hold() annotation
 * below); the count in d_lockref is decremented directly here.
 *
 *  - Dentry already flagged DCACHE_SHRINK_LIST: only the count is
 *    decremented and the dentry stays on whatever list it is on.
 *  - Otherwise: it is taken off the LRU if DCACHE_LRU_LIST is set, the
 *    count is decremented, and the dentry is added to @list through
 *    d_shrink_add() only when the count has reached zero.
 */
static void __dput_to_list(struct dentry *dentry, struct list_head *list)
865+
__must_hold(&dentry->d_lock)
866+
{
867+
if (dentry->d_flags & DCACHE_SHRINK_LIST) {
868+
/* let the owner of the list it's on deal with it */
869+
--dentry->d_lockref.count;
870+
} else {
871+
/* LRU removal happens whether or not the count ends up at zero */
if (dentry->d_flags & DCACHE_LRU_LIST)
872+
d_lru_del(dentry);
873+
if (!--dentry->d_lockref.count)
874+
d_shrink_add(dentry, list);
875+
}
876+
}
877+
878+
/*
 * dput_to_list - drop a reference on @dentry, deferring eviction to @list
 * @dentry: dentry whose reference is being dropped
 * @list: shrink list that receives the dentry if it becomes disposable
 *
 * fast_dput() is tried first under rcu_read_lock(); when it reports
 * success nothing else is done.  Otherwise, unless retain_dentry() asks
 * for the dentry to be kept, the reference is dropped via
 * __dput_to_list(), which may queue the dentry on @list.  This function
 * does not evict anything itself.
 */
void dput_to_list(struct dentry *dentry, struct list_head *list)
879+
{
880+
rcu_read_lock();
881+
if (likely(fast_dput(dentry))) {
882+
rcu_read_unlock();
883+
return;
884+
}
885+
rcu_read_unlock();
886+
if (!retain_dentry(dentry))
887+
__dput_to_list(dentry, list);
888+
/*
 * NOTE(review): d_lock is released here but never taken in this
 * function - presumably fast_dput() returns false with it held;
 * confirm against fast_dput().
 */
spin_unlock(&dentry->d_lock);
889+
}
864890

865891
/* This must be called with d_lock held */
866892
static inline void __dget_dlock(struct dentry *dentry)
@@ -1067,7 +1093,7 @@ static bool shrink_lock_dentry(struct dentry *dentry)
10671093
return false;
10681094
}
10691095

1070-
static void shrink_dentry_list(struct list_head *list)
1096+
void shrink_dentry_list(struct list_head *list)
10711097
{
10721098
while (!list_empty(list)) {
10731099
struct dentry *dentry, *parent;
@@ -1089,18 +1115,9 @@ static void shrink_dentry_list(struct list_head *list)
10891115
rcu_read_unlock();
10901116
d_shrink_del(dentry);
10911117
parent = dentry->d_parent;
1118+
if (parent != dentry)
1119+
__dput_to_list(parent, list);
10921120
__dentry_kill(dentry);
1093-
if (parent == dentry)
1094-
continue;
1095-
/*
1096-
* We need to prune ancestors too. This is necessary to prevent
1097-
* quadratic behavior of shrink_dcache_parent(), but is also
1098-
* expected to be beneficial in reducing dentry cache
1099-
* fragmentation.
1100-
*/
1101-
dentry = parent;
1102-
while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
1103-
dentry = dentry_kill(dentry);
11041121
}
11051122
}
11061123

@@ -1445,8 +1462,11 @@ int d_set_mounted(struct dentry *dentry)
14451462

14461463
/*
 * State shared between shrink_dcache_parent() and its d_walk() callbacks.
 *
 * @start is the dentry the walk began at, and @dispose is the list that
 * collected dentries are placed on.  @found and @victim overlay each other
 * in an anonymous union: shrink_dcache_parent() tests @found after the
 * select_collect() walk, then resets the storage through @victim (NULL)
 * before the select_collect2() walk, which may store a dentry pointer there.
 */
struct select_data {
14471464
struct dentry *start;
1465+
union {
1466+
long found;
1467+
struct dentry *victim;
1468+
};
14481469
struct list_head dispose;
1449-
int found;
14501470
};
14511471

14521472
static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
@@ -1478,6 +1498,37 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
14781498
return ret;
14791499
}
14801500

1501+
/*
 * d_walk() callback for the second pass of shrink_dcache_parent().
 * @_data: the struct select_data for this walk
 * @dentry: dentry currently being visited
 *
 * The starting dentry (data->start) is skipped.  For any other dentry:
 *  - flagged DCACHE_SHRINK_LIST with a zero refcount: it is recorded in
 *    data->victim and the walk is stopped with D_WALK_QUIT;
 *  - not on a shrink list: it is removed from the LRU if it was on it
 *    and, when its refcount is zero, added to data->dispose.
 *
 * Returns D_WALK_CONTINUE while data->dispose is empty; once something
 * has been collected, D_WALK_QUIT if need_resched() is true and
 * D_WALK_NORETRY otherwise.
 */
static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
1502+
{
1503+
struct select_data *data = _data;
1504+
enum d_walk_ret ret = D_WALK_CONTINUE;
1505+
1506+
if (data->start == dentry)
1507+
goto out;
1508+
1509+
if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1510+
if (!dentry->d_lockref.count) {
1511+
/*
 * This path returns with rcu_read_lock() still held; the
 * caller that picks up data->victim has to do the matching
 * rcu_read_unlock().
 */
rcu_read_lock();
1512+
data->victim = dentry;
1513+
return D_WALK_QUIT;
1514+
}
1515+
} else {
1516+
if (dentry->d_flags & DCACHE_LRU_LIST)
1517+
d_lru_del(dentry);
1518+
if (!dentry->d_lockref.count)
1519+
d_shrink_add(dentry, &data->dispose);
1520+
}
1521+
/*
1522+
* We can return to the caller if we have found some (this
1523+
* ensures forward progress). We'll be coming back to find
1524+
* the rest.
1525+
*/
1526+
if (!list_empty(&data->dispose))
1527+
ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1528+
out:
1529+
return ret;
1530+
}
1531+
14811532
/**
14821533
* shrink_dcache_parent - prune dcache
14831534
* @parent: parent of entries to prune
@@ -1487,12 +1538,9 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
14871538
void shrink_dcache_parent(struct dentry *parent)
14881539
{
14891540
for (;;) {
1490-
struct select_data data;
1541+
struct select_data data = {.start = parent};
14911542

14921543
INIT_LIST_HEAD(&data.dispose);
1493-
data.start = parent;
1494-
data.found = 0;
1495-
14961544
d_walk(parent, &data, select_collect);
14971545

14981546
if (!list_empty(&data.dispose)) {
@@ -1503,6 +1551,24 @@ void shrink_dcache_parent(struct dentry *parent)
15031551
cond_resched();
15041552
if (!data.found)
15051553
break;
1554+
data.victim = NULL;
1555+
d_walk(parent, &data, select_collect2);
1556+
if (data.victim) {
1557+
struct dentry *parent;
1558+
spin_lock(&data.victim->d_lock);
1559+
if (!shrink_lock_dentry(data.victim)) {
1560+
spin_unlock(&data.victim->d_lock);
1561+
rcu_read_unlock();
1562+
} else {
1563+
rcu_read_unlock();
1564+
parent = data.victim->d_parent;
1565+
if (parent != data.victim)
1566+
__dput_to_list(parent, &data.dispose);
1567+
__dentry_kill(data.victim);
1568+
}
1569+
}
1570+
if (!list_empty(&data.dispose))
1571+
shrink_dentry_list(&data.dispose);
15061572
}
15071573
}
15081574
EXPORT_SYMBOL(shrink_dcache_parent);

fs/internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@ extern int d_set_mounted(struct dentry *dentry);
156156
extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
157157
extern struct dentry *d_alloc_cursor(struct dentry *);
158158
extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
159+
extern void dput_to_list(struct dentry *, struct list_head *);
160+
extern void shrink_dentry_list(struct list_head *);
159161

160162
/*
161163
* read_write.c

0 commit comments

Comments
 (0)