Skip to content

Commit e952837

Browse files
Florian Westphalklassert
authored andcommitted
xfrm: state: fix out-of-bounds read during lookup
lookup and resize can run in parallel. The xfrm_state_hash_generation seqlock ensures a retry, but the hash functions can observe a hmask value that is too large for the new hlist array. rehash does: rcu_assign_pointer(net->xfrm.state_bydst, ndst) [..] net->xfrm.state_hmask = nhashmask; While state lookup does: h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { This is only safe in case the update to state_bydst is larger than net->xfrm.xfrm_state_hmask (or if the lookup function gets serialized via state spinlock again). Fix this by prefetching state_hmask and the associated pointers. The xfrm_state_hash_generation seqlock retry will ensure that the pointer and the hmask will be consistent. The existing helpers, like xfrm_dst_hash(), are now unsafe for RCU side, add lockdep assertions to document that they are only safe for insert side. xfrm_state_lookup_byaddr() uses the spinlock rather than RCU. AFAICS this is an oversight from back when state lookup was converted to RCU, this lock should be replaced with RCU in a future patch. Reported-by: [email protected] Closes: https://lore.kernel.org/netdev/CACT4Y+azwfrE3uz6A5ZErov5YN2LYBN5KrsymBerT36VU8qzBA@mail.gmail.com/ Diagnosed-by: Dmitry Vyukov <[email protected]> Fixes: c2f672f ("xfrm: state lookup can be lockless") Signed-off-by: Florian Westphal <[email protected]> Signed-off-by: Steffen Klassert <[email protected]>
1 parent c05c5e5 commit e952837

File tree

1 file changed

+70
-19
lines changed

1 file changed

+70
-19
lines changed

net/xfrm/xfrm_state.c

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434

3535
#define xfrm_state_deref_prot(table, net) \
3636
rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
37+
#define xfrm_state_deref_check(table, net) \
38+
rcu_dereference_check((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
3739

3840
static void xfrm_state_gc_task(struct work_struct *work);
3941

@@ -62,6 +64,8 @@ static inline unsigned int xfrm_dst_hash(struct net *net,
6264
u32 reqid,
6365
unsigned short family)
6466
{
67+
lockdep_assert_held(&net->xfrm.xfrm_state_lock);
68+
6569
return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
6670
}
6771

@@ -70,18 +74,24 @@ static inline unsigned int xfrm_src_hash(struct net *net,
7074
const xfrm_address_t *saddr,
7175
unsigned short family)
7276
{
77+
lockdep_assert_held(&net->xfrm.xfrm_state_lock);
78+
7379
return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
7480
}
7581

7682
static inline unsigned int
7783
xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
7884
__be32 spi, u8 proto, unsigned short family)
7985
{
86+
lockdep_assert_held(&net->xfrm.xfrm_state_lock);
87+
8088
return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
8189
}
8290

8391
static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
8492
{
93+
lockdep_assert_held(&net->xfrm.xfrm_state_lock);
94+
8595
return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
8696
}
8797

@@ -1041,16 +1051,38 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
10411051
x->props.family = tmpl->encap_family;
10421052
}
10431053

1044-
static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
1054+
struct xfrm_hash_state_ptrs {
1055+
const struct hlist_head *bydst;
1056+
const struct hlist_head *bysrc;
1057+
const struct hlist_head *byspi;
1058+
unsigned int hmask;
1059+
};
1060+
1061+
static void xfrm_hash_ptrs_get(const struct net *net, struct xfrm_hash_state_ptrs *ptrs)
1062+
{
1063+
unsigned int sequence;
1064+
1065+
do {
1066+
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
1067+
1068+
ptrs->bydst = xfrm_state_deref_check(net->xfrm.state_bydst, net);
1069+
ptrs->bysrc = xfrm_state_deref_check(net->xfrm.state_bysrc, net);
1070+
ptrs->byspi = xfrm_state_deref_check(net->xfrm.state_byspi, net);
1071+
ptrs->hmask = net->xfrm.state_hmask;
1072+
} while (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence));
1073+
}
1074+
1075+
static struct xfrm_state *__xfrm_state_lookup_all(const struct xfrm_hash_state_ptrs *state_ptrs,
1076+
u32 mark,
10451077
const xfrm_address_t *daddr,
10461078
__be32 spi, u8 proto,
10471079
unsigned short family,
10481080
struct xfrm_dev_offload *xdo)
10491081
{
1050-
unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
1082+
unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask);
10511083
struct xfrm_state *x;
10521084

1053-
hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
1085+
hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) {
10541086
#ifdef CONFIG_XFRM_OFFLOAD
10551087
if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
10561088
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
@@ -1084,15 +1116,16 @@ static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
10841116
return NULL;
10851117
}
10861118

1087-
static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
1119+
static struct xfrm_state *__xfrm_state_lookup(const struct xfrm_hash_state_ptrs *state_ptrs,
1120+
u32 mark,
10881121
const xfrm_address_t *daddr,
10891122
__be32 spi, u8 proto,
10901123
unsigned short family)
10911124
{
1092-
unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
1125+
unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask);
10931126
struct xfrm_state *x;
10941127

1095-
hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
1128+
hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) {
10961129
if (x->props.family != family ||
10971130
x->id.spi != spi ||
10981131
x->id.proto != proto ||
@@ -1114,6 +1147,7 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
11141147
__be32 spi, u8 proto,
11151148
unsigned short family)
11161149
{
1150+
struct xfrm_hash_state_ptrs state_ptrs;
11171151
struct hlist_head *state_cache_input;
11181152
struct xfrm_state *x = NULL;
11191153
int cpu = get_cpu();
@@ -1135,7 +1169,9 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
11351169
goto out;
11361170
}
11371171

1138-
x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
1172+
xfrm_hash_ptrs_get(net, &state_ptrs);
1173+
1174+
x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family);
11391175

11401176
if (x && x->km.state == XFRM_STATE_VALID) {
11411177
spin_lock_bh(&net->xfrm.xfrm_state_lock);
@@ -1155,15 +1191,16 @@ struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
11551191
}
11561192
EXPORT_SYMBOL(xfrm_input_state_lookup);
11571193

1158-
static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1194+
static struct xfrm_state *__xfrm_state_lookup_byaddr(const struct xfrm_hash_state_ptrs *state_ptrs,
1195+
u32 mark,
11591196
const xfrm_address_t *daddr,
11601197
const xfrm_address_t *saddr,
11611198
u8 proto, unsigned short family)
11621199
{
1163-
unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
1200+
unsigned int h = __xfrm_src_hash(daddr, saddr, family, state_ptrs->hmask);
11641201
struct xfrm_state *x;
11651202

1166-
hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
1203+
hlist_for_each_entry_rcu(x, state_ptrs->bysrc + h, bysrc) {
11671204
if (x->props.family != family ||
11681205
x->id.proto != proto ||
11691206
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -1183,14 +1220,17 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
11831220
static inline struct xfrm_state *
11841221
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
11851222
{
1223+
struct xfrm_hash_state_ptrs state_ptrs;
11861224
struct net *net = xs_net(x);
11871225
u32 mark = x->mark.v & x->mark.m;
11881226

1227+
xfrm_hash_ptrs_get(net, &state_ptrs);
1228+
11891229
if (use_spi)
1190-
return __xfrm_state_lookup(net, mark, &x->id.daddr,
1230+
return __xfrm_state_lookup(&state_ptrs, mark, &x->id.daddr,
11911231
x->id.spi, x->id.proto, family);
11921232
else
1193-
return __xfrm_state_lookup_byaddr(net, mark,
1233+
return __xfrm_state_lookup_byaddr(&state_ptrs, mark,
11941234
&x->id.daddr,
11951235
&x->props.saddr,
11961236
x->id.proto, family);
@@ -1264,6 +1304,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
12641304
unsigned short family, u32 if_id)
12651305
{
12661306
static xfrm_address_t saddr_wildcard = { };
1307+
struct xfrm_hash_state_ptrs state_ptrs;
12671308
struct net *net = xp_net(pol);
12681309
unsigned int h, h_wildcard;
12691310
struct xfrm_state *x, *x0, *to_put;
@@ -1328,8 +1369,10 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
13281369
else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
13291370
WARN_ON(1);
13301371

1331-
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
1332-
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
1372+
xfrm_hash_ptrs_get(net, &state_ptrs);
1373+
1374+
h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask);
1375+
hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) {
13331376
#ifdef CONFIG_XFRM_OFFLOAD
13341377
if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
13351378
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
@@ -1362,8 +1405,9 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
13621405
if (best || acquire_in_progress)
13631406
goto found;
13641407

1365-
h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
1366-
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
1408+
h_wildcard = __xfrm_dst_hash(daddr, &saddr_wildcard, tmpl->reqid,
1409+
encap_family, state_ptrs.hmask);
1410+
hlist_for_each_entry_rcu(x, state_ptrs.bydst + h_wildcard, bydst) {
13671411
#ifdef CONFIG_XFRM_OFFLOAD
13681412
if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
13691413
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
@@ -1401,7 +1445,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
14011445

14021446
if (!x && !error && !acquire_in_progress) {
14031447
if (tmpl->id.spi &&
1404-
(x0 = __xfrm_state_lookup_all(net, mark, daddr,
1448+
(x0 = __xfrm_state_lookup_all(&state_ptrs, mark, daddr,
14051449
tmpl->id.spi, tmpl->id.proto,
14061450
encap_family,
14071451
&pol->xdo)) != NULL) {
@@ -2180,10 +2224,13 @@ struct xfrm_state *
21802224
xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
21812225
u8 proto, unsigned short family)
21822226
{
2227+
struct xfrm_hash_state_ptrs state_ptrs;
21832228
struct xfrm_state *x;
21842229

21852230
rcu_read_lock();
2186-
x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
2231+
xfrm_hash_ptrs_get(net, &state_ptrs);
2232+
2233+
x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family);
21872234
rcu_read_unlock();
21882235
return x;
21892236
}
@@ -2194,10 +2241,14 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
21942241
const xfrm_address_t *daddr, const xfrm_address_t *saddr,
21952242
u8 proto, unsigned short family)
21962243
{
2244+
struct xfrm_hash_state_ptrs state_ptrs;
21972245
struct xfrm_state *x;
21982246

21992247
spin_lock_bh(&net->xfrm.xfrm_state_lock);
2200-
x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
2248+
2249+
xfrm_hash_ptrs_get(net, &state_ptrs);
2250+
2251+
x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family);
22012252
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
22022253
return x;
22032254
}

0 commit comments

Comments
 (0)