Skip to content

Commit 3fa6f61

Browse files
dsahern authored and davem330 committed
net: ipv4: add second dif to inet socket lookups
Add a second device index, sdif, to inet socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket.

TCP moves the data in the cb. Prior to tcp_v4_rcv (e.g., early demux) the ingress index is obtained from IPCB using inet_sdif, and after the cb move in tcp_v4_rcv the tcp_v4_sdif helper is used.

Signed-off-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent fb74c27 commit 3fa6f61

File tree

7 files changed: 58 additions (+58) and 35 deletions (-35).

include/net/inet_hashtables.h

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -221,16 +221,16 @@ struct sock *__inet_lookup_listener(struct net *net,
221221
const __be32 saddr, const __be16 sport,
222222
const __be32 daddr,
223223
const unsigned short hnum,
224-
const int dif);
224+
const int dif, const int sdif);
225225

226226
static inline struct sock *inet_lookup_listener(struct net *net,
227227
struct inet_hashinfo *hashinfo,
228228
struct sk_buff *skb, int doff,
229229
__be32 saddr, __be16 sport,
230-
__be32 daddr, __be16 dport, int dif)
230+
__be32 daddr, __be16 dport, int dif, int sdif)
231231
{
232232
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
233-
daddr, ntohs(dport), dif);
233+
daddr, ntohs(dport), dif, sdif);
234234
}
235235

236236
/* Socket demux engine toys. */
@@ -262,22 +262,24 @@ static inline struct sock *inet_lookup_listener(struct net *net,
262262
(((__force __u64)(__be32)(__daddr)) << 32) | \
263263
((__force __u64)(__be32)(__saddr)))
264264
#endif /* __BIG_ENDIAN */
265-
#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
265+
#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
266266
(((__sk)->sk_portpair == (__ports)) && \
267267
((__sk)->sk_addrpair == (__cookie)) && \
268268
(!(__sk)->sk_bound_dev_if || \
269-
((__sk)->sk_bound_dev_if == (__dif))) && \
269+
((__sk)->sk_bound_dev_if == (__dif)) || \
270+
((__sk)->sk_bound_dev_if == (__sdif))) && \
270271
net_eq(sock_net(__sk), (__net)))
271272
#else /* 32-bit arch */
272273
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
273274
const int __name __deprecated __attribute__((unused))
274275

275-
#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
276+
#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
276277
(((__sk)->sk_portpair == (__ports)) && \
277278
((__sk)->sk_daddr == (__saddr)) && \
278279
((__sk)->sk_rcv_saddr == (__daddr)) && \
279280
(!(__sk)->sk_bound_dev_if || \
280-
((__sk)->sk_bound_dev_if == (__dif))) && \
281+
((__sk)->sk_bound_dev_if == (__dif)) || \
282+
((__sk)->sk_bound_dev_if == (__sdif))) && \
281283
net_eq(sock_net(__sk), (__net)))
282284
#endif /* 64-bit arch */
283285

@@ -288,7 +290,7 @@ struct sock *__inet_lookup_established(struct net *net,
288290
struct inet_hashinfo *hashinfo,
289291
const __be32 saddr, const __be16 sport,
290292
const __be32 daddr, const u16 hnum,
291-
const int dif);
293+
const int dif, const int sdif);
292294

293295
static inline struct sock *
294296
inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
@@ -297,28 +299,28 @@ static inline struct sock *
297299
const int dif)
298300
{
299301
return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
300-
ntohs(dport), dif);
302+
ntohs(dport), dif, 0);
301303
}
302304

303305
static inline struct sock *__inet_lookup(struct net *net,
304306
struct inet_hashinfo *hashinfo,
305307
struct sk_buff *skb, int doff,
306308
const __be32 saddr, const __be16 sport,
307309
const __be32 daddr, const __be16 dport,
308-
const int dif,
310+
const int dif, const int sdif,
309311
bool *refcounted)
310312
{
311313
u16 hnum = ntohs(dport);
312314
struct sock *sk;
313315

314316
sk = __inet_lookup_established(net, hashinfo, saddr, sport,
315-
daddr, hnum, dif);
317+
daddr, hnum, dif, sdif);
316318
*refcounted = true;
317319
if (sk)
318320
return sk;
319321
*refcounted = false;
320322
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
321-
sport, daddr, hnum, dif);
323+
sport, daddr, hnum, dif, sdif);
322324
}
323325

324326
static inline struct sock *inet_lookup(struct net *net,
@@ -332,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net,
332334
bool refcounted;
333335

334336
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
335-
dport, dif, &refcounted);
337+
dport, dif, 0, &refcounted);
336338

337339
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
338340
sk = NULL;
@@ -344,6 +346,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
344346
int doff,
345347
const __be16 sport,
346348
const __be16 dport,
349+
const int sdif,
347350
bool *refcounted)
348351
{
349352
struct sock *sk = skb_steal_sock(skb);
@@ -355,7 +358,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
355358

356359
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
357360
doff, iph->saddr, sport,
358-
iph->daddr, dport, inet_iif(skb),
361+
iph->daddr, dport, inet_iif(skb), sdif,
359362
refcounted);
360363
}
361364

include/net/tcp.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,16 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
840840
return false;
841841
}
842842

843+
/* TCP_SKB_CB reference means this can not be used from early demux */
844+
static inline int tcp_v4_sdif(struct sk_buff *skb)
845+
{
846+
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
847+
if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
848+
return TCP_SKB_CB(skb)->header.h4.iif;
849+
#endif
850+
return 0;
851+
}
852+
843853
/* Due to TSO, an SKB can be composed of multiple actual
844854
* packets. To keep these tracked properly, we use this.
845855
*/

net/dccp/ipv4.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
256256
sk = __inet_lookup_established(net, &dccp_hashinfo,
257257
iph->daddr, dh->dccph_dport,
258258
iph->saddr, ntohs(dh->dccph_sport),
259-
inet_iif(skb));
259+
inet_iif(skb), 0);
260260
if (!sk) {
261261
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
262262
return;
@@ -804,7 +804,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
804804

805805
lookup:
806806
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
807-
dh->dccph_sport, dh->dccph_dport, &refcounted);
807+
dh->dccph_sport, dh->dccph_dport, 0, &refcounted);
808808
if (!sk) {
809809
dccp_pr_debug("failed to look up flow ID in table and "
810810
"get corresponding socket\n");

net/ipv4/inet_hashtables.c

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port);
170170

171171
static inline int compute_score(struct sock *sk, struct net *net,
172172
const unsigned short hnum, const __be32 daddr,
173-
const int dif, bool exact_dif)
173+
const int dif, const int sdif, bool exact_dif)
174174
{
175175
int score = -1;
176176
struct inet_sock *inet = inet_sk(sk);
@@ -185,9 +185,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
185185
score += 4;
186186
}
187187
if (sk->sk_bound_dev_if || exact_dif) {
188-
if (sk->sk_bound_dev_if != dif)
188+
bool dev_match = (sk->sk_bound_dev_if == dif ||
189+
sk->sk_bound_dev_if == sdif);
190+
191+
if (exact_dif && !dev_match)
189192
return -1;
190-
score += 4;
193+
if (sk->sk_bound_dev_if && dev_match)
194+
score += 4;
191195
}
192196
if (sk->sk_incoming_cpu == raw_smp_processor_id())
193197
score++;
@@ -208,7 +212,7 @@ struct sock *__inet_lookup_listener(struct net *net,
208212
struct sk_buff *skb, int doff,
209213
const __be32 saddr, __be16 sport,
210214
const __be32 daddr, const unsigned short hnum,
211-
const int dif)
215+
const int dif, const int sdif)
212216
{
213217
unsigned int hash = inet_lhashfn(net, hnum);
214218
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -218,7 +222,8 @@ struct sock *__inet_lookup_listener(struct net *net,
218222
u32 phash = 0;
219223

220224
sk_for_each_rcu(sk, &ilb->head) {
221-
score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
225+
score = compute_score(sk, net, hnum, daddr,
226+
dif, sdif, exact_dif);
222227
if (score > hiscore) {
223228
reuseport = sk->sk_reuseport;
224229
if (reuseport) {
@@ -268,7 +273,7 @@ struct sock *__inet_lookup_established(struct net *net,
268273
struct inet_hashinfo *hashinfo,
269274
const __be32 saddr, const __be16 sport,
270275
const __be32 daddr, const u16 hnum,
271-
const int dif)
276+
const int dif, const int sdif)
272277
{
273278
INET_ADDR_COOKIE(acookie, saddr, daddr);
274279
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
@@ -286,11 +291,12 @@ struct sock *__inet_lookup_established(struct net *net,
286291
if (sk->sk_hash != hash)
287292
continue;
288293
if (likely(INET_MATCH(sk, net, acookie,
289-
saddr, daddr, ports, dif))) {
294+
saddr, daddr, ports, dif, sdif))) {
290295
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
291296
goto out;
292297
if (unlikely(!INET_MATCH(sk, net, acookie,
293-
saddr, daddr, ports, dif))) {
298+
saddr, daddr, ports,
299+
dif, sdif))) {
294300
sock_gen_put(sk);
295301
goto begin;
296302
}
@@ -321,9 +327,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
321327
__be32 daddr = inet->inet_rcv_saddr;
322328
__be32 saddr = inet->inet_daddr;
323329
int dif = sk->sk_bound_dev_if;
330+
struct net *net = sock_net(sk);
331+
int sdif = l3mdev_master_ifindex_by_index(net, dif);
324332
INET_ADDR_COOKIE(acookie, saddr, daddr);
325333
const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
326-
struct net *net = sock_net(sk);
327334
unsigned int hash = inet_ehashfn(net, daddr, lport,
328335
saddr, inet->inet_dport);
329336
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -339,7 +346,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
339346
continue;
340347

341348
if (likely(INET_MATCH(sk2, net, acookie,
342-
saddr, daddr, ports, dif))) {
349+
saddr, daddr, ports, dif, sdif))) {
343350
if (sk2->sk_state == TCP_TIME_WAIT) {
344351
tw = inet_twsk(sk2);
345352
if (twsk_unique(sk, sk2, twp))

net/ipv4/tcp_ipv4.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
383383

384384
sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
385385
th->dest, iph->saddr, ntohs(th->source),
386-
inet_iif(icmp_skb));
386+
inet_iif(icmp_skb), 0);
387387
if (!sk) {
388388
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
389389
return;
@@ -659,7 +659,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
659659
sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
660660
ip_hdr(skb)->saddr,
661661
th->source, ip_hdr(skb)->daddr,
662-
ntohs(th->source), inet_iif(skb));
662+
ntohs(th->source), inet_iif(skb),
663+
tcp_v4_sdif(skb));
663664
/* don't send rst if it can't find key */
664665
if (!sk1)
665666
goto out;
@@ -1523,7 +1524,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
15231524
sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
15241525
iph->saddr, th->source,
15251526
iph->daddr, ntohs(th->dest),
1526-
skb->skb_iif);
1527+
skb->skb_iif, inet_sdif(skb));
15271528
if (sk) {
15281529
skb->sk = sk;
15291530
skb->destructor = sock_edemux;
@@ -1588,6 +1589,7 @@ EXPORT_SYMBOL(tcp_filter);
15881589
int tcp_v4_rcv(struct sk_buff *skb)
15891590
{
15901591
struct net *net = dev_net(skb->dev);
1592+
int sdif = inet_sdif(skb);
15911593
const struct iphdr *iph;
15921594
const struct tcphdr *th;
15931595
bool refcounted;
@@ -1638,7 +1640,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
16381640

16391641
lookup:
16401642
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1641-
th->dest, &refcounted);
1643+
th->dest, sdif, &refcounted);
16421644
if (!sk)
16431645
goto no_tcp_socket;
16441646

@@ -1766,7 +1768,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
17661768
__tcp_hdrlen(th),
17671769
iph->saddr, th->source,
17681770
iph->daddr, th->dest,
1769-
inet_iif(skb));
1771+
inet_iif(skb),
1772+
sdif);
17701773
if (sk2) {
17711774
inet_twsk_deschedule_put(inet_twsk(sk));
17721775
sk = sk2;

net/ipv4/udp.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2196,7 +2196,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
21962196
static struct sock *__udp4_lib_demux_lookup(struct net *net,
21972197
__be16 loc_port, __be32 loc_addr,
21982198
__be16 rmt_port, __be32 rmt_addr,
2199-
int dif)
2199+
int dif, int sdif)
22002200
{
22012201
unsigned short hnum = ntohs(loc_port);
22022202
unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
@@ -2208,7 +2208,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
22082208

22092209
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
22102210
if (INET_MATCH(sk, net, acookie, rmt_addr,
2211-
loc_addr, ports, dif))
2211+
loc_addr, ports, dif, sdif))
22122212
return sk;
22132213
/* Only check first socket in chain */
22142214
break;
@@ -2254,7 +2254,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
22542254
dif, sdif);
22552255
} else if (skb->pkt_type == PACKET_HOST) {
22562256
sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
2257-
uh->source, iph->saddr, dif);
2257+
uh->source, iph->saddr, dif, sdif);
22582258
}
22592259

22602260
if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))

net/netfilter/xt_TPROXY.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
125125
__tcp_hdrlen(tcph),
126126
saddr, sport,
127127
daddr, dport,
128-
in->ifindex);
128+
in->ifindex, 0);
129129

130130
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
131131
sk = NULL;

0 commit comments

Comments
 (0)