Skip to content

Commit 40867d7

Browse files
dsahernkuba-moo
authored andcommitted
net: Add l3mdev index to flow struct and avoid oif reset for port devices
The fundamental premise of VRF and l3mdev core code is binding a socket to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope. Legacy code resets flowi_oif to the l3mdev losing any original port device binding. Ben (among others) has demonstrated use cases where the original port device binding is important and needs to be retained. This patch handles that by adding a new entry to the common flow struct that can indicate the l3mdev index for later rule and table matching avoiding the need to reset flowi_oif. In addition to allowing more use cases that require port device binds, this patch brings a few datapath simplications: 1. l3mdev_fib_rule_match is only called when walking fib rules and always after l3mdev_update_flow. That allows an optimization to bail early for non-VRF type uses cases when flowi_l3mdev is not set. Also, only that index needs to be checked for the FIB table id. 2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev (e.g., VRF) device. By resetting flowi_oif only for this case the FLOWI_FLAG_SKIP_NH_OIF flag is not longer needed and can be removed, removing several checks in the datapath. The flowi_iif path can be simplified to only be called if the it is not loopback (loopback can not be assigned to an L3 domain) and the l3mdev index is not already set. 3. Avoid another device lookup in the output path when the fib lookup returns a reject failure. Note: 2 functional tests for local traffic with reject fib rules are updated to reflect the new direct failure at FIB lookup time for ping rather than the failure on packet path. The current code fails like this: HINT: Fails since address on vrf device is out of device scope COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 ping: Warning: source address might be selected on device other than: eth1 PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data. --- 172.16.3.1 ping statistics --- 1 packets transmitted, 0 received, 100% packet loss, time 0ms where the test now directly fails: HINT: Fails since address on vrf device is out of device scope COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 ping: connect: No route to host Signed-off-by: David Ahern <[email protected]> Tested-by: Ben Greear <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent c84d86a commit 40867d7

File tree

12 files changed

+37
-63
lines changed

12 files changed

+37
-63
lines changed

drivers/net/vrf.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -472,14 +472,13 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
472472

473473
memset(&fl6, 0, sizeof(fl6));
474474
/* needed to match OIF rule */
475-
fl6.flowi6_oif = dev->ifindex;
475+
fl6.flowi6_l3mdev = dev->ifindex;
476476
fl6.flowi6_iif = LOOPBACK_IFINDEX;
477477
fl6.daddr = iph->daddr;
478478
fl6.saddr = iph->saddr;
479479
fl6.flowlabel = ip6_flowinfo(iph);
480480
fl6.flowi6_mark = skb->mark;
481481
fl6.flowi6_proto = iph->nexthdr;
482-
fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
483482

484483
dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL);
485484
if (IS_ERR(dst) || dst == dst_null)
@@ -551,10 +550,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
551550

552551
memset(&fl4, 0, sizeof(fl4));
553552
/* needed to match OIF rule */
554-
fl4.flowi4_oif = vrf_dev->ifindex;
553+
fl4.flowi4_l3mdev = vrf_dev->ifindex;
555554
fl4.flowi4_iif = LOOPBACK_IFINDEX;
556555
fl4.flowi4_tos = RT_TOS(ip4h->tos);
557-
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF;
556+
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
558557
fl4.flowi4_proto = ip4h->protocol;
559558
fl4.daddr = ip4h->daddr;
560559
fl4.saddr = ip4h->saddr;

include/net/flow.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,14 @@ struct flowi_tunnel {
2929
struct flowi_common {
3030
int flowic_oif;
3131
int flowic_iif;
32+
int flowic_l3mdev;
3233
__u32 flowic_mark;
3334
__u8 flowic_tos;
3435
__u8 flowic_scope;
3536
__u8 flowic_proto;
3637
__u8 flowic_flags;
3738
#define FLOWI_FLAG_ANYSRC 0x01
3839
#define FLOWI_FLAG_KNOWN_NH 0x02
39-
#define FLOWI_FLAG_SKIP_NH_OIF 0x04
4040
__u32 flowic_secid;
4141
kuid_t flowic_uid;
4242
struct flowi_tunnel flowic_tun_key;
@@ -70,6 +70,7 @@ struct flowi4 {
7070
struct flowi_common __fl_common;
7171
#define flowi4_oif __fl_common.flowic_oif
7272
#define flowi4_iif __fl_common.flowic_iif
73+
#define flowi4_l3mdev __fl_common.flowic_l3mdev
7374
#define flowi4_mark __fl_common.flowic_mark
7475
#define flowi4_tos __fl_common.flowic_tos
7576
#define flowi4_scope __fl_common.flowic_scope
@@ -102,6 +103,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
102103
{
103104
fl4->flowi4_oif = oif;
104105
fl4->flowi4_iif = LOOPBACK_IFINDEX;
106+
fl4->flowi4_l3mdev = 0;
105107
fl4->flowi4_mark = mark;
106108
fl4->flowi4_tos = tos;
107109
fl4->flowi4_scope = scope;
@@ -132,6 +134,7 @@ struct flowi6 {
132134
struct flowi_common __fl_common;
133135
#define flowi6_oif __fl_common.flowic_oif
134136
#define flowi6_iif __fl_common.flowic_iif
137+
#define flowi6_l3mdev __fl_common.flowic_l3mdev
135138
#define flowi6_mark __fl_common.flowic_mark
136139
#define flowi6_scope __fl_common.flowic_scope
137140
#define flowi6_proto __fl_common.flowic_proto
@@ -177,6 +180,7 @@ struct flowi {
177180
} u;
178181
#define flowi_oif u.__fl_common.flowic_oif
179182
#define flowi_iif u.__fl_common.flowic_iif
183+
#define flowi_l3mdev u.__fl_common.flowic_l3mdev
180184
#define flowi_mark u.__fl_common.flowic_mark
181185
#define flowi_tos u.__fl_common.flowic_tos
182186
#define flowi_scope u.__fl_common.flowic_scope

net/ipv4/fib_frontend.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
291291
bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
292292
struct flowi4 fl4 = {
293293
.flowi4_iif = LOOPBACK_IFINDEX,
294-
.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
294+
.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
295295
.daddr = ip_hdr(skb)->saddr,
296296
.flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
297297
.flowi4_scope = scope,
@@ -353,9 +353,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
353353
bool dev_match;
354354

355355
fl4.flowi4_oif = 0;
356-
fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
357-
if (!fl4.flowi4_iif)
358-
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
356+
fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev);
357+
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
359358
fl4.daddr = src;
360359
fl4.saddr = dst;
361360
fl4.flowi4_tos = tos;

net/ipv4/fib_semantics.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2234,7 +2234,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
22342234
void fib_select_path(struct net *net, struct fib_result *res,
22352235
struct flowi4 *fl4, const struct sk_buff *skb)
22362236
{
2237-
if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
2237+
if (fl4->flowi4_oif)
22382238
goto check_saddr;
22392239

22402240
#ifdef CONFIG_IP_ROUTE_MULTIPATH

net/ipv4/fib_trie.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1429,11 +1429,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
14291429
!(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
14301430
return false;
14311431

1432-
if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
1433-
if (flp->flowi4_oif &&
1434-
flp->flowi4_oif != nhc->nhc_oif)
1435-
return false;
1436-
}
1432+
if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif)
1433+
return false;
14371434

14381435
return true;
14391436
}

net/ipv4/route.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2263,6 +2263,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
22632263
/*
22642264
* Now we are ready to route packet.
22652265
*/
2266+
fl4.flowi4_l3mdev = 0;
22662267
fl4.flowi4_oif = 0;
22672268
fl4.flowi4_iif = dev->ifindex;
22682269
fl4.flowi4_mark = skb->mark;
@@ -2738,8 +2739,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
27382739
res->fi = NULL;
27392740
res->table = NULL;
27402741
if (fl4->flowi4_oif &&
2741-
(ipv4_is_multicast(fl4->daddr) ||
2742-
!netif_index_is_l3_master(net, fl4->flowi4_oif))) {
2742+
(ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) {
27432743
/* Apparently, routing tables are wrong. Assume,
27442744
* that the destination is on link.
27452745
*

net/ipv4/xfrm4_policy.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
2828
memset(fl4, 0, sizeof(*fl4));
2929
fl4->daddr = daddr->a4;
3030
fl4->flowi4_tos = tos;
31-
fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
31+
fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
3232
fl4->flowi4_mark = mark;
3333
if (saddr)
3434
fl4->saddr = saddr->a4;
3535

36-
fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF;
37-
3836
rt = __ip_route_output_key(net, fl4);
3937
if (!IS_ERR(rt))
4038
return &rt->dst;

net/ipv6/ip6_output.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,8 +1035,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
10351035
#ifdef CONFIG_IPV6_SUBTREES
10361036
ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
10371037
#endif
1038-
(!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
1039-
(fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
1038+
(fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
10401039
dst_release(dst);
10411040
dst = NULL;
10421041
}

net/ipv6/route.c

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,9 +1209,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
12091209
struct fib6_node *fn;
12101210
struct rt6_info *rt;
12111211

1212-
if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1213-
flags &= ~RT6_LOOKUP_F_IFACE;
1214-
12151212
rcu_read_lock();
12161213
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
12171214
restart:
@@ -2181,9 +2178,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
21812178
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
21822179
saved_fn = fn;
21832180

2184-
if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2185-
oif = 0;
2186-
21872181
redo_rt6_select:
21882182
rt6_select(net, fn, oif, res, strict);
21892183
if (res->f6i == net->ipv6.fib6_null_entry) {
@@ -3058,12 +3052,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
30583052
struct fib6_info *rt;
30593053
struct fib6_node *fn;
30603054

3061-
/* l3mdev_update_flow overrides oif if the device is enslaved; in
3062-
* this case we must match on the real ingress device, so reset it
3063-
*/
3064-
if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
3065-
fl6->flowi6_oif = skb->dev->ifindex;
3066-
30673055
/* Get the "current" route for this destination and
30683056
* check if the redirect has come from appropriate router.
30693057
*

net/ipv6/xfrm6_policy.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
3333
int err;
3434

3535
memset(&fl6, 0, sizeof(fl6));
36-
fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
37-
fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
36+
fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
3837
fl6.flowi6_mark = mark;
3938
memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
4039
if (saddr)

net/l3mdev/l3mdev.c

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -250,25 +250,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
250250
struct net_device *dev;
251251
int rc = 0;
252252

253-
rcu_read_lock();
253+
/* update flow ensures flowi_l3mdev is set when relevant */
254+
if (!fl->flowi_l3mdev)
255+
return 0;
254256

255-
dev = dev_get_by_index_rcu(net, fl->flowi_oif);
256-
if (dev && netif_is_l3_master(dev) &&
257-
dev->l3mdev_ops->l3mdev_fib_table) {
258-
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
259-
rc = 1;
260-
goto out;
261-
}
257+
rcu_read_lock();
262258

263-
dev = dev_get_by_index_rcu(net, fl->flowi_iif);
259+
dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev);
264260
if (dev && netif_is_l3_master(dev) &&
265261
dev->l3mdev_ops->l3mdev_fib_table) {
266262
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
267263
rc = 1;
268-
goto out;
269264
}
270265

271-
out:
272266
rcu_read_unlock();
273267

274268
return rc;
@@ -277,31 +271,28 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
277271
void l3mdev_update_flow(struct net *net, struct flowi *fl)
278272
{
279273
struct net_device *dev;
280-
int ifindex;
281274

282275
rcu_read_lock();
283276

284277
if (fl->flowi_oif) {
285278
dev = dev_get_by_index_rcu(net, fl->flowi_oif);
286279
if (dev) {
287-
ifindex = l3mdev_master_ifindex_rcu(dev);
288-
if (ifindex) {
289-
fl->flowi_oif = ifindex;
290-
fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
291-
goto out;
292-
}
280+
if (!fl->flowi_l3mdev)
281+
fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
282+
283+
/* oif set to L3mdev directs lookup to its table;
284+
* reset to avoid oif match in fib_lookup
285+
*/
286+
if (netif_is_l3_master(dev))
287+
fl->flowi_oif = 0;
288+
goto out;
293289
}
294290
}
295291

296-
if (fl->flowi_iif) {
292+
if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) {
297293
dev = dev_get_by_index_rcu(net, fl->flowi_iif);
298-
if (dev) {
299-
ifindex = l3mdev_master_ifindex_rcu(dev);
300-
if (ifindex) {
301-
fl->flowi_iif = ifindex;
302-
fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
303-
}
304-
}
294+
if (dev)
295+
fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
305296
}
306297

307298
out:

tools/testing/selftests/net/fcnal-test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -750,7 +750,7 @@ ipv4_ping_vrf()
750750
log_start
751751
show_hint "Fails since address on vrf device is out of device scope"
752752
run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
753-
log_test_addr ${a} $? 1 "ping local, device bind"
753+
log_test_addr ${a} $? 2 "ping local, device bind"
754754
done
755755

756756
#

0 commit comments

Comments
 (0)