Skip to content

Commit 859844e

Browse files
committed
Merge branch 'net-ipv6-Address-checks-need-to-consider-the-L3-domain'
David Ahern says: ==================== net/ipv6: Address checks need to consider the L3 domain IPv6 prohibits a local address from being used as a gateway for a route. However, it is ok for the gateway to be a local address in a different L3 domain (e.g., VRF). This allows, for example, veth pairs to connect VRFs. ip6_route_info_create calls ipv6_chk_addr_and_flags for gateway addresses to determine if the address is a local one, but ipv6_chk_addr_and_flags does not currently consider L3 domains. As a result routes can not be added in one VRF with a nexthop that points to a local address in a second VRF. Resolve by comparing the l3mdev for the passed in device and requiring an l3mdev match with the device containing an address. The intent of checking for an address on the specified device versus any device in the domain is maintained by a new argument to skip the check between the passed in device and the device with the address. Patch 1 moves the gateway validation from ip6_route_info_create into a helper; the function is long enough and refactoring drops the indent level. Patch 2 adds a skip_dev_check argument to ipv6_chk_addr_and_flags to allow a device to always be passed yet skip the device check when looking at addresses and fixes up a few ipv6_chk_addr callers that pass a NULL device. Patch 3 adds l3mdev checks to ipv6_chk_addr_and_flags. Patches 4 and 5 do some refactoring to the fib_tests script and then patch 6 adds nexthop validation tests. v4 - separated l3mdev check into a separate patch (patch 3 of this set) as suggested by Kirill - consolidated dev and ipv6_chk_addr_and_flags call into 1 if (Kirill) - added a temp variable for gw type (Kirill) v3 - set skip_dev_check in ipv6_chk_addr based on dev == NULL (per comment from Ido) v2 - handle 2 variations of route spec with sane error path - add test cases ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 1ad2ff0 + 654d3a7 commit 859844e

File tree

8 files changed

+390
-158
lines changed

8 files changed

+390
-158
lines changed

include/net/addrconf.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg);
6969
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
7070
const struct net_device *dev, int strict);
7171
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
72-
const struct net_device *dev, int strict,
73-
u32 banned_flags);
72+
const struct net_device *dev, bool skip_dev_check,
73+
int strict, u32 banned_flags);
7474

7575
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
7676
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);

net/ipv6/addrconf.c

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1851,22 +1851,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
18511851
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
18521852
const struct net_device *dev, int strict)
18531853
{
1854-
return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
1854+
return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
1855+
strict, IFA_F_TENTATIVE);
18551856
}
18561857
EXPORT_SYMBOL(ipv6_chk_addr);
18571858

1859+
/* device argument is used to find the L3 domain of interest. If
1860+
* skip_dev_check is set, then the ifp device is not checked against
1861+
* the passed in dev argument. So the 2 cases for address checks are:
1862+
* 1. does the address exist in the L3 domain that dev is part of
1863+
* (skip_dev_check = true), or
1864+
*
1865+
* 2. does the address exist on the specific device
1866+
* (skip_dev_check = false)
1867+
*/
18581868
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
1859-
const struct net_device *dev, int strict,
1860-
u32 banned_flags)
1869+
const struct net_device *dev, bool skip_dev_check,
1870+
int strict, u32 banned_flags)
18611871
{
18621872
unsigned int hash = inet6_addr_hash(net, addr);
1873+
const struct net_device *l3mdev;
18631874
struct inet6_ifaddr *ifp;
18641875
u32 ifp_flags;
18651876

18661877
rcu_read_lock();
1878+
1879+
l3mdev = l3mdev_master_dev_rcu(dev);
1880+
if (skip_dev_check)
1881+
dev = NULL;
1882+
18671883
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
18681884
if (!net_eq(dev_net(ifp->idev->dev), net))
18691885
continue;
1886+
1887+
if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
1888+
continue;
1889+
18701890
/* Decouple optimistic from tentative for evaluation here.
18711891
* Ban optimistic addresses explicitly, when required.
18721892
*/

net/ipv6/anycast.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
6666
return -EPERM;
6767
if (ipv6_addr_is_multicast(addr))
6868
return -EINVAL;
69-
if (ipv6_chk_addr(net, addr, NULL, 0))
69+
70+
if (ifindex)
71+
dev = __dev_get_by_index(net, ifindex);
72+
73+
if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
7074
return -EINVAL;
7175

7276
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
9094
dev = __dev_get_by_flags(net, IFF_UP,
9195
IFF_UP | IFF_LOOPBACK);
9296
}
93-
} else
94-
dev = __dev_get_by_index(net, ifindex);
97+
}
9598

9699
if (!dev) {
97100
err = -ENODEV;

net/ipv6/datagram.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -801,8 +801,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
801801
if (addr_type != IPV6_ADDR_ANY) {
802802
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
803803
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
804-
!ipv6_chk_addr(net, &src_info->ipi6_addr,
805-
strict ? dev : NULL, 0) &&
804+
!ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
805+
dev, !strict, 0,
806+
IFA_F_TENTATIVE) &&
806807
!ipv6_chk_acast_addr_src(net, dev,
807808
&src_info->ipi6_addr))
808809
err = -EINVAL;

net/ipv6/ip6_tunnel.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
758758
ldev = dev_get_by_index_rcu(net, p->link);
759759

760760
if ((ipv6_addr_is_multicast(laddr) ||
761-
likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
761+
likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
762+
0, IFA_F_TENTATIVE))) &&
762763
((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
763-
likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
764+
likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
765+
0, IFA_F_TENTATIVE))))
764766
ret = 1;
765767
}
766768
return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
990992
if (p->link)
991993
ldev = dev_get_by_index_rcu(net, p->link);
992994

993-
if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
995+
if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
996+
0, IFA_F_TENTATIVE)))
994997
pr_warn("%s xmit: Local address not yet configured!\n",
995998
p->name);
996999
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
9971000
!ipv6_addr_is_multicast(raddr) &&
998-
unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
1001+
unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
1002+
true, 0, IFA_F_TENTATIVE)))
9991003
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
10001004
p->name);
10011005
else

net/ipv6/ndisc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
707707
int probes = atomic_read(&neigh->probes);
708708

709709
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
710-
dev, 1,
710+
dev, false, 1,
711711
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
712712
saddr = &ipv6_hdr(skb)->saddr;
713713
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);

net/ipv6/route.c

Lines changed: 77 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2550,7 +2550,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
25502550

25512551
static int ip6_route_check_nh_onlink(struct net *net,
25522552
struct fib6_config *cfg,
2553-
struct net_device *dev,
2553+
const struct net_device *dev,
25542554
struct netlink_ext_ack *extack)
25552555
{
25562556
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
@@ -2626,6 +2626,79 @@ static int ip6_route_check_nh(struct net *net,
26262626
return err;
26272627
}
26282628

2629+
/* Validate the gateway address of a route config (cfg->fc_gateway).
 *
 * *_dev is the egress device, which may be NULL on entry; it is re-read
 * after the nexthop check because ip6_route_check_nh() receives _dev and
 * idev by reference. The gateway is rejected when
 * ipv6_chk_addr_and_flags() reports it as a local address (the device
 * check is skipped unless the gateway is link-local), when it is neither
 * unicast nor IPv4-mapped, when the nexthop check fails, or when the
 * egress device is missing or is a loopback device.
 *
 * Returns 0 if the gateway is acceptable, the error from the nexthop
 * check helper if that check fails, and -EINVAL (with a message set in
 * extack) for every other rejection.
 */
static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2630+
struct net_device **_dev, struct inet6_dev **idev,
2631+
struct netlink_ext_ack *extack)
2632+
{
2633+
const struct in6_addr *gw_addr = &cfg->fc_gateway;
2634+
int gwa_type = ipv6_addr_type(gw_addr);
2635+
bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
2636+
const struct net_device *dev = *_dev;
2637+
bool need_addr_check = !dev;
2638+
int err = -EINVAL;
2639+
2640+
/* if gw_addr is local we will fail to detect this in case
2641+
* address is still TENTATIVE (DAD in progress). rt6_lookup()
2642+
* will return already-added prefix route via interface that
2643+
* prefix route was assigned to, which might be non-loopback.
2644+
*/
2645+
if (dev &&
2646+
ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2647+
NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2648+
goto out;
2649+
}
2650+
2651+
if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2652+
/* IPv6 strictly inhibits using non-link-local
2653+
* addresses as nexthop address.
2654+
* Otherwise, router will not be able to send redirects.
2655+
* It is very good, but in some (rare!) circumstances
2656+
* (SIT, PtP, NBMA NOARP links) it is handy to allow
2657+
* some exceptions. --ANK
2658+
* We allow IPv4-mapped nexthops to support RFC4798-type
2659+
* addressing
2660+
*/
2661+
if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2662+
NL_SET_ERR_MSG(extack, "Invalid gateway address");
2663+
goto out;
2664+
}
2665+
2666+
if (cfg->fc_flags & RTNH_F_ONLINK)
2667+
err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2668+
else
2669+
err = ip6_route_check_nh(net, cfg, _dev, idev);
2670+
2671+
if (err)
2672+
goto out;
2673+
}
2674+
2675+
/* reload in case device was changed */
2676+
dev = *_dev;
2677+
2678+
err = -EINVAL;
2679+
if (!dev) {
2680+
NL_SET_ERR_MSG(extack, "Egress device not specified");
2681+
goto out;
2682+
} else if (dev->flags & IFF_LOOPBACK) {
2683+
NL_SET_ERR_MSG(extack,
2684+
"Egress device can not be loopback device for this route");
2685+
goto out;
2686+
}
2687+
2688+
/* if we did not check gw_addr above, do so now that the
2689+
* egress device has been resolved.
2690+
*/
2691+
if (need_addr_check &&
2692+
ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2693+
NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2694+
goto out;
2695+
}
2696+
2697+
err = 0;
2698+
out:
2699+
return err;
2700+
}
2701+
26292702
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
26302703
struct netlink_ext_ack *extack)
26312704
{
@@ -2808,61 +2881,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
28082881
}
28092882

28102883
if (cfg->fc_flags & RTF_GATEWAY) {
2811-
const struct in6_addr *gw_addr;
2812-
int gwa_type;
2813-
2814-
gw_addr = &cfg->fc_gateway;
2815-
gwa_type = ipv6_addr_type(gw_addr);
2816-
2817-
/* if gw_addr is local we will fail to detect this in case
2818-
* address is still TENTATIVE (DAD in progress). rt6_lookup()
2819-
* will return already-added prefix route via interface that
2820-
* prefix route was assigned to, which might be non-loopback.
2821-
*/
2822-
err = -EINVAL;
2823-
if (ipv6_chk_addr_and_flags(net, gw_addr,
2824-
gwa_type & IPV6_ADDR_LINKLOCAL ?
2825-
dev : NULL, 0, 0)) {
2826-
NL_SET_ERR_MSG(extack, "Invalid gateway address");
2884+
err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2885+
if (err)
28272886
goto out;
2828-
}
2829-
rt->rt6i_gateway = *gw_addr;
2830-
2831-
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
2832-
/* IPv6 strictly inhibits using not link-local
2833-
addresses as nexthop address.
2834-
Otherwise, router will not able to send redirects.
2835-
It is very good, but in some (rare!) circumstances
2836-
(SIT, PtP, NBMA NOARP links) it is handy to allow
2837-
some exceptions. --ANK
2838-
We allow IPv4-mapped nexthops to support RFC4798-type
2839-
addressing
2840-
*/
2841-
if (!(gwa_type & (IPV6_ADDR_UNICAST |
2842-
IPV6_ADDR_MAPPED))) {
2843-
NL_SET_ERR_MSG(extack,
2844-
"Invalid gateway address");
2845-
goto out;
2846-
}
28472887

2848-
if (cfg->fc_flags & RTNH_F_ONLINK) {
2849-
err = ip6_route_check_nh_onlink(net, cfg, dev,
2850-
extack);
2851-
} else {
2852-
err = ip6_route_check_nh(net, cfg, &dev, &idev);
2853-
}
2854-
if (err)
2855-
goto out;
2856-
}
2857-
err = -EINVAL;
2858-
if (!dev) {
2859-
NL_SET_ERR_MSG(extack, "Egress device not specified");
2860-
goto out;
2861-
} else if (dev->flags & IFF_LOOPBACK) {
2862-
NL_SET_ERR_MSG(extack,
2863-
"Egress device can not be loopback device for this route");
2864-
goto out;
2865-
}
2888+
rt->rt6i_gateway = cfg->fc_gateway;
28662889
}
28672890

28682891
err = -ENODEV;

0 commit comments

Comments
 (0)