Skip to content

Commit 35bec72

Browse files
pmachata authored and kuba-moo committed
net: ipv4: Add ip_mr_output()
Multicast routing is today handled in the input path. Locally generated MC packets don't hit the IPMR code today. Thus if a VXLAN remote address is multicast, the driver needs to set an OIF during route lookup. Thus MC routing configuration needs to be kept in sync with the VXLAN FDB and MDB. Ideally, the VXLAN packets would be routed by the MC routing code instead. To that end, this patch adds support to route locally generated multicast packets. The newly-added routines do largely what ip_mr_input() and ip_mr_forward() do: make an MR cache lookup to find where to send the packets, and use ip_mc_output() to send each of them. When no cache entry is found, the packet is punted to the daemon for resolution. However, an installation that uses a VXLAN underlay netdevice for which it also has matching MC routes, would get a different routing with this patch. Previously, the MC packets would be delivered directly to the underlay port, whereas now they would be MC-routed. In order to avoid this change in behavior, introduce an IPCB flag. Only if the flag is set will ip_mr_output() actually engage, otherwise it reverts to ip_mc_output(). This code is based on work by Roopa Prabhu and Nikolay Aleksandrov. Signed-off-by: Roopa Prabhu <[email protected]> Signed-off-by: Nikolay Aleksandrov <[email protected]> Signed-off-by: Benjamin Poirier <[email protected]> Signed-off-by: Petr Machata <[email protected]> Reviewed-by: Ido Schimmel <[email protected]> Link: https://patch.msgid.link/0aadbd49330471c0f758d54afb05eb3b6e3a6b65.1750113335.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski <[email protected]>
1 parent b2e653b commit 35bec72

File tree

3 files changed

+120
-1
lines changed

3 files changed

+120
-1
lines changed

include/net/ip.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct inet_skb_parm {
5959
#define IPSKB_L3SLAVE BIT(7)
6060
#define IPSKB_NOPOLICY BIT(8)
6161
#define IPSKB_MULTIPATH BIT(9)
62+
#define IPSKB_MCROUTE BIT(10)
6263

6364
u16 frag_max_size;
6465
};
@@ -167,6 +168,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
167168
int ip_local_deliver(struct sk_buff *skb);
168169
void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto);
169170
int ip_mr_input(struct sk_buff *skb);
171+
int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb);
170172
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
171173
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb);
172174
int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,

net/ipv4/ipmr.c

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1965,6 +1965,19 @@ static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt,
19651965
kfree_skb(skb);
19661966
}
19671967

1968+
/* Transmit one skb on virtual interface @vifi for the output path.
 *
 * ipmr_prepare_xmit() is run first; a non-zero result means the skb must not
 * be sent and it is freed here. Otherwise the skb is handed to
 * ip_mc_output() with a NULL socket. Either way the caller no longer owns
 * the skb once this function returns.
 */
static void ipmr_queue_output_xmit(struct net *net, struct mr_table *mrt,
1969+
struct sk_buff *skb, int vifi)
1970+
{
1971+
if (ipmr_prepare_xmit(net, mrt, skb, vifi))
1972+
goto out_free;
1973+
1974+
/* The return value of ip_mc_output() is intentionally not propagated:
 * this helper is void.
 */
ip_mc_output(net, NULL, skb);
1975+
return;
1976+
1977+
out_free:
1978+
kfree_skb(skb);
1979+
}
1980+
19681981
/* Called with mrt_lock or rcu_read_lock() */
19691982
static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
19701983
{
@@ -2224,6 +2237,110 @@ int ip_mr_input(struct sk_buff *skb)
22242237
return 0;
22252238
}
22262239

2240+
/* Send @skb to every output VIF selected by MFC cache entry @c.
 *
 * Updates the entry's packet/byte counters and last-use timestamp, then
 * transmits via ipmr_queue_output_xmit(). For a regular entry, each VIF in
 * [minvif, maxvif) whose TTL threshold is lower than the packet's TTL gets a
 * copy; for an (*,*) entry only the parent VIF is considered. The skb is
 * always consumed: it is either transmitted on the last selected VIF or
 * freed when no VIF qualifies.
 *
 * @dev is accepted but not referenced in this function body.
 */
static void ip_mr_output_finish(struct net *net, struct mr_table *mrt,
2241+
struct net_device *dev, struct sk_buff *skb,
2242+
struct mfc_cache *c)
2243+
{
2244+
/* psend holds the VIF that is still owed a transmission; -1 means none. */
int psend = -1;
2245+
int ct;
2246+
2247+
/* Account this packet against the cache entry. */
atomic_long_inc(&c->_c.mfc_un.res.pkt);
2248+
atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2249+
WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2250+
2251+
/* Forward the frame */
2252+
if (c->mfc_origin == htonl(INADDR_ANY) &&
2253+
c->mfc_mcastgrp == htonl(INADDR_ANY)) {
2254+
if (ip_hdr(skb)->ttl >
2255+
c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2256+
/* It's an (*,*) entry and the packet is not coming from
2257+
* the upstream: forward the packet to the upstream
2258+
* only.
2259+
*/
2260+
psend = c->_c.mfc_parent;
2261+
goto last_xmit;
2262+
}
2263+
/* (*,*) entry but the TTL does not exceed the parent's threshold:
 * drop the packet.
 */
goto dont_xmit;
2264+
}
2265+
2266+
/* Walk the VIF range from high to low. Transmission is deferred by one
 * match: a clone is sent to the previously matched VIF, so that the final
 * match can take the original skb without an extra copy.
 */
for (ct = c->_c.mfc_un.res.maxvif - 1;
2267+
ct >= c->_c.mfc_un.res.minvif; ct--) {
2268+
if (ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
2269+
if (psend != -1) {
2270+
struct sk_buff *skb2;
2271+
2272+
/* If the clone fails, that VIF is silently skipped. */
skb2 = skb_clone(skb, GFP_ATOMIC);
2273+
if (skb2)
2274+
ipmr_queue_output_xmit(net, mrt,
2275+
skb2, psend);
2276+
}
2277+
psend = ct;
2278+
}
2279+
}
2280+
2281+
last_xmit:
2282+
/* The last (or only) selected VIF receives the original skb. */
if (psend != -1) {
2283+
ipmr_queue_output_xmit(net, mrt, skb, psend);
2284+
return;
2285+
}
2286+
2287+
dont_xmit:
2288+
kfree_skb(skb);
2289+
}
2290+
2291+
/* Output-path entry point for multicast routing of locally generated
 * packets (installed as dst.output in __mkroute_output()).
 *
 * Multicast routing is only attempted when IPSKB_MCROUTE is set in the
 * skb's IPCB flags and IPSKB_FORWARDED is not; in every other case, and
 * whenever no usable table or cache entry applies, the packet falls back
 * to ip_mc_output().
 *
 * Returns the result of ip_mc_output() on fallback, the result of
 * ipmr_cache_unresolved() when no cache entry exists but the output device
 * is a VIF, and 0 once the packet has been handed to ip_mr_output_finish().
 *
2292+
* Called with rcu_read_lock();
2293+
*/
2294+
int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
2295+
{
2296+
struct rtable *rt = skb_rtable(skb);
2297+
struct mfc_cache *cache;
2298+
struct net_device *dev;
2299+
struct mr_table *mrt;
2300+
int vif;
2301+
2302+
WARN_ON_ONCE(!rcu_read_lock_held());
2303+
/* The device the route lookup selected for output. */
dev = rt->dst.dev;
2304+
2305+
/* Packets already marked as forwarded are not multicast-routed again. */
if (IPCB(skb)->flags & IPSKB_FORWARDED)
2306+
goto mc_output;
2307+
/* Opt-in: without IPSKB_MCROUTE the previous behavior (plain
 * ip_mc_output()) is kept.
 */
if (!(IPCB(skb)->flags & IPSKB_MCROUTE))
2308+
goto mc_output;
2309+
2310+
/* NOTE(review): skb->dev is set before the table lookup, presumably
 * because ipmr_rt_fib_lookup() consults it -- confirm against that helper.
 */
skb->dev = dev;
2311+
2312+
mrt = ipmr_rt_fib_lookup(net, skb);
2313+
if (IS_ERR(mrt))
2314+
goto mc_output;
2315+
2316+
/* already under rcu_read_lock() */
2317+
/* Try an exact (source, group) match first, then fall back to
 * ipmr_cache_find_any() keyed on the group and the VIF of the output device.
 */
cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
2318+
if (!cache) {
2319+
vif = ipmr_find_vif(mrt, dev);
2320+
if (vif >= 0)
2321+
cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
2322+
vif);
2323+
}
2324+
2325+
/* No usable cache entry */
2326+
if (!cache) {
2327+
/* NOTE(review): this repeats the ipmr_find_vif() lookup done just
 * above when the exact match failed.
 */
vif = ipmr_find_vif(mrt, dev);
2328+
if (vif >= 0)
2329+
return ipmr_cache_unresolved(mrt, vif, skb, dev);
2330+
goto mc_output;
2331+
}
2332+
2333+
/* Only route via the cache entry when its parent VIF is backed by the
 * same device the packet is being sent out of.
 */
vif = cache->_c.mfc_parent;
2334+
if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
2335+
goto mc_output;
2336+
2337+
/* ip_mr_output_finish() consumes the skb. */
ip_mr_output_finish(net, mrt, dev, skb, cache);
2338+
return 0;
2339+
2340+
mc_output:
2341+
return ip_mc_output(net, sk, skb);
2342+
}
2343+
22272344
#ifdef CONFIG_IP_PIMSM_V1
22282345
/* Handle IGMP messages of PIMv1 */
22292346
int pim_rcv_v1(struct sk_buff *skb)

net/ipv4/route.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2660,7 +2660,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
26602660
if (IN_DEV_MFORWARD(in_dev) &&
26612661
!ipv4_is_local_multicast(fl4->daddr)) {
26622662
rth->dst.input = ip_mr_input;
2663-
rth->dst.output = ip_mc_output;
2663+
rth->dst.output = ip_mr_output;
26642664
}
26652665
}
26662666
#endif

0 commit comments

Comments
 (0)