Skip to content

Commit ee28906

Browse files
sbrivio-rh authored and davem330 committed
ipv4: Dump route exceptions if requested
Since commit 4895c77 ("ipv4: Add FIB nexthop exceptions."), cached exception routes are stored as a separate entity, so they are not dumped on a FIB dump, even if the RTM_F_CLONED flag is passed.

This implies that the command 'ip route list cache' doesn't return any result anymore.

If the RTM_F_CLONED flag is passed, and strict checking is requested, retrieve nexthop exception routes and dump them. If no strict checking is requested, filtering can't be performed consistently: dump everything in that case.

With this, we need to add an argument to the netlink callback in order to track how many entries were already dumped for the last leaf included in a partial netlink dump.

A single additional argument is sufficient, even if we traverse logically nested structures (nexthop objects, hash table buckets, bucket chains): it doesn't matter if we stop in the middle of any of those, because they are always traversed the same way. As an example, s_i values in [], s_fa values in ():

  node (fa) #1 [1]
    nexthop #1
      bucket #1 -> #0 in chain (1)
      bucket #2 -> #0 in chain (2) -> #1 in chain (3) -> #2 in chain (4)
      bucket #3 -> #0 in chain (5) -> #1 in chain (6)

    nexthop #2
      bucket #1 -> #0 in chain (7) -> #1 in chain (8)
      bucket #2 -> #0 in chain (9)
  --
  node (fa) #2 [2]
    nexthop #1
      bucket #1 -> #0 in chain (1) -> #1 in chain (2)
      bucket #2 -> #0 in chain (3)

It doesn't matter if we stop at (3), (4), (7) for "node #1", or at (2) for "node #2": walking flattens all that.

It would even be possible to drop the distinction between the in-tree (s_i) and in-node (s_fa) counter, but a further improvement might advise against this. This is only as accurate as the existing tracking mechanism for leaves: if a partial dump is restarted after exceptions are removed or expired, we might skip some non-dumped entries.

To improve this, we could attach a 'sernum' attribute (similar to the one used for IPv6) to nexthop entities, and bump this counter whenever exceptions change: having a distinction between the two counters would make this more convenient.

Listing of exception routes (modified routes pre-3.5) was tested against these versions of kernel and iproute2:

                      iproute2
  kernel     4.14.0  4.15.0  4.19.0  5.0.0  5.1.0
   3.5-rc4     +       +       +       +      +
   4.4
   4.9
   4.14
   4.15
   4.19
   5.0
   5.1
   fixed       +       +       +       +      +

v7:
  - Move loop over nexthop objects to route.c, and pass struct fib_info and table ID to it, not a struct fib_alias (suggested by David Ahern)
  - While at it, note that the NULL check on fa->fa_info is redundant, and the check on RTNH_F_DEAD is also not consistent with what's done with regular route listing: just keep it for nhc_flags
  - Rename entry point function for dumping exceptions to fib_dump_info_fnhe(), and rearrange arguments for consistency with fib_dump_info()
  - Rename fnhe_dump_buckets() to fnhe_dump_bucket() and make it handle one bucket at a time
  - Expand commit message to describe why we can have a single "skip" counter for all exceptions stored in bucket chains in nexthop objects (suggested by David Ahern)

v6:
  - Rebased onto net-next
  - Loop over nexthop paths too. Move loop over fnhe buckets to route.c, avoids need to export rt_fill_info() and to touch exceptions from fib_trie.c. Pass NULL as flow to rt_fill_info(), it now allows that (suggested by David Ahern)

Fixes: 4895c77 ("ipv4: Add FIB nexthop exceptions.")
Signed-off-by: Stefano Brivio <[email protected]>
Reviewed-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent d948974 commit ee28906

File tree

3 files changed

+108
-13
lines changed

3 files changed

+108
-13
lines changed

include/net/route.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,10 @@ void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric);
230230
void rt_add_uncached_list(struct rtable *rt);
231231
void rt_del_uncached_list(struct rtable *rt);
232232

233+
int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
234+
u32 table_id, struct fib_info *fi,
235+
int *fa_index, int fa_start);
236+
233237
static inline void ip_rt_put(struct rtable *rt)
234238
{
235239
/* dst_release() accepts a NULL parameter.

net/ipv4/fib_trie.c

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2090,22 +2090,26 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
20902090
{
20912091
unsigned int flags = NLM_F_MULTI;
20922092
__be32 xkey = htonl(l->key);
2093+
int i, s_i, i_fa, s_fa, err;
20932094
struct fib_alias *fa;
2094-
int i, s_i;
20952095

2096-
if (filter->filter_set)
2096+
if (filter->filter_set ||
2097+
!filter->dump_exceptions || !filter->dump_routes)
20972098
flags |= NLM_F_DUMP_FILTERED;
20982099

20992100
s_i = cb->args[4];
2101+
s_fa = cb->args[5];
21002102
i = 0;
21012103

21022104
/* rcu_read_lock is hold by caller */
21032105
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2104-
int err;
2106+
struct fib_info *fi = fa->fa_info;
21052107

21062108
if (i < s_i)
21072109
goto next;
21082110

2111+
i_fa = 0;
2112+
21092113
if (tb->tb_id != fa->tb_id)
21102114
goto next;
21112115

@@ -2114,29 +2118,43 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
21142118
goto next;
21152119

21162120
if ((filter->protocol &&
2117-
fa->fa_info->fib_protocol != filter->protocol))
2121+
fi->fib_protocol != filter->protocol))
21182122
goto next;
21192123

21202124
if (filter->dev &&
2121-
!fib_info_nh_uses_dev(fa->fa_info, filter->dev))
2125+
!fib_info_nh_uses_dev(fi, filter->dev))
21222126
goto next;
21232127
}
21242128

2125-
err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
2126-
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2127-
tb->tb_id, fa->fa_type,
2128-
xkey, KEYLENGTH - fa->fa_slen,
2129-
fa->fa_tos, fa->fa_info, flags);
2130-
if (err < 0) {
2131-
cb->args[4] = i;
2132-
return err;
2129+
if (filter->dump_routes && !s_fa) {
2130+
err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
2131+
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2132+
tb->tb_id, fa->fa_type,
2133+
xkey, KEYLENGTH - fa->fa_slen,
2134+
fa->fa_tos, fi, flags);
2135+
if (err < 0)
2136+
goto stop;
2137+
i_fa++;
21332138
}
2139+
2140+
if (filter->dump_exceptions) {
2141+
err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
2142+
&i_fa, s_fa);
2143+
if (err < 0)
2144+
goto stop;
2145+
}
2146+
21342147
next:
21352148
i++;
21362149
}
21372150

21382151
cb->args[4] = i;
21392152
return skb->len;
2153+
2154+
stop:
2155+
cb->args[4] = i;
2156+
cb->args[5] = i_fa;
2157+
return err;
21402158
}
21412159

21422160
/* rcu_read_lock needs to be hold by caller from readside */

net/ipv4/route.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2812,6 +2812,79 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
28122812
return -EMSGSIZE;
28132813
}
28142814

2815+
/*
 * Walk every chain of one nexthop exception hash table and emit a route
 * message for each exception that is still usable.
 *
 * @net, @skb, @cb, @table_id: passed through to rt_fill_info(); @cb supplies
 *	the portid and netlink sequence number of the dump request.
 * @bucket:   array of FNHE_HASH_SIZE hash slots to traverse.
 * @genid:    only exceptions carrying this generation ID are dumped.
 * @fa_index: running entry counter, advanced once for every exception
 *	visited, whether or not it was dumped.
 * @fa_start: exceptions visited while *@fa_index is below this value are
 *	counted but not dumped.
 *
 * Returns 0 once the whole table was walked, or the first non-zero value
 * returned by rt_fill_info(). In the latter case *@fa_index is left pointing
 * at the exception that could not be dumped.
 *
 * Uses rcu_dereference() on the chains, so the caller must be inside an RCU
 * read-side critical section.
 */
static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
			    struct netlink_callback *cb, u32 table_id,
			    struct fnhe_hash_bucket *bucket, int genid,
			    int *fa_index, int fa_start)
{
	int slot;

	for (slot = 0; slot < FNHE_HASH_SIZE; slot++) {
		struct fib_nh_exception *entry;

		entry = rcu_dereference(bucket[slot].chain);
		while (entry) {
			struct rtable *rt = NULL;
			bool wanted;

			/* Same checks, in the same order: skip counter first,
			 * then stale generation, then expiry.
			 */
			wanted = *fa_index >= fa_start &&
				 entry->fnhe_genid == genid &&
				 !(entry->fnhe_expires &&
				   time_after(jiffies, entry->fnhe_expires));

			if (wanted) {
				/* Prefer the input route, fall back to output. */
				rt = rcu_dereference(entry->fnhe_rth_input);
				if (!rt)
					rt = rcu_dereference(entry->fnhe_rth_output);
			}

			if (rt) {
				int err;

				err = rt_fill_info(net, entry->fnhe_daddr, 0, rt,
						   table_id, NULL, skb,
						   NETLINK_CB(cb->skb).portid,
						   cb->nlh->nlmsg_seq);
				if (err)
					return err;
			}

			/* Count the entry even when it was skipped. */
			(*fa_index)++;
			entry = rcu_dereference(entry->fnhe_next);
		}
	}

	return 0;
}
2859+
2860+
int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
2861+
u32 table_id, struct fib_info *fi,
2862+
int *fa_index, int fa_start)
2863+
{
2864+
struct net *net = sock_net(cb->skb->sk);
2865+
int nhsel, genid = fnhe_genid(net);
2866+
2867+
for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
2868+
struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
2869+
struct fnhe_hash_bucket *bucket;
2870+
int err;
2871+
2872+
if (nhc->nhc_flags & RTNH_F_DEAD)
2873+
continue;
2874+
2875+
bucket = rcu_dereference(nhc->nhc_exceptions);
2876+
if (!bucket)
2877+
continue;
2878+
2879+
err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, genid,
2880+
fa_index, fa_start);
2881+
if (err)
2882+
return err;
2883+
}
2884+
2885+
return 0;
2886+
}
2887+
28152888
static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
28162889
u8 ip_proto, __be16 sport,
28172890
__be16 dport)

0 commit comments

Comments
 (0)