Skip to content

Commit d7dedee

Browse files
idosch authored and davem330 committed
ipv6: Calculate hash thresholds for IPv6 nexthops
Before we convert IPv6 to use hash-threshold instead of modulo-N, we first need each nexthop to store its region boundary in the hash function's output space. The boundary is calculated by dividing the output space equally among the active nexthops, that is, the nexthops that are neither dead nor linkdown. The boundaries are rebalanced whenever a nexthop is added to or removed from a multipath route, and whenever a nexthop becomes active or inactive.
Signed-off-by: Ido Schimmel <[email protected]>
Acked-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent e2b3b35 commit d7dedee

File tree

4 files changed

+106
-6
lines changed

4 files changed

+106
-6
lines changed

include/net/ip6_fib.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ struct rt6_info {
149149
*/
150150
struct list_head rt6i_siblings;
151151
unsigned int rt6i_nsiblings;
152+
atomic_t rt6i_nh_upper_bound;
152153

153154
atomic_t rt6i_ref;
154155

include/net/ip6_route.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
6666
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
6767
}
6868

69+
static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
70+
{
71+
return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
72+
RTF_GATEWAY;
73+
}
74+
6975
void ip6_route_input(struct sk_buff *skb);
7076
struct dst_entry *ip6_route_input_lookup(struct net *net,
7177
struct net_device *dev,
@@ -171,6 +177,7 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
171177
void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
172178
void rt6_disable_ip(struct net_device *dev, unsigned long event);
173179
void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
180+
void rt6_multipath_rebalance(struct rt6_info *rt);
174181

175182
static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
176183
{

net/ipv6/ip6_fib.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -796,12 +796,6 @@ static struct fib6_node *fib6_add_1(struct net *net,
796796
return ln;
797797
}
798798

799-
static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
800-
{
801-
return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
802-
RTF_GATEWAY;
803-
}
804-
805799
static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
806800
{
807801
int i;
@@ -991,6 +985,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
991985
rt6i_nsiblings++;
992986
}
993987
BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
988+
rt6_multipath_rebalance(temp_sibling);
994989
}
995990

996991
/*
@@ -1672,6 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
16721667
sibling->rt6i_nsiblings--;
16731668
rt->rt6i_nsiblings = 0;
16741669
list_del_init(&rt->rt6i_siblings);
1670+
rt6_multipath_rebalance(next_sibling);
16751671
}
16761672

16771673
/* Adjust walkers */

net/ipv6/route.c

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3481,6 +3481,99 @@ struct arg_netdev_event {
34813481
};
34823482
};
34833483

3484+
static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3485+
{
3486+
struct rt6_info *iter;
3487+
struct fib6_node *fn;
3488+
3489+
fn = rcu_dereference_protected(rt->rt6i_node,
3490+
lockdep_is_held(&rt->rt6i_table->tb6_lock));
3491+
iter = rcu_dereference_protected(fn->leaf,
3492+
lockdep_is_held(&rt->rt6i_table->tb6_lock));
3493+
while (iter) {
3494+
if (iter->rt6i_metric == rt->rt6i_metric &&
3495+
rt6_qualify_for_ecmp(iter))
3496+
return iter;
3497+
iter = rcu_dereference_protected(iter->rt6_next,
3498+
lockdep_is_held(&rt->rt6i_table->tb6_lock));
3499+
}
3500+
3501+
return NULL;
3502+
}
3503+
3504+
static bool rt6_is_dead(const struct rt6_info *rt)
3505+
{
3506+
if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
3507+
(rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
3508+
rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3509+
return true;
3510+
3511+
return false;
3512+
}
3513+
3514+
static int rt6_multipath_total_weight(const struct rt6_info *rt)
3515+
{
3516+
struct rt6_info *iter;
3517+
int total = 0;
3518+
3519+
if (!rt6_is_dead(rt))
3520+
total++;
3521+
3522+
list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3523+
if (!rt6_is_dead(iter))
3524+
total++;
3525+
}
3526+
3527+
return total;
3528+
}
3529+
3530+
static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3531+
{
3532+
int upper_bound = -1;
3533+
3534+
if (!rt6_is_dead(rt)) {
3535+
(*weight)++;
3536+
upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3537+
total) - 1;
3538+
}
3539+
atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
3540+
}
3541+
3542+
static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3543+
{
3544+
struct rt6_info *iter;
3545+
int weight = 0;
3546+
3547+
rt6_upper_bound_set(rt, &weight, total);
3548+
3549+
list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3550+
rt6_upper_bound_set(iter, &weight, total);
3551+
}
3552+
3553+
void rt6_multipath_rebalance(struct rt6_info *rt)
3554+
{
3555+
struct rt6_info *first;
3556+
int total;
3557+
3558+
/* In case the entire multipath route was marked for flushing,
3559+
* then there is no need to rebalance upon the removal of every
3560+
* sibling route.
3561+
*/
3562+
if (!rt->rt6i_nsiblings || rt->should_flush)
3563+
return;
3564+
3565+
/* During lookup routes are evaluated in order, so we need to
3566+
* make sure upper bounds are assigned from the first sibling
3567+
* onwards.
3568+
*/
3569+
first = rt6_multipath_first_sibling(rt);
3570+
if (WARN_ON_ONCE(!first))
3571+
return;
3572+
3573+
total = rt6_multipath_total_weight(first);
3574+
rt6_multipath_upper_bound_set(first, total);
3575+
}
3576+
34843577
static int fib6_ifup(struct rt6_info *rt, void *p_arg)
34853578
{
34863579
const struct arg_netdev_event *arg = p_arg;
@@ -3489,6 +3582,7 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
34893582
if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
34903583
rt->rt6i_nh_flags &= ~arg->nh_flags;
34913584
fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
3585+
rt6_multipath_rebalance(rt);
34923586
}
34933587

34943588
return 0;
@@ -3588,13 +3682,15 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
35883682
rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
35893683
RTNH_F_LINKDOWN);
35903684
fib6_update_sernum(rt);
3685+
rt6_multipath_rebalance(rt);
35913686
}
35923687
return -2;
35933688
case NETDEV_CHANGE:
35943689
if (rt->dst.dev != dev ||
35953690
rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
35963691
break;
35973692
rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
3693+
rt6_multipath_rebalance(rt);
35983694
break;
35993695
}
36003696

0 commit comments

Comments
 (0)