@@ -455,36 +455,26 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 					     int strict)
 {
 	struct rt6_info *sibling, *next_sibling;
-	int route_choosen;
 
 	/* We might have already computed the hash for ICMPv6 errors. In such
 	 * case it will always be non-zero. Otherwise now is the time to do it.
 	 */
 	if (!fl6->mp_hash)
 		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
 
-	route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
-	/* Don't change the route, if route_choosen == 0
-	 * (siblings does not include ourself)
-	 */
-	if (route_choosen)
-		list_for_each_entry_safe(sibling, next_sibling,
-				&match->rt6i_siblings, rt6i_siblings) {
-			route_choosen--;
-			if (route_choosen == 0) {
-				struct inet6_dev *idev = sibling->rt6i_idev;
-
-				if (sibling->rt6i_nh_flags & RTNH_F_DEAD)
-					break;
-				if (sibling->rt6i_nh_flags & RTNH_F_LINKDOWN &&
-				    idev->cnf.ignore_routes_with_linkdown)
-					break;
-				if (rt6_score_route(sibling, oif, strict) < 0)
-					break;
-				match = sibling;
-				break;
-			}
-		}
+	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
+		return match;
+
+	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
+				 rt6i_siblings) {
+		if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
+			continue;
+		if (rt6_score_route(sibling, oif, strict) < 0)
+			break;
+		match = sibling;
+		break;
+	}
+
 	return match;
 }
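The rewritten selector replaces modulo-N selection with hash-threshold selection: each route in the ECMP group carries a precomputed upper bound (rt6i_nh_upper_bound), and the 31-bit flow hash is compared against those bounds in order, so the first route whose bound covers the hash is chosen. Below is a minimal userspace sketch of that walk; the struct and helper names are illustrative, not kernel API, and unusable nexthops are modeled with an upper bound of -1 so they can never match.

/* Minimal sketch of hash-threshold selection as done by the rewritten
 * rt6_multipath_select(): pick the first nexthop whose precomputed upper
 * bound is >= the 31-bit flow hash. Dead/linkdown nexthops get a bound of
 * -1 and therefore never match. Illustrative userspace code only.
 */
#include <stdio.h>
#include <stdint.h>

struct nh {
	const char *name;
	int32_t upper_bound;		/* -1 when the nexthop is unusable */
};

static const struct nh *select_nh(const struct nh *nhs, int n, uint32_t hash)
{
	int i;

	for (i = 0; i < n; i++)
		if ((int64_t)hash <= (int64_t)nhs[i].upper_bound)
			return &nhs[i];

	return &nhs[0];	/* nothing matched; keep the first route, like the kernel */
}

int main(void)
{
	/* Upper bounds for weights 1 and 3 (total 4), see rt6_upper_bound_set(). */
	struct nh nhs[] = {
		{ "nh0 (weight 1)", (int32_t)(((uint64_t)1 << 31) / 4 - 1) },
		{ "nh1 (weight 3)", INT32_MAX },
	};
	uint32_t hash = 0x9abcdef0u >> 1;	/* 31-bit hash, as after the ">> 1" */

	printf("hash %u -> %s\n", hash, select_nh(nhs, 2, hash)->name);
	return 0;
}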
@@ -1833,10 +1823,10 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
 
 	if (skb) {
 		ip6_multipath_l3_keys(skb, &hash_keys);
-		return flow_hash_from_keys(&hash_keys);
+		return flow_hash_from_keys(&hash_keys) >> 1;
 	}
 
-	return get_hash_from_flowi6(fl6);
+	return get_hash_from_flowi6(fl6) >> 1;
 }
 
 void ip6_route_input(struct sk_buff *skb)
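The hash is shifted right by one bit so that mp_hash fits in the non-negative range of a signed 32-bit value: the per-nexthop upper bounds live in an atomic_t, with -1 reserved for unusable nexthops, so the comparisons in rt6_multipath_select() have to stay within [0, INT32_MAX]. A tiny userspace sanity check of that range, as an illustration of the assumption rather than kernel code:

/* Why the ">> 1": upper bounds are signed (atomic_t, -1 == unusable), so the
 * hash compared against them must stay within [0, INT32_MAX]. Illustrative
 * userspace check, not kernel code.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t full_hash = 0xffffffffu;	/* worst-case 32-bit flow hash */
	uint32_t mp_hash = full_hash >> 1;	/* what the patch stores in fl6->mp_hash */

	assert(mp_hash <= (uint32_t)INT32_MAX);	/* always comparable to a signed bound */
	return 0;
}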
@@ -2604,6 +2594,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 #endif
 
 	rt->rt6i_metric = cfg->fc_metric;
+	rt->rt6i_nh_weight = 1;
 
 	/* We cannot add true routes via loopback here,
 	   they would result in kernel looping; promote them to reject routes
@@ -3481,6 +3472,99 @@ struct arg_netdev_event {
 	};
 };
 
+static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+{
+	struct rt6_info *iter;
+	struct fib6_node *fn;
+
+	fn = rcu_dereference_protected(rt->rt6i_node,
+			lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	iter = rcu_dereference_protected(fn->leaf,
+			lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	while (iter) {
+		if (iter->rt6i_metric == rt->rt6i_metric &&
+		    rt6_qualify_for_ecmp(iter))
+			return iter;
+		iter = rcu_dereference_protected(iter->rt6_next,
+				lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	}
+
+	return NULL;
+}
+
+static bool rt6_is_dead(const struct rt6_info *rt)
+{
+	if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
+	    (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+	     rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+		return true;
+
+	return false;
+}
+
+static int rt6_multipath_total_weight(const struct rt6_info *rt)
+{
+	struct rt6_info *iter;
+	int total = 0;
+
+	if (!rt6_is_dead(rt))
+		total += rt->rt6i_nh_weight;
+
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
+		if (!rt6_is_dead(iter))
+			total += iter->rt6i_nh_weight;
+	}
+
+	return total;
+}
+
+static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+{
+	int upper_bound = -1;
+
+	if (!rt6_is_dead(rt)) {
+		*weight += rt->rt6i_nh_weight;
+		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
+						    total) - 1;
+	}
+	atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+}
+
+static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+{
+	struct rt6_info *iter;
+	int weight = 0;
+
+	rt6_upper_bound_set(rt, &weight, total);
+
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+		rt6_upper_bound_set(iter, &weight, total);
+}
+
+void rt6_multipath_rebalance(struct rt6_info *rt)
+{
+	struct rt6_info *first;
+	int total;
+
+	/* In case the entire multipath route was marked for flushing,
+	 * then there is no need to rebalance upon the removal of every
+	 * sibling route.
+	 */
+	if (!rt->rt6i_nsiblings || rt->should_flush)
+		return;
+
+	/* During lookup routes are evaluated in order, so we need to
+	 * make sure upper bounds are assigned from the first sibling
+	 * onwards.
+	 */
+	first = rt6_multipath_first_sibling(rt);
+	if (WARN_ON_ONCE(!first))
+		return;
+
+	total = rt6_multipath_total_weight(first);
+	rt6_multipath_upper_bound_set(first, total);
+}
+
 static int fib6_ifup(struct rt6_info *rt, void *p_arg)
 {
 	const struct arg_netdev_event *arg = p_arg;
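The helpers above split the 31-bit hash space among the live siblings in proportion to rt6i_nh_weight: rt6_multipath_total_weight() sums the weights of the usable routes, and rt6_upper_bound_set() assigns each route a cumulative upper bound, with -1 for dead routes. For weights 1 and 3, for example, the bounds come out to 0x1fffffff and 0x7fffffff. A userspace sketch of the same arithmetic follows, with DIV_ROUND_CLOSEST_ULL replicated locally; everything else is illustrative, not kernel API.

/* Userspace sketch of rt6_multipath_total_weight() / rt6_upper_bound_set():
 * given per-nexthop weights, compute the cumulative 31-bit upper bounds the
 * patch stores in rt6i_nh_upper_bound. Illustrative code only.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t div_round_closest_ull(uint64_t x, uint32_t divisor)
{
	return (x + divisor / 2) / divisor;	/* same rounding as DIV_ROUND_CLOSEST_ULL */
}

int main(void)
{
	int weights[] = { 1, 3 };		/* rt6i_nh_weight of each live sibling */
	int total = 0, running = 0;
	size_t i;

	for (i = 0; i < sizeof(weights) / sizeof(weights[0]); i++)
		total += weights[i];		/* rt6_multipath_total_weight() */

	for (i = 0; i < sizeof(weights) / sizeof(weights[0]); i++) {
		int32_t bound;

		running += weights[i];		/* cumulative weight, as in rt6_upper_bound_set() */
		bound = (int32_t)(div_round_closest_ull((uint64_t)running << 31,
							 total) - 1);
		printf("nh%zu: weight %d, upper bound %d (0x%x)\n",
		       i, weights[i], bound, (uint32_t)bound);
	}
	return 0;
}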
@@ -3489,6 +3573,7 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
 	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
 		rt->rt6i_nh_flags &= ~arg->nh_flags;
 		fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+		rt6_multipath_rebalance(rt);
 	}
 
 	return 0;
@@ -3588,13 +3673,15 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
 						   RTNH_F_LINKDOWN);
 			fib6_update_sernum(rt);
+			rt6_multipath_rebalance(rt);
 		}
 		return -2;
 	case NETDEV_CHANGE:
 		if (rt->dst.dev != dev ||
 		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
 			break;
 		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+		rt6_multipath_rebalance(rt);
 		break;
 	}
 
@@ -3938,6 +4025,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 			goto cleanup;
 		}
 
+		rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+
 		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
 		if (err) {
 			dst_release_immediate(&rt->dst);
@@ -4160,7 +4249,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 	if (!rtnh)
 		goto nla_put_failure;
 
-	rtnh->rtnh_hops = 0;
+	rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
 	rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
 
 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
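The last two hunks tie the weights to the netlink interface: struct rtnexthop carries the weight in rtnh_hops, offset by one (0 means weight 1), so ip6_route_multipath_add() stores rtnh_hops + 1 in rt6i_nh_weight and rt6_add_nexthop() now reports the stored weight back instead of hardcoding 0. iproute2's weight keyword on a multipath route maps to this same field. A minimal round-trip of that encoding, assuming a Linux system where <linux/rtnetlink.h> is available:

/* Sketch of the weight <-> rtnh_hops mapping used by the
 * ip6_route_multipath_add() and rt6_add_nexthop() changes: userspace encodes
 * weight - 1 into rtnh_hops, the kernel stores rtnh_hops + 1 and dumps
 * weight - 1 back. Illustrative code only.
 */
#include <stdio.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct rtnexthop rtnh = { .rtnh_hops = 3 };	/* userspace asked for weight 4 */
	int rt6i_nh_weight = rtnh.rtnh_hops + 1;	/* what the kernel would store */

	rtnh.rtnh_hops = rt6i_nh_weight - 1;		/* and what it dumps back */
	printf("weight %d <-> rtnh_hops %d\n", rt6i_nh_weight, (int)rtnh.rtnh_hops);
	return 0;
}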