@@ -57,8 +57,7 @@ static unsigned int fib_info_cnt;
57
57
static struct hlist_head fib_info_devhash [DEVINDEX_HASHSIZE ];
58
58
59
59
#ifdef CONFIG_IP_ROUTE_MULTIPATH
60
-
61
- static DEFINE_SPINLOCK (fib_multipath_lock );
60
+ u32 fib_multipath_secret __read_mostly ;
62
61
63
62
#define for_nexthops (fi ) { \
64
63
int nhsel; const struct fib_nh *nh; \
@@ -532,7 +531,67 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
532
531
return ret ;
533
532
}
534
533
535
- #endif
534
+ static void fib_rebalance (struct fib_info * fi )
535
+ {
536
+ int total ;
537
+ int w ;
538
+ struct in_device * in_dev ;
539
+
540
+ if (fi -> fib_nhs < 2 )
541
+ return ;
542
+
543
+ total = 0 ;
544
+ for_nexthops (fi ) {
545
+ if (nh -> nh_flags & RTNH_F_DEAD )
546
+ continue ;
547
+
548
+ in_dev = __in_dev_get_rcu (nh -> nh_dev );
549
+
550
+ if (in_dev &&
551
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN (in_dev ) &&
552
+ nh -> nh_flags & RTNH_F_LINKDOWN )
553
+ continue ;
554
+
555
+ total += nh -> nh_weight ;
556
+ } endfor_nexthops (fi );
557
+
558
+ w = 0 ;
559
+ change_nexthops (fi ) {
560
+ int upper_bound ;
561
+
562
+ in_dev = __in_dev_get_rcu (nexthop_nh -> nh_dev );
563
+
564
+ if (nexthop_nh -> nh_flags & RTNH_F_DEAD ) {
565
+ upper_bound = -1 ;
566
+ } else if (in_dev &&
567
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN (in_dev ) &&
568
+ nexthop_nh -> nh_flags & RTNH_F_LINKDOWN ) {
569
+ upper_bound = -1 ;
570
+ } else {
571
+ w += nexthop_nh -> nh_weight ;
572
+ upper_bound = DIV_ROUND_CLOSEST (2147483648LL * w ,
573
+ total ) - 1 ;
574
+ }
575
+
576
+ atomic_set (& nexthop_nh -> nh_upper_bound , upper_bound );
577
+ } endfor_nexthops (fi );
578
+
579
+ net_get_random_once (& fib_multipath_secret ,
580
+ sizeof (fib_multipath_secret ));
581
+ }
582
+
583
+ static inline void fib_add_weight (struct fib_info * fi ,
584
+ const struct fib_nh * nh )
585
+ {
586
+ fi -> fib_weight += nh -> nh_weight ;
587
+ }
588
+
589
+ #else /* CONFIG_IP_ROUTE_MULTIPATH */
590
+
591
/*
 * No-op stand-ins used when CONFIG_IP_ROUTE_MULTIPATH is not set.  They
 * must be function-like macros (no space before the parameter list) so
 * that call sites expand to a single empty statement; the arguments are
 * not evaluated.
 */
#define fib_rebalance(fi) do { } while (0)
#define fib_add_weight(fi, nh) do { } while (0)
593
+
594
+ #endif /* CONFIG_IP_ROUTE_MULTIPATH */
536
595
537
596
static int fib_encap_match (struct net * net , u16 encap_type ,
538
597
struct nlattr * encap ,
@@ -1094,8 +1153,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
1094
1153
1095
1154
change_nexthops (fi ) {
1096
1155
fib_info_update_nh_saddr (net , nexthop_nh );
1156
+ fib_add_weight (fi , nexthop_nh );
1097
1157
} endfor_nexthops (fi )
1098
1158
1159
+ fib_rebalance (fi );
1160
+
1099
1161
link_it :
1100
1162
ofi = fib_find_info (fi );
1101
1163
if (ofi ) {
@@ -1317,12 +1379,6 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
1317
1379
nexthop_nh -> nh_flags |= RTNH_F_LINKDOWN ;
1318
1380
break ;
1319
1381
}
1320
- #ifdef CONFIG_IP_ROUTE_MULTIPATH
1321
- spin_lock_bh (& fib_multipath_lock );
1322
- fi -> fib_power -= nexthop_nh -> nh_power ;
1323
- nexthop_nh -> nh_power = 0 ;
1324
- spin_unlock_bh (& fib_multipath_lock );
1325
- #endif
1326
1382
dead ++ ;
1327
1383
}
1328
1384
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1345,6 +1401,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
1345
1401
}
1346
1402
ret ++ ;
1347
1403
}
1404
+
1405
+ fib_rebalance (fi );
1348
1406
}
1349
1407
1350
1408
return ret ;
@@ -1467,83 +1525,35 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
1467
1525
!__in_dev_get_rtnl (dev ))
1468
1526
continue ;
1469
1527
alive ++ ;
1470
- #ifdef CONFIG_IP_ROUTE_MULTIPATH
1471
- spin_lock_bh (& fib_multipath_lock );
1472
- nexthop_nh -> nh_power = 0 ;
1473
- nexthop_nh -> nh_flags &= ~nh_flags ;
1474
- spin_unlock_bh (& fib_multipath_lock );
1475
- #else
1476
1528
nexthop_nh -> nh_flags &= ~nh_flags ;
1477
- #endif
1478
1529
} endfor_nexthops (fi )
1479
1530
1480
1531
if (alive > 0 ) {
1481
1532
fi -> fib_flags &= ~nh_flags ;
1482
1533
ret ++ ;
1483
1534
}
1535
+
1536
+ fib_rebalance (fi );
1484
1537
}
1485
1538
1486
1539
return ret ;
1487
1540
}
1488
1541
1489
1542
#ifdef CONFIG_IP_ROUTE_MULTIPATH
1490
1543
1491
- /*
1492
- * The algorithm is suboptimal, but it provides really
1493
- * fair weighted route distribution.
1494
- */
1495
- void fib_select_multipath (struct fib_result * res )
1544
+ void fib_select_multipath (struct fib_result * res , int hash )
1496
1545
{
1497
1546
struct fib_info * fi = res -> fi ;
1498
- struct in_device * in_dev ;
1499
- int w ;
1500
-
1501
- spin_lock_bh (& fib_multipath_lock );
1502
- if (fi -> fib_power <= 0 ) {
1503
- int power = 0 ;
1504
- change_nexthops (fi ) {
1505
- in_dev = __in_dev_get_rcu (nexthop_nh -> nh_dev );
1506
- if (nexthop_nh -> nh_flags & RTNH_F_DEAD )
1507
- continue ;
1508
- if (in_dev &&
1509
- IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN (in_dev ) &&
1510
- nexthop_nh -> nh_flags & RTNH_F_LINKDOWN )
1511
- continue ;
1512
- power += nexthop_nh -> nh_weight ;
1513
- nexthop_nh -> nh_power = nexthop_nh -> nh_weight ;
1514
- } endfor_nexthops (fi );
1515
- fi -> fib_power = power ;
1516
- if (power <= 0 ) {
1517
- spin_unlock_bh (& fib_multipath_lock );
1518
- /* Race condition: route has just become dead. */
1519
- res -> nh_sel = 0 ;
1520
- return ;
1521
- }
1522
- }
1523
-
1524
1547
1525
- /* w should be random number [0..fi->fib_power-1],
1526
- * it is pretty bad approximation.
1527
- */
1528
-
1529
- w = jiffies % fi -> fib_power ;
1548
+ for_nexthops (fi ) {
1549
+ if (hash > atomic_read (& nh -> nh_upper_bound ))
1550
+ continue ;
1530
1551
1531
- change_nexthops (fi ) {
1532
- if (!(nexthop_nh -> nh_flags & RTNH_F_DEAD ) &&
1533
- nexthop_nh -> nh_power ) {
1534
- w -= nexthop_nh -> nh_power ;
1535
- if (w <= 0 ) {
1536
- nexthop_nh -> nh_power -- ;
1537
- fi -> fib_power -- ;
1538
- res -> nh_sel = nhsel ;
1539
- spin_unlock_bh (& fib_multipath_lock );
1540
- return ;
1541
- }
1542
- }
1552
+ res -> nh_sel = nhsel ;
1553
+ return ;
1543
1554
} endfor_nexthops (fi );
1544
1555
1545
1556
/* Race condition: route has just become dead. */
1546
1557
res -> nh_sel = 0 ;
1547
- spin_unlock_bh (& fib_multipath_lock );
1548
1558
}
1549
1559
#endif
0 commit comments