Skip to content

Commit 0e884c7

Browse files
pchri03 authored and davem330 committed
ipv4: L3 hash-based multipath
Replaces the per-packet multipath with a hash-based multipath using source and destination address. Signed-off-by: Peter Nørlund <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 2472186 commit 0e884c7

File tree

3 files changed

+98
-72
lines changed

3 files changed

+98
-72
lines changed

include/net/ip_fib.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ struct fib_nh {
7979
unsigned char nh_scope;
8080
#ifdef CONFIG_IP_ROUTE_MULTIPATH
8181
int nh_weight;
82-
int nh_power;
82+
atomic_t nh_upper_bound;
8383
#endif
8484
#ifdef CONFIG_IP_ROUTE_CLASSID
8585
__u32 nh_tclassid;
@@ -118,7 +118,7 @@ struct fib_info {
118118
#define fib_advmss fib_metrics[RTAX_ADVMSS-1]
119119
int fib_nhs;
120120
#ifdef CONFIG_IP_ROUTE_MULTIPATH
121-
int fib_power;
121+
int fib_weight;
122122
#endif
123123
struct rcu_head rcu;
124124
struct fib_nh fib_nh[0];
@@ -320,7 +320,15 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
320320
int fib_sync_down_dev(struct net_device *dev, unsigned long event);
321321
int fib_sync_down_addr(struct net *net, __be32 local);
322322
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
323-
void fib_select_multipath(struct fib_result *res);
323+
324+
extern u32 fib_multipath_secret __read_mostly;
325+
326+
/*
 * Hash a source/destination address pair, keyed with
 * fib_multipath_secret, for multipath nexthop selection.
 *
 * The jhash_2words() result is shifted right by one bit before it is
 * returned as an int. NOTE(review): presumably this keeps the value
 * non-negative so it can be compared against the signed per-nexthop
 * nh_upper_bound -- confirm against jhash_2words() return type.
 */
static inline int fib_multipath_hash(__be32 saddr, __be32 daddr)
327+
{
328+
return jhash_2words(saddr, daddr, fib_multipath_secret) >> 1;
329+
}
330+
331+
void fib_select_multipath(struct fib_result *res, int hash);
324332

325333
/* Exported by fib_trie.c */
326334
void fib_trie_init(void);

net/ipv4/fib_semantics.c

Lines changed: 75 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,7 @@ static unsigned int fib_info_cnt;
5757
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
5858

5959
#ifdef CONFIG_IP_ROUTE_MULTIPATH
60-
61-
static DEFINE_SPINLOCK(fib_multipath_lock);
60+
u32 fib_multipath_secret __read_mostly;
6261

6362
#define for_nexthops(fi) { \
6463
int nhsel; const struct fib_nh *nh; \
@@ -532,7 +531,67 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
532531
return ret;
533532
}
534533

535-
#endif
534+
/*
 * Recompute nh_upper_bound for every nexthop of @fi.
 *
 * Returns immediately when @fi has fewer than two nexthops. Otherwise
 * it makes two passes over the nexthops:
 *   1. sum nh_weight of the usable nexthops into 'total';
 *   2. give each usable nexthop a cumulative upper bound proportional
 *      to the running weight 'w' out of 'total', and give every
 *      unusable nexthop an upper bound of -1.
 *
 * A nexthop is treated as unusable when RTNH_F_DEAD is set, or when
 * RTNH_F_LINKDOWN is set and IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN() is
 * true for its device.
 *
 * NOTE(review): __in_dev_get_rcu() is used below; whether every caller
 * holds the required lock is not visible from this hunk -- confirm.
 */
static void fib_rebalance(struct fib_info *fi)
535+
{
536+
int total;
537+
int w;
538+
struct in_device *in_dev;
539+
540+
if (fi->fib_nhs < 2)
541+
return;
542+
543+
/* Pass 1: total weight of the nexthops that are still usable. */
total = 0;
544+
for_nexthops(fi) {
545+
if (nh->nh_flags & RTNH_F_DEAD)
546+
continue;
547+
548+
in_dev = __in_dev_get_rcu(nh->nh_dev);
549+
550+
if (in_dev &&
551+
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
552+
nh->nh_flags & RTNH_F_LINKDOWN)
553+
continue;
554+
555+
total += nh->nh_weight;
556+
} endfor_nexthops(fi);
557+
558+
/* Pass 2: assign each nexthop its cumulative upper bound. */
w = 0;
559+
change_nexthops(fi) {
560+
int upper_bound;
561+
562+
in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev);
563+
564+
if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
565+
upper_bound = -1;
566+
} else if (in_dev &&
567+
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
568+
nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
569+
upper_bound = -1;
570+
} else {
571+
w += nexthop_nh->nh_weight;
572+
/*
 * 2147483648 is 2^31, so the last usable nexthop (w == total)
 * ends up with an upper bound of 2^31 - 1.
 * NOTE(review): the dividend is a signed 64-bit value; a 64-bit
 * division through DIV_ROUND_CLOSEST may not link on 32-bit
 * builds -- consider DIV_ROUND_CLOSEST_ULL; confirm.
 */
upper_bound = DIV_ROUND_CLOSEST(2147483648LL * w,
573+
total) - 1;
574+
}
575+
576+
atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
577+
} endfor_nexthops(fi);
578+
579+
/*
 * Only reached for routes with at least two nexthops.
 * NOTE(review): presumably net_get_random_once() fills the secret with
 * random bytes on the first call only -- confirm.
 */
net_get_random_once(&fib_multipath_secret,
580+
sizeof(fib_multipath_secret));
581+
}
582+
583+
/* Add the weight of nexthop @nh to the running total fi->fib_weight. */
static inline void fib_add_weight(struct fib_info *fi,
584+
const struct fib_nh *nh)
585+
{
586+
fi->fib_weight += nh->nh_weight;
587+
}
588+
589+
#else /* CONFIG_IP_ROUTE_MULTIPATH */
590+
591+
#define fib_rebalance(fi) do { } while (0)
592+
#define fib_add_weight(fi, nh) do { } while (0)
593+
594+
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
536595

537596
static int fib_encap_match(struct net *net, u16 encap_type,
538597
struct nlattr *encap,
@@ -1094,8 +1153,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
10941153

10951154
change_nexthops(fi) {
10961155
fib_info_update_nh_saddr(net, nexthop_nh);
1156+
fib_add_weight(fi, nexthop_nh);
10971157
} endfor_nexthops(fi)
10981158

1159+
fib_rebalance(fi);
1160+
10991161
link_it:
11001162
ofi = fib_find_info(fi);
11011163
if (ofi) {
@@ -1317,12 +1379,6 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
13171379
nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
13181380
break;
13191381
}
1320-
#ifdef CONFIG_IP_ROUTE_MULTIPATH
1321-
spin_lock_bh(&fib_multipath_lock);
1322-
fi->fib_power -= nexthop_nh->nh_power;
1323-
nexthop_nh->nh_power = 0;
1324-
spin_unlock_bh(&fib_multipath_lock);
1325-
#endif
13261382
dead++;
13271383
}
13281384
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1345,6 +1401,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
13451401
}
13461402
ret++;
13471403
}
1404+
1405+
fib_rebalance(fi);
13481406
}
13491407

13501408
return ret;
@@ -1467,83 +1525,35 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
14671525
!__in_dev_get_rtnl(dev))
14681526
continue;
14691527
alive++;
1470-
#ifdef CONFIG_IP_ROUTE_MULTIPATH
1471-
spin_lock_bh(&fib_multipath_lock);
1472-
nexthop_nh->nh_power = 0;
1473-
nexthop_nh->nh_flags &= ~nh_flags;
1474-
spin_unlock_bh(&fib_multipath_lock);
1475-
#else
14761528
nexthop_nh->nh_flags &= ~nh_flags;
1477-
#endif
14781529
} endfor_nexthops(fi)
14791530

14801531
if (alive > 0) {
14811532
fi->fib_flags &= ~nh_flags;
14821533
ret++;
14831534
}
1535+
1536+
fib_rebalance(fi);
14841537
}
14851538

14861539
return ret;
14871540
}
14881541

14891542
#ifdef CONFIG_IP_ROUTE_MULTIPATH
14901543

1491-
/*
1492-
* The algorithm is suboptimal, but it provides really
1493-
* fair weighted route distribution.
1494-
*/
1495-
void fib_select_multipath(struct fib_result *res)
1544+
void fib_select_multipath(struct fib_result *res, int hash)
14961545
{
14971546
struct fib_info *fi = res->fi;
1498-
struct in_device *in_dev;
1499-
int w;
1500-
1501-
spin_lock_bh(&fib_multipath_lock);
1502-
if (fi->fib_power <= 0) {
1503-
int power = 0;
1504-
change_nexthops(fi) {
1505-
in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev);
1506-
if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1507-
continue;
1508-
if (in_dev &&
1509-
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1510-
nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
1511-
continue;
1512-
power += nexthop_nh->nh_weight;
1513-
nexthop_nh->nh_power = nexthop_nh->nh_weight;
1514-
} endfor_nexthops(fi);
1515-
fi->fib_power = power;
1516-
if (power <= 0) {
1517-
spin_unlock_bh(&fib_multipath_lock);
1518-
/* Race condition: route has just become dead. */
1519-
res->nh_sel = 0;
1520-
return;
1521-
}
1522-
}
1523-
15241547

1525-
/* w should be random number [0..fi->fib_power-1],
1526-
* it is pretty bad approximation.
1527-
*/
1528-
1529-
w = jiffies % fi->fib_power;
1548+
for_nexthops(fi) {
1549+
if (hash > atomic_read(&nh->nh_upper_bound))
1550+
continue;
15301551

1531-
change_nexthops(fi) {
1532-
if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
1533-
nexthop_nh->nh_power) {
1534-
w -= nexthop_nh->nh_power;
1535-
if (w <= 0) {
1536-
nexthop_nh->nh_power--;
1537-
fi->fib_power--;
1538-
res->nh_sel = nhsel;
1539-
spin_unlock_bh(&fib_multipath_lock);
1540-
return;
1541-
}
1542-
}
1552+
res->nh_sel = nhsel;
1553+
return;
15431554
} endfor_nexthops(fi);
15441555

15451556
/* Race condition: route has just become dead. */
15461557
res->nh_sel = 0;
1547-
spin_unlock_bh(&fib_multipath_lock);
15481558
}
15491559
#endif

net/ipv4/route.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1658,8 +1658,12 @@ static int ip_mkroute_input(struct sk_buff *skb,
16581658
__be32 daddr, __be32 saddr, u32 tos)
16591659
{
16601660
#ifdef CONFIG_IP_ROUTE_MULTIPATH
1661-
if (res->fi && res->fi->fib_nhs > 1)
1662-
fib_select_multipath(res);
1661+
if (res->fi && res->fi->fib_nhs > 1) {
1662+
int h;
1663+
1664+
h = fib_multipath_hash(saddr, daddr);
1665+
fib_select_multipath(res, h);
1666+
}
16631667
#endif
16641668

16651669
/* create a routing cache entry */
@@ -2189,8 +2193,12 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
21892193
}
21902194

21912195
#ifdef CONFIG_IP_ROUTE_MULTIPATH
2192-
if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
2193-
fib_select_multipath(&res);
2196+
if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
2197+
int h;
2198+
2199+
h = fib_multipath_hash(fl4->saddr, fl4->daddr);
2200+
fib_select_multipath(&res, h);
2201+
}
21942202
else
21952203
#endif
21962204
if (!res.prefixlen &&

0 commit comments

Comments
 (0)