Skip to content

Commit 7866a62

Browse files
noureddine-arista authored and davem330 committed
dev: add per net_device packet type chains
When many pf_packet listeners are created on a lot of interfaces, the current implementation using global packet type lists scales poorly. This patch adds per net_device packet type lists to fix this problem.

The patch was originally written by Eric Biederman for linux-2.6.29. Tested on linux-3.16.

Signed-off-by: "Eric W. Biederman" <[email protected]>
Signed-off-by: Salam Noureddine <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 7b4ce69 commit 7866a62

File tree

2 files changed

+86
-48
lines changed

2 files changed

+86
-48
lines changed

include/linux/netdevice.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1514,6 +1514,8 @@ struct net_device {
15141514
struct list_head napi_list;
15151515
struct list_head unreg_list;
15161516
struct list_head close_list;
1517+
struct list_head ptype_all;
1518+
struct list_head ptype_specific;
15171519

15181520
struct {
15191521
struct list_head upper;

net/core/dev.c

Lines changed: 84 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
371371
static inline struct list_head *ptype_head(const struct packet_type *pt)
372372
{
373373
if (pt->type == htons(ETH_P_ALL))
374-
return &ptype_all;
374+
return pt->dev ? &pt->dev->ptype_all : &ptype_all;
375375
else
376-
return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
376+
return pt->dev ? &pt->dev->ptype_specific :
377+
&ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
377378
}
378379

379380
/**
@@ -1734,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb,
17341735
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
17351736
}
17361737

1738+
static inline void deliver_ptype_list_skb(struct sk_buff *skb,
1739+
struct packet_type **pt,
1740+
struct net_device *dev, __be16 type,
1741+
struct list_head *ptype_list)
1742+
{
1743+
struct packet_type *ptype, *pt_prev = *pt;
1744+
1745+
list_for_each_entry_rcu(ptype, ptype_list, list) {
1746+
if (ptype->type != type)
1747+
continue;
1748+
if (pt_prev)
1749+
deliver_skb(skb, pt_prev, dev);
1750+
pt_prev = ptype;
1751+
}
1752+
*pt = pt_prev;
1753+
}
1754+
17371755
static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
17381756
{
17391757
if (!ptype->af_packet_priv || !skb->sk)
@@ -1757,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
17571775
struct packet_type *ptype;
17581776
struct sk_buff *skb2 = NULL;
17591777
struct packet_type *pt_prev = NULL;
1778+
struct list_head *ptype_list = &ptype_all;
17601779

17611780
rcu_read_lock();
1762-
list_for_each_entry_rcu(ptype, &ptype_all, list) {
1781+
again:
1782+
list_for_each_entry_rcu(ptype, ptype_list, list) {
17631783
/* Never send packets back to the socket
17641784
* they originated from - MvS ([email protected])
17651785
*/
1766-
if ((ptype->dev == dev || !ptype->dev) &&
1767-
(!skb_loop_sk(ptype, skb))) {
1768-
if (pt_prev) {
1769-
deliver_skb(skb2, pt_prev, skb->dev);
1770-
pt_prev = ptype;
1771-
continue;
1772-
}
1786+
if (skb_loop_sk(ptype, skb))
1787+
continue;
17731788

1774-
skb2 = skb_clone(skb, GFP_ATOMIC);
1775-
if (!skb2)
1776-
break;
1789+
if (pt_prev) {
1790+
deliver_skb(skb2, pt_prev, skb->dev);
1791+
pt_prev = ptype;
1792+
continue;
1793+
}
17771794

1778-
net_timestamp_set(skb2);
1795+
/* need to clone skb, done only once */
1796+
skb2 = skb_clone(skb, GFP_ATOMIC);
1797+
if (!skb2)
1798+
goto out_unlock;
17791799

1780-
/* skb->nh should be correctly
1781-
set by sender, so that the second statement is
1782-
just protection against buggy protocols.
1783-
*/
1784-
skb_reset_mac_header(skb2);
1785-
1786-
if (skb_network_header(skb2) < skb2->data ||
1787-
skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1788-
net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1789-
ntohs(skb2->protocol),
1790-
dev->name);
1791-
skb_reset_network_header(skb2);
1792-
}
1800+
net_timestamp_set(skb2);
17931801

1794-
skb2->transport_header = skb2->network_header;
1795-
skb2->pkt_type = PACKET_OUTGOING;
1796-
pt_prev = ptype;
1802+
/* skb->nh should be correctly
1803+
* set by sender, so that the second statement is
1804+
* just protection against buggy protocols.
1805+
*/
1806+
skb_reset_mac_header(skb2);
1807+
1808+
if (skb_network_header(skb2) < skb2->data ||
1809+
skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1810+
net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1811+
ntohs(skb2->protocol),
1812+
dev->name);
1813+
skb_reset_network_header(skb2);
17971814
}
1815+
1816+
skb2->transport_header = skb2->network_header;
1817+
skb2->pkt_type = PACKET_OUTGOING;
1818+
pt_prev = ptype;
1819+
}
1820+
1821+
if (ptype_list == &ptype_all) {
1822+
ptype_list = &dev->ptype_all;
1823+
goto again;
17981824
}
1825+
out_unlock:
17991826
if (pt_prev)
18001827
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
18011828
rcu_read_unlock();
@@ -2617,7 +2644,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
26172644
unsigned int len;
26182645
int rc;
26192646

2620-
if (!list_empty(&ptype_all))
2647+
if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
26212648
dev_queue_xmit_nit(skb, dev);
26222649

26232650
len = skb->len;
@@ -3615,7 +3642,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
36153642
struct packet_type *ptype, *pt_prev;
36163643
rx_handler_func_t *rx_handler;
36173644
struct net_device *orig_dev;
3618-
struct net_device *null_or_dev;
36193645
bool deliver_exact = false;
36203646
int ret = NET_RX_DROP;
36213647
__be16 type;
@@ -3658,11 +3684,15 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
36583684
goto skip_taps;
36593685

36603686
list_for_each_entry_rcu(ptype, &ptype_all, list) {
3661-
if (!ptype->dev || ptype->dev == skb->dev) {
3662-
if (pt_prev)
3663-
ret = deliver_skb(skb, pt_prev, orig_dev);
3664-
pt_prev = ptype;
3665-
}
3687+
if (pt_prev)
3688+
ret = deliver_skb(skb, pt_prev, orig_dev);
3689+
pt_prev = ptype;
3690+
}
3691+
3692+
list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
3693+
if (pt_prev)
3694+
ret = deliver_skb(skb, pt_prev, orig_dev);
3695+
pt_prev = ptype;
36663696
}
36673697

36683698
skip_taps:
@@ -3718,19 +3748,21 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
37183748
skb->vlan_tci = 0;
37193749
}
37203750

3751+
type = skb->protocol;
3752+
37213753
/* deliver only exact match when indicated */
3722-
null_or_dev = deliver_exact ? skb->dev : NULL;
3754+
if (likely(!deliver_exact)) {
3755+
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
3756+
&ptype_base[ntohs(type) &
3757+
PTYPE_HASH_MASK]);
3758+
}
37233759

3724-
type = skb->protocol;
3725-
list_for_each_entry_rcu(ptype,
3726-
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3727-
if (ptype->type == type &&
3728-
(ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3729-
ptype->dev == orig_dev)) {
3730-
if (pt_prev)
3731-
ret = deliver_skb(skb, pt_prev, orig_dev);
3732-
pt_prev = ptype;
3733-
}
3760+
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
3761+
&orig_dev->ptype_specific);
3762+
3763+
if (unlikely(skb->dev != orig_dev)) {
3764+
deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
3765+
&skb->dev->ptype_specific);
37343766
}
37353767

37363768
if (pt_prev) {
@@ -6579,6 +6611,8 @@ void netdev_run_todo(void)
65796611

65806612
/* paranoia */
65816613
BUG_ON(netdev_refcnt_read(dev));
6614+
BUG_ON(!list_empty(&dev->ptype_all));
6615+
BUG_ON(!list_empty(&dev->ptype_specific));
65826616
WARN_ON(rcu_access_pointer(dev->ip_ptr));
65836617
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
65846618
WARN_ON(dev->dn_ptr);
@@ -6761,6 +6795,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
67616795
INIT_LIST_HEAD(&dev->adj_list.lower);
67626796
INIT_LIST_HEAD(&dev->all_adj_list.upper);
67636797
INIT_LIST_HEAD(&dev->all_adj_list.lower);
6798+
INIT_LIST_HEAD(&dev->ptype_all);
6799+
INIT_LIST_HEAD(&dev->ptype_specific);
67646800
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
67656801
setup(dev);
67666802

0 commit comments

Comments
 (0)