Skip to content

Commit 353c9cb

Browse files
Peter Oskolkov authored and davem330 committed
ip: add helpers to process in-order fragments faster.
This patch introduces several helper functions/macros that will be used in the follow-up patch. No runtime changes yet. The new logic (fully implemented in the second patch) is as follows: * Nodes in the rb-tree will now contain not single fragments, but lists of consecutive fragments ("runs"). * At each point in time, the current "active" run at the tail is maintained/tracked. Fragments that arrive in-order, adjacent to the previous tail fragment, are added to this tail run without triggering the re-balancing of the rb-tree. * If a fragment arrives out of order with the offset _before_ the tail run, it is inserted into the rb-tree as a single fragment. * If a fragment arrives after the current tail fragment (with a gap), it starts a new "tail" run, and is inserted into the rb-tree at the end as the head of the new run. skb->cb is used to store additional information needed here (suggested by Eric Dumazet). Reported-by: Willem de Bruijn <[email protected]> Signed-off-by: Peter Oskolkov <[email protected]> Cc: Eric Dumazet <[email protected]> Cc: Florian Westphal <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 6a92ef0 commit 353c9cb

File tree

2 files changed

+79
-0
lines changed

2 files changed

+79
-0
lines changed

include/net/inet_frag.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ struct frag_v6_compare_key {
5757
* @lock: spinlock protecting this frag
5858
* @refcnt: reference count of the queue
5959
* @fragments: received fragments head
60+
* @rb_fragments: received fragments rb-tree root
6061
* @fragments_tail: received fragments tail
62+
* @last_run_head: the head of the last "run". see ip_fragment.c
6163
* @stamp: timestamp of the last received fragment
6264
* @len: total length of the original datagram
6365
* @meat: length of received fragments so far
@@ -78,6 +80,7 @@ struct inet_frag_queue {
7880
struct sk_buff *fragments; /* Used in IPv6. */
7981
struct rb_root rb_fragments; /* Used in IPv4. */
8082
struct sk_buff *fragments_tail;
83+
struct sk_buff *last_run_head;
8184
ktime_t stamp;
8285
int len;
8386
int meat;
@@ -113,6 +116,9 @@ void inet_frag_kill(struct inet_frag_queue *q);
113116
void inet_frag_destroy(struct inet_frag_queue *q);
114117
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
115118

119+
/* Erase every node from the rb-tree at @root and free its skb together
 * with any skbs chained behind it through the per-skb next_frag link.
 * Returns the sum of the truesizes of all skbs freed.
 */
120+
unsigned int inet_frag_rbtree_purge(struct rb_root *root);
121+
116122
static inline void inet_frag_put(struct inet_frag_queue *q)
117123
{
118124
if (refcount_dec_and_test(&q->refcnt))

net/ipv4/ip_fragment.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,57 @@
5757
*/
5858
static const char ip_frag_cache_name[] = "ip4-frags";
5959

60+
/* Use skb->cb to track consecutive/adjacent fragments coming at
61+
* the end of the queue. Nodes in the rb-tree queue will
62+
* contain "runs" of one or more adjacent fragments.
63+
*
64+
* Invariants:
65+
* - next_frag is NULL at the tail of a "run";
66+
* - the head of a "run" has the sum of all fragment lengths in frag_run_len.
67+
*/
68+
struct ipfrag_skb_cb {
69+
struct inet_skb_parm h; /* NOTE(review): presumably kept first so the existing IPv4 cb layout stays valid - confirm */
70+
struct sk_buff *next_frag; /* next skb in the same run; NULL at the tail of the run */
71+
int frag_run_len; /* on the head of a run: sum of skb->len over the whole run */
72+
};
73+
74+
/* Access skb->cb through a struct ipfrag_skb_cb pointer. */
#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
75+
76+
/* Initialise skb's cb so that it forms a run of its own: no successor,
 * and a run length equal to its own skb->len.
 */
static void ip4_frag_init_run(struct sk_buff *skb)
77+
{
78+
/* The overlay struct must fit inside skb->cb; checked at build time. */
BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
79+
80+
FRAG_CB(skb)->next_frag = NULL;
81+
FRAG_CB(skb)->frag_run_len = skb->len;
82+
}
83+
84+
/* Append skb to the last "run".
 *
 * skb is chained after q->fragments_tail through next_frag, the run length
 * stored on q->last_run_head grows by skb->len, and skb becomes the new
 * q->fragments_tail. The rb-tree itself is not modified here.
 *
 * Both q->last_run_head and q->fragments_tail are dereferenced without a
 * NULL check, so a run must already exist when this is called.
 */
85+
static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
86+
struct sk_buff *skb)
87+
{
88+
/* skb is reached via next_frag only; mark its rb node as not linked. */
RB_CLEAR_NODE(&skb->rbnode);
89+
FRAG_CB(skb)->next_frag = NULL;
90+
91+
FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
92+
FRAG_CB(q->fragments_tail)->next_frag = skb;
93+
q->fragments_tail = skb;
94+
}
95+
96+
/* Create a new "run" with the skb.
 *
 * skb is linked into q->rb_fragments as the right child of the current
 * q->last_run_head, or as the tree's root node when there is no run yet.
 * After insertion skb is initialised as a single-fragment run and becomes
 * both q->fragments_tail and q->last_run_head.
 *
 * NOTE(review): linking at last_run_head's rb_right assumes that slot is
 * empty, i.e. that last_run_head is the rightmost node - confirm at callers.
 */
97+
static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
98+
{
99+
if (q->last_run_head)
100+
rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
101+
&q->last_run_head->rbnode.rb_right);
102+
else
103+
rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
104+
/* rb_link_node() only attaches the node; this fixes up the tree. */
rb_insert_color(&skb->rbnode, &q->rb_fragments);
105+
106+
ip4_frag_init_run(skb);
107+
q->fragments_tail = skb;
108+
q->last_run_head = skb;
109+
}
110+
60111
/* Describe an entry in the "incomplete datagrams" queue. */
61112
struct ipq {
62113
struct inet_frag_queue q;
@@ -654,6 +705,28 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
654705
}
655706
EXPORT_SYMBOL(ip_check_defrag);
656707

708+
/* Empty the rb-tree at @root: erase each node, then free the node's skb
 * and every skb chained behind it through FRAG_CB()->next_frag.
 * Returns the accumulated truesize of all skbs freed.
 *
 * NOTE(review): the walk trusts next_frag on every tree node, so each node
 * must have had its cb initialised before insertion - confirm at callers.
 */
unsigned int inet_frag_rbtree_purge(struct rb_root *root)
709+
{
710+
struct rb_node *p = rb_first(root);
711+
unsigned int sum = 0;
712+
713+
while (p) {
714+
struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
715+
716+
/* Step to the successor first; the current node is erased next. */
p = rb_next(p);
717+
rb_erase(&skb->rbnode, root);
718+
while (skb) {
719+
/* next_frag lives in skb->cb, so read it before the skb is freed. */
struct sk_buff *next = FRAG_CB(skb)->next_frag;
720+
721+
sum += skb->truesize;
722+
kfree_skb(skb);
723+
skb = next;
724+
}
725+
}
726+
return sum;
727+
}
728+
EXPORT_SYMBOL(inet_frag_rbtree_purge);
729+
657730
#ifdef CONFIG_SYSCTL
658731
static int dist_min;
659732

0 commit comments

Comments
 (0)