
Commit 2e71a6f

edumazet authored and davem330 committed
net: gro: selective flush of packets
Current GRO can hold packets in gro_list for an almost unlimited time, in case the napi->poll() handler consumes its budget over and over. In this case, napi_complete()/napi_gro_flush() are not called.

Another problem is that gro_list is flushed in an unfriendly way: we scan the list and complete packets in reverse order (youngest packets first, oldest packets last). This defeats any priorities the sender could have cooked up.

Since GRO currently only stores TCP packets, we don't really notice the bug because of retransmits, but this behavior can add unexpected latencies, particularly on mice flows clamped by elephant flows.

This patch makes sure no packet can stay more than 1 ms in the queue, and only in stress situations. It also completes packets in the right order to minimize latencies.

Signed-off-by: Eric Dumazet <[email protected]>
Cc: Herbert Xu <[email protected]>
Cc: Jesse Gross <[email protected]>
Cc: Tom Herbert <[email protected]>
Cc: Yuchung Cheng <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent a2af139 commit 2e71a6f
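The 1 ms figure in the changelog follows from the age field introduced below: each held packet records the jiffies value at which it was queued, and a selective flush (flush_old == true) keeps only packets whose age still equals the current jiffies, i.e. packets queued during the current tick. With HZ >= 1000 one tick is at most 1 ms; on coarser-HZ systems the caller asks for a full flush instead (see the net_rx_action() hunk). A minimal userspace model of that per-packet decision, not kernel code; the names gro_pkt and keep_packet are illustrative:

#include <stdbool.h>
#include <stdio.h>

struct gro_pkt {
        unsigned long age;      /* jiffies value recorded when the packet was queued */
};

/* Mirrors "flush_old && NAPI_GRO_CB(skb)->age == jiffies": keep only packets
 * queued during the current tick, complete (flush) everything older. */
static bool keep_packet(const struct gro_pkt *p, unsigned long jiffies, bool flush_old)
{
        return flush_old && p->age == jiffies;
}

int main(void)
{
        const unsigned long jiffies = 1000;     /* pretend current tick counter */
        const bool flush_old = true;            /* as if HZ >= 1000 */
        const struct gro_pkt pkts[] = { { 998 }, { 999 }, { 1000 } };

        for (unsigned int i = 0; i < 3; i++)
                printf("age=%lu -> %s\n", pkts[i].age,
                       keep_packet(&pkts[i], jiffies, flush_old) ? "keep" : "flush");
        return 0;
}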

4 files changed: 42 additions & 15 deletions

drivers/net/ethernet/marvell/skge.c

Lines changed: 1 addition & 1 deletion
@@ -3189,7 +3189,7 @@ static int skge_poll(struct napi_struct *napi, int to_do)
         if (work_done < to_do) {
                 unsigned long flags;
 
-                napi_gro_flush(napi);
+                napi_gro_flush(napi, false);
                 spin_lock_irqsave(&hw->hw_lock, flags);
                 __napi_complete(napi);
                 hw->intr_mask |= napimask[skge->port];

drivers/net/ethernet/realtek/8139cp.c

Lines changed: 1 addition & 1 deletion
@@ -563,7 +563,7 @@ static int cp_rx_poll(struct napi_struct *napi, int budget)
                 if (cpr16(IntrStatus) & cp_rx_intr_mask)
                         goto rx_status_loop;
 
-                napi_gro_flush(napi);
+                napi_gro_flush(napi, false);
                 spin_lock_irqsave(&cp->lock, flags);
                 __napi_complete(napi);
                 cpw16_f(IntrMask, cp_intr_mask);
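Both driver hunks pass flush_old = false because they run right before __napi_complete(): the device is about to leave polling mode, so everything still sitting in gro_list must be delivered now. Drivers that use plain napi_complete() get the same full flush from core code (see the net/core/dev.c hunk further down). A hypothetical poll handler following the same pattern, simplified; foo_rx_clean() and foo_enable_rx_irq() are placeholders, not functions from this patch, and the real drivers also wrap __napi_complete() in their own irq-safe locking as the hunks above show:

static int foo_poll(struct napi_struct *napi, int budget)
{
        /* foo_rx_clean() stands in for the driver's RX ring processing */
        int work_done = foo_rx_clean(napi, budget);

        if (work_done < budget) {
                /* leaving polling mode: flush everything, not only old packets */
                napi_gro_flush(napi, false);
                __napi_complete(napi);          /* real drivers hold their lock here */
                foo_enable_rx_irq(napi);        /* re-enable device interrupts */
        }
        return work_done;
}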

include/linux/netdevice.h

Lines changed: 9 additions & 6 deletions
@@ -1497,19 +1497,22 @@ struct napi_gro_cb {
         /* This indicates where we are processing relative to skb->data. */
         int data_offset;
 
-        /* This is non-zero if the packet may be of the same flow. */
-        int same_flow;
-
         /* This is non-zero if the packet cannot be merged with the new skb. */
         int flush;
 
         /* Number of segments aggregated. */
-        int count;
+        u16 count;
+
+        /* This is non-zero if the packet may be of the same flow. */
+        u8 same_flow;
 
         /* Free the skb? */
-        int free;
+        u8 free;
 #define NAPI_GRO_FREE 1
 #define NAPI_GRO_FREE_STOLEN_HEAD 2
+
+        /* jiffies when first packet was created/queued */
+        unsigned long age;
 };
 
 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)

@@ -2156,7 +2159,7 @@ extern gro_result_t dev_gro_receive(struct napi_struct *napi,
 extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb);
 extern gro_result_t napi_gro_receive(struct napi_struct *napi,
                                      struct sk_buff *skb);
-extern void napi_gro_flush(struct napi_struct *napi);
+extern void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 extern struct sk_buff * napi_get_frags(struct napi_struct *napi);
 extern gro_result_t napi_frags_finish(struct napi_struct *napi,
                                       struct sk_buff *skb,
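The type changes above (count to u16, same_flow and free to u8) are presumably there to offset the new unsigned long age member: NAPI_GRO_CB() overlays struct napi_gro_cb on the fixed-size skb->cb[] scratch area, so the structure cannot simply keep growing. A hypothetical compile-time check for that constraint, not part of this patch (assumes <linux/skbuff.h> and <linux/bug.h>):

/* Hypothetical check, never called: fails the build if napi_gro_cb
 * no longer fits in the skb control block it is overlaid on. */
static inline void napi_gro_cb_fits_in_cb(void)
{
        BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(((struct sk_buff *)0)->cb));
}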

net/core/dev.c

Lines changed: 31 additions & 7 deletions
@@ -3471,17 +3471,31 @@ static int napi_gro_complete(struct sk_buff *skb)
         return netif_receive_skb(skb);
 }
 
-inline void napi_gro_flush(struct napi_struct *napi)
+/* napi->gro_list contains packets ordered by age.
+ * youngest packets at the head of it.
+ * Complete skbs in reverse order to reduce latencies.
+ */
+void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
-        struct sk_buff *skb, *next;
+        struct sk_buff *skb, *prev = NULL;
 
-        for (skb = napi->gro_list; skb; skb = next) {
-                next = skb->next;
+        /* scan list and build reverse chain */
+        for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
+                skb->prev = prev;
+                prev = skb;
+        }
+
+        for (skb = prev; skb; skb = prev) {
                 skb->next = NULL;
+
+                if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
+                        return;
+
+                prev = skb->prev;
                 napi_gro_complete(skb);
+                napi->gro_count--;
         }
 
-        napi->gro_count = 0;
         napi->gro_list = NULL;
 }
 EXPORT_SYMBOL(napi_gro_flush);

@@ -3542,6 +3556,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
         napi->gro_count++;
         NAPI_GRO_CB(skb)->count = 1;
+        NAPI_GRO_CB(skb)->age = jiffies;
         skb_shinfo(skb)->gso_size = skb_gro_len(skb);
         skb->next = napi->gro_list;
         napi->gro_list = skb;

@@ -3878,7 +3893,7 @@ void napi_complete(struct napi_struct *n)
         if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
                 return;
 
-        napi_gro_flush(n);
+        napi_gro_flush(n, false);
         local_irq_save(flags);
         __napi_complete(n);
         local_irq_restore(flags);

@@ -3983,8 +3998,17 @@ static void net_rx_action(struct softirq_action *h)
                                 local_irq_enable();
                                 napi_complete(n);
                                 local_irq_disable();
-                        } else
+                        } else {
+                                if (n->gro_list) {
+                                        /* flush too old packets
+                                         * If HZ < 1000, flush all packets.
+                                         */
+                                        local_irq_enable();
+                                        napi_gro_flush(n, HZ >= 1000);
+                                        local_irq_disable();
+                                }
                                 list_move_tail(&n->poll_list, &sd->poll_list);
+                        }
                 }
 
                 netpoll_poll_unlock(have);
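napi->gro_list keeps the youngest packet at its head, so walking it forward would complete packets youngest first, which is exactly the ordering problem the changelog describes. The reworked napi_gro_flush() therefore builds a prev chain in one pass and then completes packets from the tail (oldest) back toward the head, stopping as soon as it meets a current-tick packet when flush_old is set. A small self-contained userspace model of that traversal, not kernel code; struct pkt and flush() are made-up names:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct pkt {
        unsigned long age;              /* tick at which the packet was queued */
        struct pkt *next, *prev;
};

/* Models napi_gro_flush(): complete oldest packets first; with flush_old,
 * keep whatever was queued during the current tick. */
static void flush(struct pkt **list, unsigned long jiffies, bool flush_old)
{
        struct pkt *skb, *prev = NULL;

        /* one pass to build the reverse chain */
        for (skb = *list; skb != NULL; skb = skb->next) {
                skb->prev = prev;
                prev = skb;
        }

        for (skb = prev; skb; skb = prev) {
                skb->next = NULL;
                if (flush_old && skb->age == jiffies)
                        return;         /* everything left is from the current tick */
                prev = skb->prev;
                printf("completing packet, age=%lu\n", skb->age);
                free(skb);
        }
        *list = NULL;
}

int main(void)
{
        struct pkt *list = NULL;
        unsigned long age;

        /* queue ages 7..10; each new packet goes to the head (youngest first) */
        for (age = 7; age <= 10; age++) {
                struct pkt *p = calloc(1, sizeof(*p));
                p->age = age;
                p->next = list;
                list = p;
        }
        flush(&list, 10, true);         /* completes 7, 8, 9; keeps the age-10 packet */
        free(list);
        return 0;
}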
