Skip to content

Commit 9954729

Browse files
wdebruij authored and davem330 committed
packet: rollover only to socket with headroom
Only migrate flows to sockets that have sufficient headroom, where sufficient is defined as having at least 25% empty space.

The kernel has three different buffer types: a regular socket, a ring with frames (TPACKET_V[12]) or a ring with blocks (TPACKET_V3). The latter two do not expose a read pointer to the kernel, so headroom is not computed easily. All three need a different implementation to estimate free space.

Tested: Ran bench_rollover for 10 sec with 1.5 Mpps of single flow input.

bench_rollover has as many sockets as there are NIC receive queues in the system. Each socket is owned by a process that is pinned to one of the receive cpus. RFS is disabled. RPS is enabled with an identity mapping (cpu x -> cpu x), to count drops with softnettop.

lpbb5:/export/hda3/willemb# ./bench_rollover -r -l 1000 -s
Press [Enter] to exit

cpu rx rx.k drop.k rollover r.huge r.failed
0 16 16 0 0 0 0
1 21 21 0 0 0 0
2 5227502 5227502 0 0 0 0
3 18 18 0 0 0 0
4 6083289 6083289 0 5227496 0 0
5 22 22 0 0 0 0
6 21 21 0 0 0 0
7 9 9 0 0 0 0

Signed-off-by: Willem de Bruijn <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 0648ab7 commit 9954729

File tree

1 file changed

+59
-17
lines changed

1 file changed

+59
-17
lines changed

net/packet/af_packet.c

Lines changed: 59 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,27 +1234,68 @@ static void packet_free_pending(struct packet_sock *po)
12341234
free_percpu(po->tx_ring.pending_refcnt);
12351235
}
12361236

1237-
static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1237+
#define ROOM_POW_OFF 2
1238+
#define ROOM_NONE 0x0
1239+
#define ROOM_LOW 0x1
1240+
#define ROOM_NORMAL 0x2
1241+
1242+
static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
12381243
{
1239-
struct sock *sk = &po->sk;
1240-
bool has_room;
1244+
int idx, len;
1245+
1246+
len = po->rx_ring.frame_max + 1;
1247+
idx = po->rx_ring.head;
1248+
if (pow_off)
1249+
idx += len >> pow_off;
1250+
if (idx >= len)
1251+
idx -= len;
1252+
return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1253+
}
12411254

1242-
if (po->prot_hook.func != tpacket_rcv)
1243-
return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
1244-
<= sk->sk_rcvbuf;
1255+
static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
1256+
{
1257+
int idx, len;
1258+
1259+
len = po->rx_ring.prb_bdqc.knum_blocks;
1260+
idx = po->rx_ring.prb_bdqc.kactive_blk_num;
1261+
if (pow_off)
1262+
idx += len >> pow_off;
1263+
if (idx >= len)
1264+
idx -= len;
1265+
return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1266+
}
1267+
1268+
static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1269+
{
1270+
struct sock *sk = &po->sk;
1271+
int ret = ROOM_NONE;
1272+
1273+
if (po->prot_hook.func != tpacket_rcv) {
1274+
int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1275+
- skb->truesize;
1276+
if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
1277+
return ROOM_NORMAL;
1278+
else if (avail > 0)
1279+
return ROOM_LOW;
1280+
else
1281+
return ROOM_NONE;
1282+
}
12451283

12461284
spin_lock(&sk->sk_receive_queue.lock);
1247-
if (po->tp_version == TPACKET_V3)
1248-
has_room = prb_lookup_block(po, &po->rx_ring,
1249-
po->rx_ring.prb_bdqc.kactive_blk_num,
1250-
TP_STATUS_KERNEL);
1251-
else
1252-
has_room = packet_lookup_frame(po, &po->rx_ring,
1253-
po->rx_ring.head,
1254-
TP_STATUS_KERNEL);
1285+
if (po->tp_version == TPACKET_V3) {
1286+
if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
1287+
ret = ROOM_NORMAL;
1288+
else if (__tpacket_v3_has_room(po, 0))
1289+
ret = ROOM_LOW;
1290+
} else {
1291+
if (__tpacket_has_room(po, ROOM_POW_OFF))
1292+
ret = ROOM_NORMAL;
1293+
else if (__tpacket_has_room(po, 0))
1294+
ret = ROOM_LOW;
1295+
}
12551296
spin_unlock(&sk->sk_receive_queue.lock);
12561297

1257-
return has_room;
1298+
return ret;
12581299
}
12591300

12601301
static void packet_sock_destruct(struct sock *sk)
@@ -1325,12 +1366,13 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
13251366
unsigned int i, j;
13261367

13271368
po = pkt_sk(f->arr[idx]);
1328-
if (try_self && packet_rcv_has_room(po, skb))
1369+
if (try_self && packet_rcv_has_room(po, skb) != ROOM_NONE)
13291370
return idx;
13301371

13311372
i = j = min_t(int, po->rollover->sock, num - 1);
13321373
do {
1333-
if (i != idx && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
1374+
if (i != idx &&
1375+
packet_rcv_has_room(pkt_sk(f->arr[i]), skb) == ROOM_NORMAL) {
13341376
if (i != j)
13351377
po->rollover->sock = i;
13361378
return i;

0 commit comments

Comments
 (0)