Skip to content

Commit aa276dd

Browse files
committed
Merge branch 'TPACKET_V3-TX_RING-support'
Sowmini Varadhan says: ==================== TPACKET_V3 TX_RING support This patch series allows an application to use a single PF_PACKET descriptor and leverage the best implementations of TX_RING and RX_RING that exist today. Patch 1 adds the kernel/Documentation changes for TX_RING support and patch 2 adds the associated test case in selftests. Changes since v2: additional sanity checks for setsockopt input for TX_RING/TPACKET_V3. Refactored psock_tpacket.c test code to avoid code duplication from V2. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents e7072f6 + fe878ca commit aa276dd

File tree

3 files changed

+111
-28
lines changed

3 files changed

+111
-28
lines changed

Documentation/networking/packet_mmap.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ TPACKET_V1 --> TPACKET_V2:
565565
(void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
566566

567567
TPACKET_V2 --> TPACKET_V3:
568-
- Flexible buffer implementation:
568+
- Flexible buffer implementation for RX_RING:
569569
1. Blocks can be configured with non-static frame-size
570570
2. Read/poll is at a block-level (as opposed to packet-level)
571571
3. Added poll timeout to avoid indefinite user-space wait
@@ -574,7 +574,12 @@ TPACKET_V2 --> TPACKET_V3:
574574
4.1 block::timeout
575575
4.2 tpkt_hdr::sk_rxhash
576576
- RX Hash data available in user space
577-
- Currently only RX_RING available
577+
- TX_RING semantics are conceptually similar to TPACKET_V2;
578+
use tpacket3_hdr instead of tpacket2_hdr, and TPACKET3_HDRLEN
579+
instead of TPACKET2_HDRLEN. In the current implementation,
580+
the tp_next_offset field in the tpacket3_hdr MUST be set to
581+
zero, indicating that the ring does not hold variable-sized frames.
582+
Packets with non-zero values of tp_next_offset will be dropped.
578583

579584
-------------------------------------------------------------------------------
580585
+ AF_PACKET fanout mode

net/packet/af_packet.c

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
409409
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
410410
break;
411411
case TPACKET_V3:
412+
h.h3->tp_status = status;
413+
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
414+
break;
412415
default:
413416
WARN(1, "TPACKET version not supported.\n");
414417
BUG();
@@ -432,6 +435,8 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
432435
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
433436
return h.h2->tp_status;
434437
case TPACKET_V3:
438+
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
439+
return h.h3->tp_status;
435440
default:
436441
WARN(1, "TPACKET version not supported.\n");
437442
BUG();
@@ -2497,6 +2502,13 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
24972502
ph.raw = frame;
24982503

24992504
switch (po->tp_version) {
2505+
case TPACKET_V3:
2506+
if (ph.h3->tp_next_offset != 0) {
2507+
pr_warn_once("variable sized slot not supported");
2508+
return -EINVAL;
2509+
}
2510+
tp_len = ph.h3->tp_len;
2511+
break;
25002512
case TPACKET_V2:
25012513
tp_len = ph.h2->tp_len;
25022514
break;
@@ -2516,6 +2528,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
25162528
off_max = po->tx_ring.frame_size - tp_len;
25172529
if (po->sk.sk_type == SOCK_DGRAM) {
25182530
switch (po->tp_version) {
2531+
case TPACKET_V3:
2532+
off = ph.h3->tp_net;
2533+
break;
25192534
case TPACKET_V2:
25202535
off = ph.h2->tp_net;
25212536
break;
@@ -2525,6 +2540,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
25252540
}
25262541
} else {
25272542
switch (po->tp_version) {
2543+
case TPACKET_V3:
2544+
off = ph.h3->tp_mac;
2545+
break;
25282546
case TPACKET_V2:
25292547
off = ph.h2->tp_mac;
25302548
break;
@@ -4113,11 +4131,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
41134131
struct tpacket_req *req = &req_u->req;
41144132

41154133
lock_sock(sk);
4116-
/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
4117-
if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
4118-
net_warn_ratelimited("Tx-ring is not supported.\n");
4119-
goto out;
4120-
}
41214134

41224135
rb = tx_ring ? &po->tx_ring : &po->rx_ring;
41234136
rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
@@ -4177,11 +4190,19 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
41774190
goto out;
41784191
switch (po->tp_version) {
41794192
case TPACKET_V3:
4180-
/* Transmit path is not supported. We checked
4181-
* it above but just being paranoid
4182-
*/
4183-
if (!tx_ring)
4193+
/* Block transmit is not supported yet */
4194+
if (!tx_ring) {
41844195
init_prb_bdqc(po, rb, pg_vec, req_u);
4196+
} else {
4197+
struct tpacket_req3 *req3 = &req_u->req3;
4198+
4199+
if (req3->tp_retire_blk_tov ||
4200+
req3->tp_sizeof_priv ||
4201+
req3->tp_feature_req_word) {
4202+
err = -EINVAL;
4203+
goto out;
4204+
}
4205+
}
41854206
break;
41864207
default:
41874208
break;

tools/testing/selftests/net/psock_tpacket.c

Lines changed: 74 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -311,20 +311,33 @@ static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
311311
__sync_synchronize();
312312
}
313313

314-
static inline int __v1_v2_tx_kernel_ready(void *base, int version)
314+
static inline int __v3_tx_kernel_ready(struct tpacket3_hdr *hdr)
315+
{
316+
return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
317+
}
318+
319+
static inline void __v3_tx_user_ready(struct tpacket3_hdr *hdr)
320+
{
321+
hdr->tp_status = TP_STATUS_SEND_REQUEST;
322+
__sync_synchronize();
323+
}
324+
325+
static inline int __tx_kernel_ready(void *base, int version)
315326
{
316327
switch (version) {
317328
case TPACKET_V1:
318329
return __v1_tx_kernel_ready(base);
319330
case TPACKET_V2:
320331
return __v2_tx_kernel_ready(base);
332+
case TPACKET_V3:
333+
return __v3_tx_kernel_ready(base);
321334
default:
322335
bug_on(1);
323336
return 0;
324337
}
325338
}
326339

327-
static inline void __v1_v2_tx_user_ready(void *base, int version)
340+
static inline void __tx_user_ready(void *base, int version)
328341
{
329342
switch (version) {
330343
case TPACKET_V1:
@@ -333,6 +346,9 @@ static inline void __v1_v2_tx_user_ready(void *base, int version)
333346
case TPACKET_V2:
334347
__v2_tx_user_ready(base);
335348
break;
349+
case TPACKET_V3:
350+
__v3_tx_user_ready(base);
351+
break;
336352
}
337353
}
338354

@@ -348,7 +364,22 @@ static void __v1_v2_set_packet_loss_discard(int sock)
348364
}
349365
}
350366

351-
static void walk_v1_v2_tx(int sock, struct ring *ring)
367+
static inline void *get_next_frame(struct ring *ring, int n)
368+
{
369+
uint8_t *f0 = ring->rd[0].iov_base;
370+
371+
switch (ring->version) {
372+
case TPACKET_V1:
373+
case TPACKET_V2:
374+
return ring->rd[n].iov_base;
375+
case TPACKET_V3:
376+
return f0 + (n * ring->req3.tp_frame_size);
377+
default:
378+
bug_on(1);
379+
}
380+
}
381+
382+
static void walk_tx(int sock, struct ring *ring)
352383
{
353384
struct pollfd pfd;
354385
int rcv_sock, ret;
@@ -360,9 +391,19 @@ static void walk_v1_v2_tx(int sock, struct ring *ring)
360391
.sll_family = PF_PACKET,
361392
.sll_halen = ETH_ALEN,
362393
};
394+
int nframes;
395+
396+
/* TPACKET_V{1,2} sets up the ring->rd* related variables based
397+
* on frames (e.g., rd_num is tp_frame_nr) whereas V3 sets these
398+
* up based on blocks (e.g., rd_num is tp_block_nr)
399+
*/
400+
if (ring->version <= TPACKET_V2)
401+
nframes = ring->rd_num;
402+
else
403+
nframes = ring->req3.tp_frame_nr;
363404

364405
bug_on(ring->type != PACKET_TX_RING);
365-
bug_on(ring->rd_num < NUM_PACKETS);
406+
bug_on(nframes < NUM_PACKETS);
366407

367408
rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
368409
if (rcv_sock == -1) {
@@ -388,10 +429,11 @@ static void walk_v1_v2_tx(int sock, struct ring *ring)
388429
create_payload(packet, &packet_len);
389430

390431
while (total_packets > 0) {
391-
while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base,
392-
ring->version) &&
432+
void *next = get_next_frame(ring, frame_num);
433+
434+
while (__tx_kernel_ready(next, ring->version) &&
393435
total_packets > 0) {
394-
ppd.raw = ring->rd[frame_num].iov_base;
436+
ppd.raw = next;
395437

396438
switch (ring->version) {
397439
case TPACKET_V1:
@@ -413,14 +455,27 @@ static void walk_v1_v2_tx(int sock, struct ring *ring)
413455
packet_len);
414456
total_bytes += ppd.v2->tp_h.tp_snaplen;
415457
break;
458+
case TPACKET_V3: {
459+
struct tpacket3_hdr *tx = next;
460+
461+
tx->tp_snaplen = packet_len;
462+
tx->tp_len = packet_len;
463+
tx->tp_next_offset = 0;
464+
465+
memcpy((uint8_t *)tx + TPACKET3_HDRLEN -
466+
sizeof(struct sockaddr_ll), packet,
467+
packet_len);
468+
total_bytes += tx->tp_snaplen;
469+
break;
470+
}
416471
}
417472

418473
status_bar_update();
419474
total_packets--;
420475

421-
__v1_v2_tx_user_ready(ppd.raw, ring->version);
476+
__tx_user_ready(next, ring->version);
422477

423-
frame_num = (frame_num + 1) % ring->rd_num;
478+
frame_num = (frame_num + 1) % nframes;
424479
}
425480

426481
poll(&pfd, 1, 1);
@@ -460,7 +515,7 @@ static void walk_v1_v2(int sock, struct ring *ring)
460515
if (ring->type == PACKET_RX_RING)
461516
walk_v1_v2_rx(sock, ring);
462517
else
463-
walk_v1_v2_tx(sock, ring);
518+
walk_tx(sock, ring);
464519
}
465520

466521
static uint64_t __v3_prev_block_seq_num = 0;
@@ -583,7 +638,7 @@ static void walk_v3(int sock, struct ring *ring)
583638
if (ring->type == PACKET_RX_RING)
584639
walk_v3_rx(sock, ring);
585640
else
586-
bug_on(1);
641+
walk_tx(sock, ring);
587642
}
588643

589644
static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
@@ -602,12 +657,13 @@ static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
602657
ring->flen = ring->req.tp_frame_size;
603658
}
604659

605-
static void __v3_fill(struct ring *ring, unsigned int blocks)
660+
static void __v3_fill(struct ring *ring, unsigned int blocks, int type)
606661
{
607-
ring->req3.tp_retire_blk_tov = 64;
608-
ring->req3.tp_sizeof_priv = 0;
609-
ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
610-
662+
if (type == PACKET_RX_RING) {
663+
ring->req3.tp_retire_blk_tov = 64;
664+
ring->req3.tp_sizeof_priv = 0;
665+
ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
666+
}
611667
ring->req3.tp_block_size = getpagesize() << 2;
612668
ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
613669
ring->req3.tp_block_nr = blocks;
@@ -641,7 +697,7 @@ static void setup_ring(int sock, struct ring *ring, int version, int type)
641697
break;
642698

643699
case TPACKET_V3:
644-
__v3_fill(ring, blocks);
700+
__v3_fill(ring, blocks, type);
645701
ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
646702
sizeof(ring->req3));
647703
break;
@@ -796,6 +852,7 @@ int main(void)
796852
ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING);
797853

798854
ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
855+
ret |= test_tpacket(TPACKET_V3, PACKET_TX_RING);
799856

800857
if (ret)
801858
return 1;

0 commit comments

Comments
 (0)