Skip to content

Commit 7f953ab

Browse files
sowminiv authored and davem330 committed
af_packet: TX_RING support for TPACKET_V3
Although TPACKET_V3 Rx has some benefits over TPACKET_V2 Rx, *_v3 does not currently have TX_RING support. As a result, an application that wants the best perf for Tx and Rx (e.g. to handle request/response transactions) ends up needing 2 sockets, one with *_v2 for Tx and another with *_v3 for Rx. This patch enables TPACKET_V2 compatible Tx features in TPACKET_V3 so that an application can use a single descriptor to get the benefits of _v3 RX_RING and _v2 TX_RING. An application may do a block-send by first filling up multiple frames in the Tx ring and then triggering a transmit. This patch only supports fixed-size Tx frames for TPACKET_V3, and requires that tp_next_offset be zero. Signed-off-by: Sowmini Varadhan <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent e7072f6 commit 7f953ab

File tree

2 files changed

+37
-11
lines changed

2 files changed

+37
-11
lines changed

Documentation/networking/packet_mmap.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ TPACKET_V1 --> TPACKET_V2:
565565
(void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
566566

567567
TPACKET_V2 --> TPACKET_V3:
568-
- Flexible buffer implementation:
568+
- Flexible buffer implementation for RX_RING:
569569
1. Blocks can be configured with non-static frame-size
570570
2. Read/poll is at a block-level (as opposed to packet-level)
571571
3. Added poll timeout to avoid indefinite user-space wait
@@ -574,7 +574,12 @@ TPACKET_V2 --> TPACKET_V3:
574574
4.1 block::timeout
575575
4.2 tpkt_hdr::sk_rxhash
576576
- RX Hash data available in user space
577-
- Currently only RX_RING available
577+
- TX_RING semantics are conceptually similar to TPACKET_V2;
578+
use tpacket3_hdr instead of tpacket2_hdr, and TPACKET3_HDRLEN
579+
instead of TPACKET2_HDRLEN. In the current implementation,
580+
the tp_next_offset field in the tpacket3_hdr MUST be set to
581+
zero, indicating that the ring does not hold variable sized frames.
582+
Packets with non-zero values of tp_next_offset will be dropped.
578583

579584
-------------------------------------------------------------------------------
580585
+ AF_PACKET fanout mode

net/packet/af_packet.c

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
409409
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
410410
break;
411411
case TPACKET_V3:
412+
h.h3->tp_status = status;
413+
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
414+
break;
412415
default:
413416
WARN(1, "TPACKET version not supported.\n");
414417
BUG();
@@ -432,6 +435,8 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
432435
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
433436
return h.h2->tp_status;
434437
case TPACKET_V3:
438+
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
439+
return h.h3->tp_status;
435440
default:
436441
WARN(1, "TPACKET version not supported.\n");
437442
BUG();
@@ -2497,6 +2502,13 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
24972502
ph.raw = frame;
24982503

24992504
switch (po->tp_version) {
2505+
case TPACKET_V3:
2506+
if (ph.h3->tp_next_offset != 0) {
2507+
pr_warn_once("variable sized slot not supported");
2508+
return -EINVAL;
2509+
}
2510+
tp_len = ph.h3->tp_len;
2511+
break;
25002512
case TPACKET_V2:
25012513
tp_len = ph.h2->tp_len;
25022514
break;
@@ -2516,6 +2528,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
25162528
off_max = po->tx_ring.frame_size - tp_len;
25172529
if (po->sk.sk_type == SOCK_DGRAM) {
25182530
switch (po->tp_version) {
2531+
case TPACKET_V3:
2532+
off = ph.h3->tp_net;
2533+
break;
25192534
case TPACKET_V2:
25202535
off = ph.h2->tp_net;
25212536
break;
@@ -2525,6 +2540,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
25252540
}
25262541
} else {
25272542
switch (po->tp_version) {
2543+
case TPACKET_V3:
2544+
off = ph.h3->tp_mac;
2545+
break;
25282546
case TPACKET_V2:
25292547
off = ph.h2->tp_mac;
25302548
break;
@@ -4113,11 +4131,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
41134131
struct tpacket_req *req = &req_u->req;
41144132

41154133
lock_sock(sk);
4116-
/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
4117-
if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
4118-
net_warn_ratelimited("Tx-ring is not supported.\n");
4119-
goto out;
4120-
}
41214134

41224135
rb = tx_ring ? &po->tx_ring : &po->rx_ring;
41234136
rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
@@ -4177,11 +4190,19 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
41774190
goto out;
41784191
switch (po->tp_version) {
41794192
case TPACKET_V3:
4180-
/* Transmit path is not supported. We checked
4181-
* it above but just being paranoid
4182-
*/
4183-
if (!tx_ring)
4193+
/* Block transmit is not supported yet */
4194+
if (!tx_ring) {
41844195
init_prb_bdqc(po, rb, pg_vec, req_u);
4196+
} else {
4197+
struct tpacket_req3 *req3 = &req_u->req3;
4198+
4199+
if (req3->tp_retire_blk_tov ||
4200+
req3->tp_sizeof_priv ||
4201+
req3->tp_feature_req_word) {
4202+
err = -EINVAL;
4203+
goto out;
4204+
}
4205+
}
41854206
break;
41864207
default:
41874208
break;

0 commit comments

Comments
 (0)