@@ -87,7 +87,8 @@ int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy)
 	rx_bd = (struct eth_rx_bd *)qed_chain_produce(&rxq->rx_bd_ring);
 	WARN_ON(!rx_bd);
 	rx_bd->addr.hi = cpu_to_le32(upper_32_bits(mapping));
-	rx_bd->addr.lo = cpu_to_le32(lower_32_bits(mapping));
+	rx_bd->addr.lo = cpu_to_le32(lower_32_bits(mapping) +
+				     rxq->rx_headroom);
 
 	rxq->sw_rx_prod++;
 	rxq->filled_buffers++;
@@ -360,7 +361,8 @@ static int qede_xdp_xmit(struct qede_dev *edev, struct qede_fastpath *fp,
 				   metadata->mapping + padding,
 				   length, PCI_DMA_TODEVICE);
 
-	txq->sw_tx_ring.pages[idx] = metadata->data;
+	txq->sw_tx_ring.xdp[idx].page = metadata->data;
+	txq->sw_tx_ring.xdp[idx].mapping = metadata->mapping;
 	txq->sw_tx_prod++;
 
 	/* Mark the fastpath for future XDP doorbell */
@@ -384,19 +386,19 @@ int qede_txq_has_work(struct qede_tx_queue *txq)
 
 static void qede_xdp_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
-	struct eth_tx_1st_bd *bd;
-	u16 hw_bd_cons;
+	u16 hw_bd_cons, idx;
 
 	hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
 	barrier();
 
 	while (hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl)) {
-		bd = (struct eth_tx_1st_bd *)qed_chain_consume(&txq->tx_pbl);
+		qed_chain_consume(&txq->tx_pbl);
+		idx = txq->sw_tx_cons & NUM_TX_BDS_MAX;
 
-		dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(bd),
-				 PAGE_SIZE, DMA_BIDIRECTIONAL);
-		__free_page(txq->sw_tx_ring.pages[txq->sw_tx_cons &
-						  NUM_TX_BDS_MAX]);
+		dma_unmap_page(&edev->pdev->dev,
+			       txq->sw_tx_ring.xdp[idx].mapping,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		__free_page(txq->sw_tx_ring.xdp[idx].page);
 
 		txq->sw_tx_cons++;
 		txq->xmit_pkts++;
@@ -508,7 +510,8 @@ static inline void qede_reuse_page(struct qede_rx_queue *rxq,
 	new_mapping = curr_prod->mapping + curr_prod->page_offset;
 
 	rx_bd_prod->addr.hi = cpu_to_le32(upper_32_bits(new_mapping));
-	rx_bd_prod->addr.lo = cpu_to_le32(lower_32_bits(new_mapping));
+	rx_bd_prod->addr.lo = cpu_to_le32(lower_32_bits(new_mapping) +
+					  rxq->rx_headroom);
 
 	rxq->sw_rx_prod++;
 	curr_cons->data = NULL;
@@ -624,7 +627,6 @@ static inline void qede_skb_receive(struct qede_dev *edev,
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
 	napi_gro_receive(&fp->napi, skb);
-	rxq->rcv_pkts++;
 }
 
 static void qede_set_gro_params(struct qede_dev *edev,
@@ -884,9 +886,9 @@ static inline void qede_tpa_cont(struct qede_dev *edev,
 		       "Strange - TPA cont with more than a single len_list entry\n");
 }
 
-static void qede_tpa_end(struct qede_dev *edev,
-			 struct qede_fastpath *fp,
-			 struct eth_fast_path_rx_tpa_end_cqe *cqe)
+static int qede_tpa_end(struct qede_dev *edev,
+			struct qede_fastpath *fp,
+			struct eth_fast_path_rx_tpa_end_cqe *cqe)
 {
 	struct qede_rx_queue *rxq = fp->rxq;
 	struct qede_agg_info *tpa_info;
@@ -934,11 +936,12 @@ static void qede_tpa_end(struct qede_dev *edev,
 
 	tpa_info->state = QEDE_AGG_STATE_NONE;
 
-	return;
+	return 1;
 err:
 	tpa_info->state = QEDE_AGG_STATE_NONE;
 	dev_kfree_skb_any(tpa_info->skb);
 	tpa_info->skb = NULL;
+	return 0;
 }
 
 static u8 qede_check_notunn_csum(u16 flag)
@@ -990,14 +993,15 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 			struct qede_rx_queue *rxq,
 			struct bpf_prog *prog,
 			struct sw_rx_data *bd,
-			struct eth_fast_path_rx_reg_cqe *cqe)
+			struct eth_fast_path_rx_reg_cqe *cqe,
+			u16 *data_offset, u16 *len)
 {
-	u16 len = le16_to_cpu(cqe->len_on_first_bd);
 	struct xdp_buff xdp;
 	enum xdp_action act;
 
-	xdp.data = page_address(bd->data) + cqe->placement_offset;
-	xdp.data_end = xdp.data + len;
+	xdp.data_hard_start = page_address(bd->data);
+	xdp.data = xdp.data_hard_start + *data_offset;
+	xdp.data_end = xdp.data + *len;
 
 	/* Queues always have a full reset currently, so for the time
 	 * being until there's atomic program replace just mark read
@@ -1007,6 +1011,10 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 	act = bpf_prog_run_xdp(prog, &xdp);
 	rcu_read_unlock();
 
+	/* Recalculate, as XDP might have changed the headers */
+	*data_offset = xdp.data - xdp.data_hard_start;
+	*len = xdp.data_end - xdp.data;
+
 	if (act == XDP_PASS)
 		return true;
 
@@ -1025,7 +1033,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 		/* Now if there's a transmission problem, we'd still have to
 		 * throw current buffer, as replacement was already allocated.
 		 */
-		if (qede_xdp_xmit(edev, fp, bd, cqe->placement_offset, len)) {
+		if (qede_xdp_xmit(edev, fp, bd, *data_offset, *len)) {
 			dma_unmap_page(rxq->dev, bd->mapping,
 				       PAGE_SIZE, DMA_BIDIRECTIONAL);
 			__free_page(bd->data);
@@ -1052,7 +1060,7 @@ static struct sk_buff *qede_rx_allocate_skb(struct qede_dev *edev,
 					    struct sw_rx_data *bd, u16 len,
 					    u16 pad)
 {
-	unsigned int offset = bd->page_offset;
+	unsigned int offset = bd->page_offset + pad;
 	struct skb_frag_struct *frag;
 	struct page *page = bd->data;
 	unsigned int pull_len;
@@ -1069,15 +1077,15 @@ static struct sk_buff *qede_rx_allocate_skb(struct qede_dev *edev,
 	 */
 	if (len + pad <= edev->rx_copybreak) {
 		memcpy(skb_put(skb, len),
-		       page_address(page) + pad + offset, len);
+		       page_address(page) + offset, len);
 		qede_reuse_page(rxq, bd);
 		goto out;
 	}
 
 	frag = &skb_shinfo(skb)->frags[0];
 
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-			page, pad + offset, len, rxq->rx_buf_seg_size);
+			page, offset, len, rxq->rx_buf_seg_size);
 
 	va = skb_frag_address(frag);
 	pull_len = eth_get_headlen(va, QEDE_RX_HDR_SIZE);
@@ -1178,8 +1186,7 @@ static int qede_rx_process_tpa_cqe(struct qede_dev *edev,
 		qede_tpa_cont(edev, rxq, &cqe->fast_path_tpa_cont);
 		return 0;
 	case ETH_RX_CQE_TYPE_TPA_END:
-		qede_tpa_end(edev, fp, &cqe->fast_path_tpa_end);
-		return 1;
+		return qede_tpa_end(edev, fp, &cqe->fast_path_tpa_end);
 	default:
 		return 0;
 	}
@@ -1224,12 +1231,13 @@ static int qede_rx_process_cqe(struct qede_dev *edev,
 
 	fp_cqe = &cqe->fast_path_regular;
 	len = le16_to_cpu(fp_cqe->len_on_first_bd);
-	pad = fp_cqe->placement_offset;
+	pad = fp_cqe->placement_offset + rxq->rx_headroom;
 
 	/* Run eBPF program if one is attached */
 	if (xdp_prog)
-		if (!qede_rx_xdp(edev, fp, rxq, xdp_prog, bd, fp_cqe))
-			return 1;
+		if (!qede_rx_xdp(edev, fp, rxq, xdp_prog, bd, fp_cqe,
+				 &pad, &len))
+			return 0;
 
 	/* If this is an error packet then drop it */
 	flags = cqe->fast_path_regular.pars_flags.flags;
@@ -1290,8 +1298,8 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
 {
 	struct qede_rx_queue *rxq = fp->rxq;
 	struct qede_dev *edev = fp->edev;
+	int work_done = 0, rcv_pkts = 0;
 	u16 hw_comp_cons, sw_comp_cons;
-	int work_done = 0;
 
 	hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
 	sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
@@ -1305,12 +1313,14 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
 
 	/* Loop to complete all indicated BDs */
 	while ((sw_comp_cons != hw_comp_cons) && (work_done < budget)) {
-		qede_rx_process_cqe(edev, fp, rxq);
+		rcv_pkts += qede_rx_process_cqe(edev, fp, rxq);
 		qed_chain_recycle_consumed(&rxq->rx_comp_ring);
 		sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
 		work_done++;
 	}
 
+	rxq->rcv_pkts += rcv_pkts;
+
 	/* Allocate replacement buffers */
 	while (rxq->num_rx_buffers - rxq->filled_buffers)
 		if (qede_alloc_rx_buffer(rxq, false))
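Note on the XDP TX hunks above: instead of keeping only the page pointer in sw_tx_ring.pages[idx], the queue now records both the page and its DMA mapping in sw_tx_ring.xdp[idx], so completion can call dma_unmap_page() without reading the address back out of the hardware BD. The entry type itself is defined in qede.h, which is not part of this diff; a minimal sketch of what such a per-slot record plausibly looks like (hypothetical layout, for illustration only):

	/* Hypothetical sketch of the per-slot XDP TX bookkeeping entry.
	 * The real definition lives in qede.h, outside this diff.
	 */
	#include <linux/types.h>	/* dma_addr_t */
	#include <linux/mm_types.h>	/* struct page */

	struct sw_tx_xdp {
		struct page *page;	/* buffer queued by XDP_TX */
		dma_addr_t mapping;	/* DMA address saved at xmit, used for unmap on completion */
	};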