Skip to content

Commit bf25146

Browse files
chaudron and Alexei Starovoitov
authored and committed
bpf: add frags support to the bpf_xdp_adjust_tail() API
This change adds support for tail growing and shrinking for XDP frags. When called on a non-linear packet with a grow request, it will work on the last fragment of the packet. So the maximum grow size is the last fragments tailroom, i.e. no new buffer will be allocated. A XDP frags capable driver is expected to set frag_size in xdp_rxq_info data structure to notify the XDP core the fragment size. frag_size set to 0 is interpreted by the XDP core as tail growing is not allowed. Introduce __xdp_rxq_info_reg utility routine to initialize frag_size field. When shrinking, it will work from the last fragment, all the way down to the base buffer depending on the shrinking size. It's important to mention that once you shrink down the fragment(s) are freed, so you can not grow again to the original size. Acked-by: Toke Hoiland-Jorgensen <[email protected]> Acked-by: John Fastabend <[email protected]> Acked-by: Jakub Kicinski <[email protected]> Co-developed-by: Lorenzo Bianconi <[email protected]> Signed-off-by: Lorenzo Bianconi <[email protected]> Signed-off-by: Eelco Chaudron <[email protected]> Link: https://lore.kernel.org/r/eabda3485dda4f2f158b477729337327e609461d.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 0165cc8 commit bf25146

File tree

4 files changed

+88
-8
lines changed

4 files changed

+88
-8
lines changed

drivers/net/ethernet/marvell/mvneta.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3298,7 +3298,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp,
32983298
return err;
32993299
}
33003300

3301-
err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
3301+
err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
3302+
PAGE_SIZE);
33023303
if (err < 0)
33033304
goto err_free_pp;
33043305

include/net/xdp.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct xdp_rxq_info {
6060
u32 reg_state;
6161
struct xdp_mem_info mem;
6262
unsigned int napi_id;
63+
u32 frag_size;
6364
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
6465

6566
struct xdp_txq_info {
@@ -304,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
304305
return xdp_frame;
305306
}
306307

308+
void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
309+
struct xdp_buff *xdp);
307310
void xdp_return_frame(struct xdp_frame *xdpf);
308311
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
309312
void xdp_return_buff(struct xdp_buff *xdp);
@@ -340,8 +343,17 @@ static inline void xdp_release_frame(struct xdp_frame *xdpf)
340343
__xdp_release_frame(xdpf->data, mem);
341344
}
342345

343-
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
344-
struct net_device *dev, u32 queue_index, unsigned int napi_id);
346+
/* Register an RX queue's xdp_rxq_info with the XDP core.
 *
 * @frag_size tells the XDP core the per-fragment buffer size so that
 * bpf_xdp_adjust_tail() can grow the tail of a frags (non-linear) packet
 * within the last fragment's tailroom; 0 means tail growing is not
 * supported on this queue.
 */
int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		       struct net_device *dev, u32 queue_index,
		       unsigned int napi_id, u32 frag_size);
static inline int
xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		 struct net_device *dev, u32 queue_index,
		 unsigned int napi_id)
{
	/* Legacy entry point: frag_size == 0 disables tail growing. */
	return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
}
356+
345357
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
346358
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
347359
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);

net/core/filter.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3830,11 +3830,76 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
38303830
.arg2_type = ARG_ANYTHING,
38313831
};
38323832

3833+
/* Grow the tail of a frags (non-linear) xdp_buff by @offset bytes.
 *
 * Only the last fragment is touched: the new bytes come out of that
 * fragment's tailroom, so no new buffer is ever allocated here.
 *
 * Return: 0 on success;
 *	   -EOPNOTSUPP if the driver did not advertise a usable frag_size
 *	   (frag_size == 0 means "tail growing not allowed");
 *	   -EINVAL if the last fragment lacks @offset bytes of tailroom.
 */
static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
	struct xdp_rxq_info *rxq = xdp->rxq;
	unsigned int tailroom;

	/* frag_size > frame_sz would make the tailroom subtraction below
	 * underflow, so treat it as unsupported as well.
	 */
	if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
		return -EOPNOTSUPP;

	tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
	if (unlikely(offset > tailroom))
		return -EINVAL;

	/* Zero the newly exposed bytes, then account them in both the
	 * fragment itself and the buff-wide frags byte count.
	 */
	memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
	skb_frag_size_add(frag, offset);
	sinfo->xdp_frags_size += offset;

	return 0;
}
3853+
3854+
/* Shrink the tail of a frags (non-linear) xdp_buff by @offset bytes.
 *
 * Walks the fragments from the last one toward the head. A fragment that
 * is consumed entirely is returned to the registered memory model; once
 * freed, the buff can not be grown back to its original size.
 *
 * Return: 0 on success, -EINVAL if shrinking would leave less than an
 *	   Ethernet header worth of packet data.
 */
static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	int i, n_frags_free = 0, len_free = 0;

	if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
		return -EINVAL;

	for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
		skb_frag_t *frag = &sinfo->frags[i];
		int shrink = min_t(int, offset, skb_frag_size(frag));

		len_free += shrink;
		offset -= shrink;

		if (skb_frag_size(frag) == shrink) {
			/* Fragment fully consumed: hand its page back to
			 * the memory model recorded in rxq->mem.
			 */
			struct page *page = skb_frag_page(frag);

			__xdp_return(page_address(page), &xdp->rxq->mem,
				     false, NULL);
			n_frags_free++;
		} else {
			/* Partial shrink: trim this fragment and stop. */
			skb_frag_size_sub(frag, shrink);
			break;
		}
	}
	sinfo->nr_frags -= n_frags_free;
	sinfo->xdp_frags_size -= len_free;

	if (unlikely(!sinfo->nr_frags)) {
		/* All fragments released: the buff is linear again, and any
		 * remaining shrink amount comes out of the base buffer.
		 */
		xdp_buff_clear_frags_flag(xdp);
		xdp->data_end -= offset;
	}

	return 0;
}
3890+
38333891
BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
38343892
{
38353893
void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
38363894
void *data_end = xdp->data_end + offset;
38373895

3896+
if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
3897+
if (offset < 0)
3898+
return bpf_xdp_frags_shrink_tail(xdp, -offset);
3899+
3900+
return bpf_xdp_frags_increase_tail(xdp, offset);
3901+
}
3902+
38383903
/* Notice that xdp_data_hard_end have reserved some tailroom */
38393904
if (unlikely(data_end > data_hard_end))
38403905
return -EINVAL;

net/core/xdp.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
162162
}
163163

164164
/* Returns 0 on success, negative on failure */
165-
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
166-
struct net_device *dev, u32 queue_index, unsigned int napi_id)
165+
int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
166+
struct net_device *dev, u32 queue_index,
167+
unsigned int napi_id, u32 frag_size)
167168
{
168169
if (!dev) {
169170
WARN(1, "Missing net_device from driver");
@@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
185186
xdp_rxq->dev = dev;
186187
xdp_rxq->queue_index = queue_index;
187188
xdp_rxq->napi_id = napi_id;
189+
xdp_rxq->frag_size = frag_size;
188190

189191
xdp_rxq->reg_state = REG_STATE_REGISTERED;
190192
return 0;
191193
}
192-
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
194+
EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
193195

194196
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
195197
{
@@ -369,8 +371,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
369371
* is used for those calls sites. Thus, allowing for faster recycling
370372
* of xdp_frames/pages in those cases.
371373
*/
372-
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
373-
struct xdp_buff *xdp)
374+
void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
375+
struct xdp_buff *xdp)
374376
{
375377
struct xdp_mem_allocator *xa;
376378
struct page *page;

0 commit comments

Comments
 (0)