Skip to content

Commit 4a54877

Browse files
committed
Merge branch 'sk_buff-add-extension-infrastructure'
Florian Westphal says: ==================== sk_buff: add extension infrastructure TL;DR: - objdiff shows no change if CONFIG_XFRM=n && BR_NETFILTER=n - small size reduction when one or both options are set - no changes in ipsec performance Changes since v1: - Allocate entire extension space from a kmem_cache. - Avoid atomic_dec_and_test operation on skb_ext_put() for refcnt == 1 case. (similar to kfree_skbmem() fclone_ref use). This adds an optional extension infrastructure, with ipsec (xfrm) and bridge netfilter as first users. The third (future) user is Multipath TCP which is still out-of-tree. MPTCP needs to map logical mptcp sequence numbers to the tcp sequence numbers used by individual subflows. This DSS mapping is read/written from tcp option space on receive and written to tcp option space on transmitted tcp packets that are part of an MPTCP connection. Extending skb_shared_info or adding a private data field to skb fclones doesn't work for incoming skb, so a different DSS propagation method would be required for the receive side. mptcp has the same requirements as secpath/bridge netfilter: 1. extension memory is released when the sk_buff is free'd. 2. data is shared after cloning an skb (clone inherits extension) 3. adding extension to an skb will COW the extension buffer if needed. Two new members are added to sk_buff: 1. 'active_extensions' byte (filling a hole), telling which extensions are available for this skb. This has two purposes. a) avoids the need to initialize the pointer. b) allows one to "delete" an extension by clearing its bit value in ->active_extensions. While it would be possible to store the active_extensions byte in the extension struct instead of sk_buff, there is one problem with this: When an extension has to be disabled, we can always clear the bit in skb->active_extensions. But in case it would be stored in the extension buffer itself, we might have to COW it first, if we are dealing with a cloned skb. 
On kmalloc failure we would be unable to turn an extension off. 2. extension pointer, located at the end of the sk_buff. If the active_extensions byte is 0, the pointer is undefined; it is not initialized on skb allocation. This adds extra code to skb clone and free paths (to deal with refcount/free of extension area) but this replaces similar code that manages skb->nf_bridge and skb->sp structs in the followup patches of the series. It is possible to add support for extensions that are not preserved on clones/copies: 1. define a bitmask of all extensions that need copy/cow on clone 2. change __skb_ext_copy() to check ->active_extensions & SKB_EXT_PRESERVE_ON_CLONE 3. set clone->active_extensions to 0 if test is false. This isn't done here because all extensions that get added here need the copy/cow semantics. Last patch converts skb->sp, secpath information gets stored as new SKB_EXT_SEC_PATH, so the 'sp' pointer is removed from skbuff. Extra code added to skb clone and free paths (to deal with refcount/free of extension area) replaces the existing code that does the same for skb->nf_bridge and skb->secpath. I don't see any other in-tree users that could benefit from this infrastructure, it doesn't make sense to add an extension just for the sake of a single flag bit (like skb->nf_trace). Adding a new extension is a good fit if all of the following are true: 1. Data is related to the skb/packet aggregate 2. Data should be freed when the skb is free'd 3. Data is not going to be relevant/needed in normal case (udp, tcp, forwarding workloads, ...) 4. There are no fancy action(s) needed on clone/free, such as callbacks into kernel modules. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 8239d57 + 4165079 commit 4a54877

39 files changed

+564
-286
lines changed

Documentation/networking/xfrm_device.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,10 @@ the stack in xfrm_input().
111111
xfrm_state_hold(xs);
112112

113113
store the state information into the skb
114-
skb->sp = secpath_dup(skb->sp);
115-
skb->sp->xvec[skb->sp->len++] = xs;
116-
skb->sp->olen++;
114+
sp = secpath_set(skb);
115+
if (!sp) return;
116+
sp->xvec[sp->len++] = xs;
117+
sp->olen++;
117118

118119
indicate the success and/or error status of the offload
119120
xo = xfrm_offload(skb);

drivers/crypto/chelsio/chcr_ipsec.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@ int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
570570
struct sge_eth_txq *q;
571571
struct port_info *pi;
572572
dma_addr_t addr[MAX_SKB_FRAGS + 1];
573+
struct sec_path *sp;
573574
bool immediate = false;
574575

575576
if (!x->xso.offload_handle)
@@ -578,7 +579,8 @@ int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
578579
sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle;
579580
kctx_len = sa_entry->kctx_len;
580581

581-
if (skb->sp->len != 1) {
582+
sp = skb_sec_path(skb);
583+
if (sp->len != 1) {
582584
out_free: dev_kfree_skb_any(skb);
583585
return NETDEV_TX_OK;
584586
}

drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,11 +1065,13 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
10651065
struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
10661066
struct ixgbe_ipsec *ipsec = adapter->ipsec;
10671067
struct xfrm_state *xs;
1068+
struct sec_path *sp;
10681069
struct tx_sa *tsa;
10691070

1070-
if (unlikely(!first->skb->sp->len)) {
1071+
sp = skb_sec_path(first->skb);
1072+
if (unlikely(!sp->len)) {
10711073
netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
1072-
__func__, first->skb->sp->len);
1074+
__func__, sp->len);
10731075
return 0;
10741076
}
10751077

@@ -1159,6 +1161,7 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
11591161
struct xfrm_state *xs = NULL;
11601162
struct ipv6hdr *ip6 = NULL;
11611163
struct iphdr *ip4 = NULL;
1164+
struct sec_path *sp;
11621165
void *daddr;
11631166
__be32 spi;
11641167
u8 *c_hdr;
@@ -1198,12 +1201,12 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
11981201
if (unlikely(!xs))
11991202
return;
12001203

1201-
skb->sp = secpath_dup(skb->sp);
1202-
if (unlikely(!skb->sp))
1204+
sp = secpath_set(skb);
1205+
if (unlikely(!sp))
12031206
return;
12041207

1205-
skb->sp->xvec[skb->sp->len++] = xs;
1206-
skb->sp->olen++;
1208+
sp->xvec[sp->len++] = xs;
1209+
sp->olen++;
12071210
xo = xfrm_offload(skb);
12081211
xo->flags = CRYPTO_DONE;
12091212
xo->status = CRYPTO_SUCCESS;

drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8695,7 +8695,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
86958695
#endif /* IXGBE_FCOE */
86968696

86978697
#ifdef CONFIG_IXGBE_IPSEC
8698-
if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
8698+
if (secpath_exists(skb) &&
8699+
!ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
86998700
goto out_drop;
87008701
#endif
87018702
tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
@@ -10192,7 +10193,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
1019210193
*/
1019310194
if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
1019410195
#ifdef CONFIG_IXGBE_IPSEC
10195-
if (!skb->sp)
10196+
if (!secpath_exists(skb))
1019610197
#endif
1019710198
features &= ~NETIF_F_TSO;
1019810199
}

drivers/net/ethernet/intel/ixgbevf/ipsec.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -450,12 +450,14 @@ int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
450450
struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev);
451451
struct ixgbevf_ipsec *ipsec = adapter->ipsec;
452452
struct xfrm_state *xs;
453+
struct sec_path *sp;
453454
struct tx_sa *tsa;
454455
u16 sa_idx;
455456

456-
if (unlikely(!first->skb->sp->len)) {
457+
sp = skb_sec_path(first->skb);
458+
if (unlikely(!sp->len)) {
457459
netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
458-
__func__, first->skb->sp->len);
460+
__func__, sp->len);
459461
return 0;
460462
}
461463

@@ -546,6 +548,7 @@ void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
546548
struct xfrm_state *xs = NULL;
547549
struct ipv6hdr *ip6 = NULL;
548550
struct iphdr *ip4 = NULL;
551+
struct sec_path *sp;
549552
void *daddr;
550553
__be32 spi;
551554
u8 *c_hdr;
@@ -585,12 +588,12 @@ void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
585588
if (unlikely(!xs))
586589
return;
587590

588-
skb->sp = secpath_dup(skb->sp);
589-
if (unlikely(!skb->sp))
591+
sp = secpath_set(skb);
592+
if (unlikely(!sp))
590593
return;
591594

592-
skb->sp->xvec[skb->sp->len++] = xs;
593-
skb->sp->olen++;
595+
sp->xvec[sp->len++] = xs;
596+
sp->olen++;
594597
xo = xfrm_offload(skb);
595598
xo->flags = CRYPTO_DONE;
596599
xo->status = CRYPTO_SUCCESS;

drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4157,7 +4157,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
41574157
first->protocol = vlan_get_protocol(skb);
41584158

41594159
#ifdef CONFIG_IXGBEVF_IPSEC
4160-
if (skb->sp && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
4160+
if (secpath_exists(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
41614161
goto out_drop;
41624162
#endif
41634163
tso = ixgbevf_tso(tx_ring, first, &hdr_len, &ipsec_tx);

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -254,11 +254,13 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
254254
struct mlx5e_ipsec_metadata *mdata;
255255
struct mlx5e_ipsec_sa_entry *sa_entry;
256256
struct xfrm_state *x;
257+
struct sec_path *sp;
257258

258259
if (!xo)
259260
return skb;
260261

261-
if (unlikely(skb->sp->len != 1)) {
262+
sp = skb_sec_path(skb);
263+
if (unlikely(sp->len != 1)) {
262264
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
263265
goto drop;
264266
}
@@ -305,10 +307,11 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
305307
struct mlx5e_priv *priv = netdev_priv(netdev);
306308
struct xfrm_offload *xo;
307309
struct xfrm_state *xs;
310+
struct sec_path *sp;
308311
u32 sa_handle;
309312

310-
skb->sp = secpath_dup(skb->sp);
311-
if (unlikely(!skb->sp)) {
313+
sp = secpath_set(skb);
314+
if (unlikely(!sp)) {
312315
atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
313316
return NULL;
314317
}
@@ -320,8 +323,9 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
320323
return NULL;
321324
}
322325

323-
skb->sp->xvec[skb->sp->len++] = xs;
324-
skb->sp->olen++;
326+
sp = skb_sec_path(skb);
327+
sp->xvec[sp->len++] = xs;
328+
sp->olen++;
325329

326330
xo = xfrm_offload(skb);
327331
xo->flags = CRYPTO_DONE;
@@ -372,10 +376,11 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
372376
bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
373377
netdev_features_t features)
374378
{
379+
struct sec_path *sp = skb_sec_path(skb);
375380
struct xfrm_state *x;
376381

377-
if (skb->sp && skb->sp->len) {
378-
x = skb->sp->xvec[0];
382+
if (sp && sp->len) {
383+
x = sp->xvec[0];
379384
if (x && x->xso.offload_handle)
380385
return true;
381386
}

drivers/net/netdevsim/ipsec.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,18 +227,19 @@ static const struct xfrmdev_ops nsim_xfrmdev_ops = {
227227

228228
bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb)
229229
{
230+
struct sec_path *sp = skb_sec_path(skb);
230231
struct nsim_ipsec *ipsec = &ns->ipsec;
231232
struct xfrm_state *xs;
232233
struct nsim_sa *tsa;
233234
u32 sa_idx;
234235

235236
/* do we even need to check this packet? */
236-
if (!skb->sp)
237+
if (!sp)
237238
return true;
238239

239-
if (unlikely(!skb->sp->len)) {
240+
if (unlikely(!sp->len)) {
240241
netdev_err(ns->netdev, "no xfrm state len = %d\n",
241-
skb->sp->len);
242+
sp->len);
242243
return false;
243244
}
244245

include/linux/netfilter_bridge.h

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,43 +17,58 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
1717
skb_dst_drop(skb);
1818
}
1919

20+
static inline struct nf_bridge_info *
21+
nf_bridge_info_get(const struct sk_buff *skb)
22+
{
23+
return skb_ext_find(skb, SKB_EXT_BRIDGE_NF);
24+
}
25+
26+
static inline bool nf_bridge_info_exists(const struct sk_buff *skb)
27+
{
28+
return skb_ext_exist(skb, SKB_EXT_BRIDGE_NF);
29+
}
30+
2031
static inline int nf_bridge_get_physinif(const struct sk_buff *skb)
2132
{
22-
struct nf_bridge_info *nf_bridge;
33+
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
2334

24-
if (skb->nf_bridge == NULL)
35+
if (!nf_bridge)
2536
return 0;
2637

27-
nf_bridge = skb->nf_bridge;
2838
return nf_bridge->physindev ? nf_bridge->physindev->ifindex : 0;
2939
}
3040

3141
static inline int nf_bridge_get_physoutif(const struct sk_buff *skb)
3242
{
33-
struct nf_bridge_info *nf_bridge;
43+
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
3444

35-
if (skb->nf_bridge == NULL)
45+
if (!nf_bridge)
3646
return 0;
3747

38-
nf_bridge = skb->nf_bridge;
3948
return nf_bridge->physoutdev ? nf_bridge->physoutdev->ifindex : 0;
4049
}
4150

4251
static inline struct net_device *
4352
nf_bridge_get_physindev(const struct sk_buff *skb)
4453
{
45-
return skb->nf_bridge ? skb->nf_bridge->physindev : NULL;
54+
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
55+
56+
return nf_bridge ? nf_bridge->physindev : NULL;
4657
}
4758

4859
static inline struct net_device *
4960
nf_bridge_get_physoutdev(const struct sk_buff *skb)
5061
{
51-
return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL;
62+
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
63+
64+
return nf_bridge ? nf_bridge->physoutdev : NULL;
5265
}
5366

5467
static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
5568
{
56-
return skb->nf_bridge && skb->nf_bridge->in_prerouting;
69+
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
70+
71+
return nf_bridge && nf_bridge->in_prerouting;
5772
}
5873
#else
5974
#define br_drop_fake_rtable(skb) do { } while (0)

0 commit comments

Comments
 (0)