Skip to content

Commit b193722

Browse files
edumazet authored and davem330 committed
net: reorganize sk_buff for faster __copy_skb_header()
With the proliferation of bit fields in sk_buff, __copy_skb_header() became quite expensive, showing up as the most expensive function in a GSO workload. __copy_skb_header() performance is also critical for non-GSO TCP operations, as it is used from skb_clone(). This patch carefully moves all the fields that were not copied into a separate zone: cloned, nohdr, fclone, peeked, head_frag, xmit_more. Then I moved all other fields and all other copied fields into a section delimited by headers_start[0]/headers_end[0], so that we can use a single memcpy() call, inlined by the compiler using long word loads/stores. I also tried to make all copies in the natural order of sk_buff, to help hardware prefetching. I made sure the sk_buff size did not change. Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 842abe0 commit b193722

File tree

2 files changed

+113
-100
lines changed

2 files changed

+113
-100
lines changed

include/linux/skbuff.h

Lines changed: 72 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -527,27 +527,41 @@ struct sk_buff {
527527
char cb[48] __aligned(8);
528528

529529
unsigned long _skb_refdst;
530+
void (*destructor)(struct sk_buff *skb);
530531
#ifdef CONFIG_XFRM
531532
struct sec_path *sp;
533+
#endif
534+
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
535+
struct nf_conntrack *nfct;
536+
#endif
537+
#ifdef CONFIG_BRIDGE_NETFILTER
538+
struct nf_bridge_info *nf_bridge;
532539
#endif
533540
unsigned int len,
534541
data_len;
535542
__u16 mac_len,
536543
hdr_len;
537-
union {
538-
__wsum csum;
539-
struct {
540-
__u16 csum_start;
541-
__u16 csum_offset;
542-
};
543-
};
544-
__u32 priority;
544+
545+
/* Following fields are _not_ copied in __copy_skb_header()
546+
* Note that queue_mapping is here mostly to fill a hole.
547+
*/
545548
kmemcheck_bitfield_begin(flags1);
546-
__u8 ignore_df:1,
547-
cloned:1,
548-
ip_summed:2,
549+
__u16 queue_mapping;
550+
__u8 cloned:1,
549551
nohdr:1,
550-
nfctinfo:3;
552+
fclone:2,
553+
peeked:1,
554+
head_frag:1,
555+
xmit_more:1;
556+
/* one bit hole */
557+
kmemcheck_bitfield_end(flags1);
558+
559+
560+
561+
/* fields enclosed in headers_start/headers_end are copied
562+
* using a single memcpy() in __copy_skb_header()
563+
*/
564+
__u32 headers_start[0];
551565

552566
/* if you move pkt_type around you also must adapt those constants */
553567
#ifdef __BIG_ENDIAN_BITFIELD
@@ -558,58 +572,53 @@ struct sk_buff {
558572
#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
559573

560574
__u8 __pkt_type_offset[0];
561-
__u8 pkt_type:3,
562-
fclone:2,
563-
ipvs_property:1,
564-
peeked:1,
565-
nf_trace:1;
566-
kmemcheck_bitfield_end(flags1);
567-
__be16 protocol;
568-
569-
void (*destructor)(struct sk_buff *skb);
570-
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
571-
struct nf_conntrack *nfct;
572-
#endif
573-
#ifdef CONFIG_BRIDGE_NETFILTER
574-
struct nf_bridge_info *nf_bridge;
575-
#endif
576-
577-
int skb_iif;
578-
579-
__u32 hash;
580-
581-
__be16 vlan_proto;
582-
__u16 vlan_tci;
583-
584-
#ifdef CONFIG_NET_SCHED
585-
__u16 tc_index; /* traffic control index */
586-
#ifdef CONFIG_NET_CLS_ACT
587-
__u16 tc_verd; /* traffic control verdict */
588-
#endif
589-
#endif
590-
591-
__u16 queue_mapping;
592-
kmemcheck_bitfield_begin(flags2);
593-
__u8 xmit_more:1;
594-
#ifdef CONFIG_IPV6_NDISC_NODETYPE
595-
__u8 ndisc_nodetype:2;
596-
#endif
575+
__u8 pkt_type:3;
597576
__u8 pfmemalloc:1;
577+
__u8 ignore_df:1;
578+
__u8 nfctinfo:3;
579+
580+
__u8 nf_trace:1;
581+
__u8 ip_summed:2;
598582
__u8 ooo_okay:1;
599583
__u8 l4_hash:1;
600584
__u8 sw_hash:1;
601585
__u8 wifi_acked_valid:1;
602586
__u8 wifi_acked:1;
587+
603588
__u8 no_fcs:1;
604-
__u8 head_frag:1;
605589
/* Indicates the inner headers are valid in the skbuff. */
606590
__u8 encapsulation:1;
607591
__u8 encap_hdr_csum:1;
608592
__u8 csum_valid:1;
609593
__u8 csum_complete_sw:1;
610-
/* 1/3 bit hole (depending on ndisc_nodetype presence) */
611-
kmemcheck_bitfield_end(flags2);
594+
__u8 csum_level:2;
595+
__u8 csum_bad:1;
612596

597+
#ifdef CONFIG_IPV6_NDISC_NODETYPE
598+
__u8 ndisc_nodetype:2;
599+
#endif
600+
__u8 ipvs_property:1;
601+
/* 5 or 7 bit hole */
602+
603+
#ifdef CONFIG_NET_SCHED
604+
__u16 tc_index; /* traffic control index */
605+
#ifdef CONFIG_NET_CLS_ACT
606+
__u16 tc_verd; /* traffic control verdict */
607+
#endif
608+
#endif
609+
610+
union {
611+
__wsum csum;
612+
struct {
613+
__u16 csum_start;
614+
__u16 csum_offset;
615+
};
616+
};
617+
__u32 priority;
618+
int skb_iif;
619+
__u32 hash;
620+
__be16 vlan_proto;
621+
__u16 vlan_tci;
613622
#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
614623
union {
615624
unsigned int napi_id;
@@ -625,19 +634,18 @@ struct sk_buff {
625634
__u32 reserved_tailroom;
626635
};
627636

628-
kmemcheck_bitfield_begin(flags3);
629-
__u8 csum_level:2;
630-
__u8 csum_bad:1;
631-
/* 13 bit hole */
632-
kmemcheck_bitfield_end(flags3);
633-
634637
__be16 inner_protocol;
635638
__u16 inner_transport_header;
636639
__u16 inner_network_header;
637640
__u16 inner_mac_header;
641+
642+
__be16 protocol;
638643
__u16 transport_header;
639644
__u16 network_header;
640645
__u16 mac_header;
646+
647+
__u32 headers_end[0];
648+
641649
/* These elements must be at the end, see alloc_skb() for details. */
642650
sk_buff_data_t tail;
643651
sk_buff_data_t end;
@@ -3040,19 +3048,22 @@ static inline void nf_reset_trace(struct sk_buff *skb)
30403048
}
30413049

30423050
/* Note: This doesn't put any conntrack and bridge info in dst. */
3043-
static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
3051+
static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
3052+
bool copy)
30443053
{
30453054
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
30463055
dst->nfct = src->nfct;
30473056
nf_conntrack_get(src->nfct);
3048-
dst->nfctinfo = src->nfctinfo;
3057+
if (copy)
3058+
dst->nfctinfo = src->nfctinfo;
30493059
#endif
30503060
#ifdef CONFIG_BRIDGE_NETFILTER
30513061
dst->nf_bridge = src->nf_bridge;
30523062
nf_bridge_get(src->nf_bridge);
30533063
#endif
30543064
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
3055-
dst->nf_trace = src->nf_trace;
3065+
if (copy)
3066+
dst->nf_trace = src->nf_trace;
30563067
#endif
30573068
}
30583069

@@ -3064,7 +3075,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
30643075
#ifdef CONFIG_BRIDGE_NETFILTER
30653076
nf_bridge_put(dst->nf_bridge);
30663077
#endif
3067-
__nf_copy(dst, src);
3078+
__nf_copy(dst, src, true);
30683079
}
30693080

30703081
#ifdef CONFIG_NETWORK_SECMARK

net/core/skbuff.c

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
261261
atomic_t *fclone_ref = (atomic_t *) (child + 1);
262262

263263
kmemcheck_annotate_bitfield(child, flags1);
264-
kmemcheck_annotate_bitfield(child, flags2);
265264
skb->fclone = SKB_FCLONE_ORIG;
266265
atomic_set(fclone_ref, 1);
267266

@@ -675,57 +674,61 @@ void consume_skb(struct sk_buff *skb)
675674
}
676675
EXPORT_SYMBOL(consume_skb);
677676

677+
/* Make sure a field is enclosed inside headers_start/headers_end section */
678+
#define CHECK_SKB_FIELD(field) \
679+
BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
680+
offsetof(struct sk_buff, headers_start)); \
681+
BUILD_BUG_ON(offsetof(struct sk_buff, field) > \
682+
offsetof(struct sk_buff, headers_end)); \
683+
678684
static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
679685
{
680686
new->tstamp = old->tstamp;
687+
/* We do not copy old->sk */
681688
new->dev = old->dev;
682-
new->transport_header = old->transport_header;
683-
new->network_header = old->network_header;
684-
new->mac_header = old->mac_header;
685-
new->inner_protocol = old->inner_protocol;
686-
new->inner_transport_header = old->inner_transport_header;
687-
new->inner_network_header = old->inner_network_header;
688-
new->inner_mac_header = old->inner_mac_header;
689+
memcpy(new->cb, old->cb, sizeof(old->cb));
689690
skb_dst_copy(new, old);
690-
skb_copy_hash(new, old);
691-
new->ooo_okay = old->ooo_okay;
692-
new->no_fcs = old->no_fcs;
693-
new->encapsulation = old->encapsulation;
694-
new->encap_hdr_csum = old->encap_hdr_csum;
695-
new->csum_valid = old->csum_valid;
696-
new->csum_complete_sw = old->csum_complete_sw;
697691
#ifdef CONFIG_XFRM
698692
new->sp = secpath_get(old->sp);
699693
#endif
700-
memcpy(new->cb, old->cb, sizeof(old->cb));
701-
new->csum = old->csum;
702-
new->ignore_df = old->ignore_df;
703-
new->pkt_type = old->pkt_type;
704-
new->ip_summed = old->ip_summed;
705-
skb_copy_queue_mapping(new, old);
706-
new->priority = old->priority;
707-
#if IS_ENABLED(CONFIG_IP_VS)
708-
new->ipvs_property = old->ipvs_property;
694+
__nf_copy(new, old, false);
695+
696+
/* Note : this field could be in headers_start/headers_end section
697+
* It is not yet because we do not want to have a 16 bit hole
698+
*/
699+
new->queue_mapping = old->queue_mapping;
700+
701+
memcpy(&new->headers_start, &old->headers_start,
702+
offsetof(struct sk_buff, headers_end) -
703+
offsetof(struct sk_buff, headers_start));
704+
CHECK_SKB_FIELD(protocol);
705+
CHECK_SKB_FIELD(csum);
706+
CHECK_SKB_FIELD(hash);
707+
CHECK_SKB_FIELD(priority);
708+
CHECK_SKB_FIELD(skb_iif);
709+
CHECK_SKB_FIELD(vlan_proto);
710+
CHECK_SKB_FIELD(vlan_tci);
711+
CHECK_SKB_FIELD(transport_header);
712+
CHECK_SKB_FIELD(network_header);
713+
CHECK_SKB_FIELD(mac_header);
714+
CHECK_SKB_FIELD(inner_protocol);
715+
CHECK_SKB_FIELD(inner_transport_header);
716+
CHECK_SKB_FIELD(inner_network_header);
717+
CHECK_SKB_FIELD(inner_mac_header);
718+
CHECK_SKB_FIELD(mark);
719+
#ifdef CONFIG_NETWORK_SECMARK
720+
CHECK_SKB_FIELD(secmark);
721+
#endif
722+
#ifdef CONFIG_NET_RX_BUSY_POLL
723+
CHECK_SKB_FIELD(napi_id);
709724
#endif
710-
new->pfmemalloc = old->pfmemalloc;
711-
new->protocol = old->protocol;
712-
new->mark = old->mark;
713-
new->skb_iif = old->skb_iif;
714-
__nf_copy(new, old);
715725
#ifdef CONFIG_NET_SCHED
716-
new->tc_index = old->tc_index;
726+
CHECK_SKB_FIELD(tc_index);
717727
#ifdef CONFIG_NET_CLS_ACT
718-
new->tc_verd = old->tc_verd;
728+
CHECK_SKB_FIELD(tc_verd);
719729
#endif
720730
#endif
721-
new->vlan_proto = old->vlan_proto;
722-
new->vlan_tci = old->vlan_tci;
723-
724-
skb_copy_secmark(new, old);
725731

726-
#ifdef CONFIG_NET_RX_BUSY_POLL
727-
new->napi_id = old->napi_id;
728-
#endif
729732
}
730733

731734
/*
@@ -876,7 +879,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
876879
return NULL;
877880

878881
kmemcheck_annotate_bitfield(n, flags1);
879-
kmemcheck_annotate_bitfield(n, flags2);
880882
n->fclone = SKB_FCLONE_UNAVAILABLE;
881883
}
882884

0 commit comments

Comments
 (0)