Skip to content

Commit e89e9cf

Browse files
Ananda Raju authored and
Arnaldo Carvalho de Melo committed
[IPv4/IPv6]: UFO Scatter-gather approach
Attached is a kernel patch for the UDP Fragmentation Offload (UFO) feature. 1. This patch incorporates the review comments by Jeff Garzik. 2. Renamed USO as UFO (UDP Fragmentation Offload) 3. udp sendfile support with UFO This patch uses the scatter-gather feature of skb to generate large UDP datagrams. Below is a "how-to" on the changes required in a network device driver to use the UFO interface. UDP Fragmentation Offload (UFO) Interface: ------------------------------------------- UFO is a feature wherein the Linux kernel network stack will offload the IP fragmentation functionality of large UDP datagrams to hardware. This will reduce the overhead of the stack in fragmenting large UDP datagrams into MTU-sized packets. 1) Drivers indicate their capability of UFO using dev->features |= NETIF_F_UFO | NETIF_F_HW_CSUM | NETIF_F_SG NETIF_F_HW_CSUM is required for UFO over IPv6. 2) A UFO packet will be submitted for transmission using the driver xmit routine. A UFO packet will have a non-zero value for "skb_shinfo(skb)->ufo_size". skb_shinfo(skb)->ufo_size will indicate the length of the data part in each IP fragment going out of the adapter after IP fragmentation by hardware. skb->data will contain the MAC/IP/UDP header and skb_shinfo(skb)->frags[] contains the data payload. The skb->ip_summed will be set to CHECKSUM_HW, indicating that hardware has to do the checksum calculation. Hardware should compute the UDP checksum of the complete datagram and also the IP header checksum of each fragmented IP packet. For IPv6, UFO provides the fragment identification ID in skb_shinfo(skb)->ip6_frag_id. The adapter should use this ID for generating IPv6 fragments. Signed-off-by: Ananda Raju <[email protected]> Signed-off-by: Rusty Russell <[email protected]> (forwarded) Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent de51441 commit e89e9cf

File tree

8 files changed

+306
-6
lines changed

8 files changed

+306
-6
lines changed

include/linux/ethtool.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ u32 ethtool_op_get_tso(struct net_device *dev);
269269
int ethtool_op_set_tso(struct net_device *dev, u32 data);
270270
int ethtool_op_get_perm_addr(struct net_device *dev,
271271
struct ethtool_perm_addr *addr, u8 *data);
272+
u32 ethtool_op_get_ufo(struct net_device *dev);
273+
int ethtool_op_set_ufo(struct net_device *dev, u32 data);
272274

273275
/**
274276
* &ethtool_ops - Alter and report network device settings
@@ -298,6 +300,8 @@ int ethtool_op_get_perm_addr(struct net_device *dev,
298300
* set_sg: Turn scatter-gather on or off
299301
* get_tso: Report whether TCP segmentation offload is enabled
300302
* set_tso: Turn TCP segmentation offload on or off
303+
* get_ufo: Report whether UDP fragmentation offload is enabled
304+
* set_ufo: Turn UDP fragmentation offload on or off
301305
* self_test: Run specified self-tests
302306
* get_strings: Return a set of strings that describe the requested objects
303307
* phys_id: Identify the device
@@ -364,6 +368,8 @@ struct ethtool_ops {
364368
int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *);
365369
int (*begin)(struct net_device *);
366370
void (*complete)(struct net_device *);
371+
u32 (*get_ufo)(struct net_device *);
372+
int (*set_ufo)(struct net_device *, u32);
367373
};
368374

369375
/* CMDs currently supported */
@@ -400,6 +406,8 @@ struct ethtool_ops {
400406
#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */
401407
#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */
402408
#define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */
409+
#define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */
410+
#define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */
403411

404412
/* compatibility with older code */
405413
#define SPARC_ETH_GSET ETHTOOL_GSET

include/linux/netdevice.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ struct net_device
308308
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
309309
#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
310310
#define NETIF_F_LLTX 4096 /* LockLess TX */
311+
#define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/
311312

312313
struct net_device *next_sched;
313314

include/linux/skbuff.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ struct skb_shared_info {
137137
unsigned int nr_frags;
138138
unsigned short tso_size;
139139
unsigned short tso_segs;
140+
unsigned short ufo_size;
141+
unsigned int ip6_frag_id;
140142
struct sk_buff *frag_list;
141143
skb_frag_t frags[MAX_SKB_FRAGS];
142144
};
@@ -341,6 +343,11 @@ extern void skb_over_panic(struct sk_buff *skb, int len,
341343
extern void skb_under_panic(struct sk_buff *skb, int len,
342344
void *here);
343345

346+
extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
347+
int getfrag(void *from, char *to, int offset,
348+
int len,int odd, struct sk_buff *skb),
349+
void *from, int length);
350+
344351
struct skb_seq_state
345352
{
346353
__u32 lower_offset;

net/core/dev.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2717,6 +2717,20 @@ int register_netdevice(struct net_device *dev)
27172717
dev->name);
27182718
dev->features &= ~NETIF_F_TSO;
27192719
}
2720+
if (dev->features & NETIF_F_UFO) {
2721+
if (!(dev->features & NETIF_F_HW_CSUM)) {
2722+
printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2723+
"NETIF_F_HW_CSUM feature.\n",
2724+
dev->name);
2725+
dev->features &= ~NETIF_F_UFO;
2726+
}
2727+
if (!(dev->features & NETIF_F_SG)) {
2728+
printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2729+
"NETIF_F_SG feature.\n",
2730+
dev->name);
2731+
dev->features &= ~NETIF_F_UFO;
2732+
}
2733+
}
27202734

27212735
/*
27222736
* nil rebuild_header routine,

net/core/ethtool.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,20 @@ int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *a
9393
}
9494

9595

96+
u32 ethtool_op_get_ufo(struct net_device *dev)
97+
{
98+
return (dev->features & NETIF_F_UFO) != 0;
99+
}
100+
101+
/*
 * Generic set_ufo helper: clears NETIF_F_UFO in dev->features when @data
 * is zero and sets it otherwise.  Always returns 0.
 */
int ethtool_op_set_ufo(struct net_device *dev, u32 data)
{
	if (!data)
		dev->features &= ~NETIF_F_UFO;
	else
		dev->features |= NETIF_F_UFO;

	return 0;
}
109+
96110
/* Handlers for each ethtool command */
97111

98112
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
@@ -483,6 +497,11 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
483497
return err;
484498
}
485499

500+
if (!data && dev->ethtool_ops->set_ufo) {
501+
err = dev->ethtool_ops->set_ufo(dev, 0);
502+
if (err)
503+
return err;
504+
}
486505
return dev->ethtool_ops->set_sg(dev, data);
487506
}
488507

@@ -569,6 +588,32 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
569588
return dev->ethtool_ops->set_tso(dev, edata.data);
570589
}
571590

591+
/*
 * ETHTOOL_GUFO handler: report the device's UFO state to user space.
 *
 * Returns -EOPNOTSUPP when the driver provides no get_ufo hook, -EFAULT
 * when the reply cannot be copied to @useraddr, and 0 on success.
 */
static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
{
	/*
	 * Tag the reply with the command being serviced.  This used to be
	 * ETHTOOL_GTSO (apparently a copy-paste slip from the TSO handler),
	 * which handed user space the wrong command code.
	 */
	struct ethtool_value edata = { ETHTOOL_GUFO };

	if (!dev->ethtool_ops->get_ufo)
		return -EOPNOTSUPP;

	edata.data = dev->ethtool_ops->get_ufo(dev);
	if (copy_to_user(useraddr, &edata, sizeof(edata)))
		return -EFAULT;

	return 0;
}
602+
/*
 * ETHTOOL_SUFO handler: switch UFO on or off as requested by user space.
 *
 * Enabling is refused with -EINVAL unless the device already advertises
 * both NETIF_F_SG and NETIF_F_HW_CSUM; disabling is always passed through.
 * Returns -EOPNOTSUPP when the driver has no set_ufo hook, -EFAULT when
 * the request cannot be read from @useraddr, otherwise whatever the
 * driver's set_ufo hook returns.
 */
static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
{
	struct ethtool_value request;
	int missing_prereq;

	if (!dev->ethtool_ops->set_ufo)
		return -EOPNOTSUPP;

	if (copy_from_user(&request, useraddr, sizeof(request)))
		return -EFAULT;

	missing_prereq = !(dev->features & NETIF_F_SG) ||
			 !(dev->features & NETIF_F_HW_CSUM);
	if (request.data && missing_prereq)
		return -EINVAL;

	return dev->ethtool_ops->set_ufo(dev, request.data);
}
616+
572617
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
573618
{
574619
struct ethtool_test test;
@@ -854,6 +899,12 @@ int dev_ethtool(struct ifreq *ifr)
854899
case ETHTOOL_GPERMADDR:
855900
rc = ethtool_get_perm_addr(dev, useraddr);
856901
break;
902+
case ETHTOOL_GUFO:
903+
rc = ethtool_get_ufo(dev, useraddr);
904+
break;
905+
case ETHTOOL_SUFO:
906+
rc = ethtool_set_ufo(dev, useraddr);
907+
break;
857908
default:
858909
rc = -EOPNOTSUPP;
859910
}
@@ -882,3 +933,5 @@ EXPORT_SYMBOL(ethtool_op_set_sg);
882933
EXPORT_SYMBOL(ethtool_op_set_tso);
883934
EXPORT_SYMBOL(ethtool_op_set_tx_csum);
884935
EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
936+
EXPORT_SYMBOL(ethtool_op_set_ufo);
937+
EXPORT_SYMBOL(ethtool_op_get_ufo);

net/core/skbuff.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
176176
skb_shinfo(skb)->tso_size = 0;
177177
skb_shinfo(skb)->tso_segs = 0;
178178
skb_shinfo(skb)->frag_list = NULL;
179+
skb_shinfo(skb)->ufo_size = 0;
180+
skb_shinfo(skb)->ip6_frag_id = 0;
179181
out:
180182
return skb;
181183
nodata:
@@ -1696,6 +1698,78 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
16961698
return textsearch_find(config, state);
16971699
}
16981700

1701+
/**
1702+
* skb_append_datato_frags: - append the user data to a skb
1703+
* @sk: sock structure
1704+
* @skb: skb structure to be appened with user data.
1705+
* @getfrag: call back function to be used for getting the user data
1706+
* @from: pointer to user message iov
1707+
* @length: length of the iov message
1708+
*
1709+
* Description: This procedure append the user data in the fragment part
1710+
* of the skb if any page alloc fails user this procedure returns -ENOMEM
1711+
*/
1712+
int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
1713+
int getfrag(void *from, char *to, int offset,
1714+
int len, int odd, struct sk_buff *skb),
1715+
void *from, int length)
1716+
{
1717+
int frg_cnt = 0;
1718+
skb_frag_t *frag = NULL;
1719+
struct page *page = NULL;
1720+
int copy, left;
1721+
int offset = 0;
1722+
int ret;
1723+
1724+
do {
1725+
/* Return error if we don't have space for new frag */
1726+
frg_cnt = skb_shinfo(skb)->nr_frags;
1727+
if (frg_cnt >= MAX_SKB_FRAGS)
1728+
return -EFAULT;
1729+
1730+
/* allocate a new page for next frag */
1731+
page = alloc_pages(sk->sk_allocation, 0);
1732+
1733+
/* If alloc_page fails just return failure and caller will
1734+
* free previous allocated pages by doing kfree_skb()
1735+
*/
1736+
if (page == NULL)
1737+
return -ENOMEM;
1738+
1739+
/* initialize the next frag */
1740+
sk->sk_sndmsg_page = page;
1741+
sk->sk_sndmsg_off = 0;
1742+
skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
1743+
skb->truesize += PAGE_SIZE;
1744+
atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1745+
1746+
/* get the new initialized frag */
1747+
frg_cnt = skb_shinfo(skb)->nr_frags;
1748+
frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
1749+
1750+
/* copy the user data to page */
1751+
left = PAGE_SIZE - frag->page_offset;
1752+
copy = (length > left)? left : length;
1753+
1754+
ret = getfrag(from, (page_address(frag->page) +
1755+
frag->page_offset + frag->size),
1756+
offset, copy, 0, skb);
1757+
if (ret < 0)
1758+
return -EFAULT;
1759+
1760+
/* copy was successful so update the size parameters */
1761+
sk->sk_sndmsg_off += copy;
1762+
frag->size += copy;
1763+
skb->len += copy;
1764+
skb->data_len += copy;
1765+
offset += copy;
1766+
length -= copy;
1767+
1768+
} while (length > 0);
1769+
1770+
return 0;
1771+
}
1772+
16991773
void __init skb_init(void)
17001774
{
17011775
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
@@ -1747,3 +1821,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
17471821
EXPORT_SYMBOL(skb_seq_read);
17481822
EXPORT_SYMBOL(skb_abort_seq_read);
17491823
EXPORT_SYMBOL(skb_find_text);
1824+
EXPORT_SYMBOL(skb_append_datato_frags);

net/ipv4/ip_output.c

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb)
275275
{
276276
IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
277277

278-
if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size)
278+
if (skb->len > dst_mtu(skb->dst) &&
279+
!(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
279280
return ip_fragment(skb, ip_finish_output);
280281
else
281282
return ip_finish_output(skb);
@@ -688,6 +689,60 @@ csum_page(struct page *page, int offset, int copy)
688689
return csum;
689690
}
690691

692+
inline int ip_ufo_append_data(struct sock *sk,
693+
int getfrag(void *from, char *to, int offset, int len,
694+
int odd, struct sk_buff *skb),
695+
void *from, int length, int hh_len, int fragheaderlen,
696+
int transhdrlen, int mtu,unsigned int flags)
697+
{
698+
struct sk_buff *skb;
699+
int err;
700+
701+
/* There is support for UDP fragmentation offload by network
702+
* device, so create one single skb packet containing complete
703+
* udp datagram
704+
*/
705+
if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
706+
skb = sock_alloc_send_skb(sk,
707+
hh_len + fragheaderlen + transhdrlen + 20,
708+
(flags & MSG_DONTWAIT), &err);
709+
710+
if (skb == NULL)
711+
return err;
712+
713+
/* reserve space for Hardware header */
714+
skb_reserve(skb, hh_len);
715+
716+
/* create space for UDP/IP header */
717+
skb_put(skb,fragheaderlen + transhdrlen);
718+
719+
/* initialize network header pointer */
720+
skb->nh.raw = skb->data;
721+
722+
/* initialize protocol header pointer */
723+
skb->h.raw = skb->data + fragheaderlen;
724+
725+
skb->ip_summed = CHECKSUM_HW;
726+
skb->csum = 0;
727+
sk->sk_sndmsg_off = 0;
728+
}
729+
730+
err = skb_append_datato_frags(sk,skb, getfrag, from,
731+
(length - transhdrlen));
732+
if (!err) {
733+
/* specify the length of each IP datagram fragment*/
734+
skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
735+
__skb_queue_tail(&sk->sk_write_queue, skb);
736+
737+
return 0;
738+
}
739+
/* There is not enough support do UFO ,
740+
* so follow normal path
741+
*/
742+
kfree_skb(skb);
743+
return err;
744+
}
745+
691746
/*
692747
* ip_append_data() and ip_append_page() can make one large IP datagram
693748
* from many pieces of data. Each pieces will be holded on the socket
@@ -777,6 +832,15 @@ int ip_append_data(struct sock *sk,
777832
csummode = CHECKSUM_HW;
778833

779834
inet->cork.length += length;
835+
if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
836+
(rt->u.dst.dev->features & NETIF_F_UFO)) {
837+
838+
if(ip_ufo_append_data(sk, getfrag, from, length, hh_len,
839+
fragheaderlen, transhdrlen, mtu, flags))
840+
goto error;
841+
842+
return 0;
843+
}
780844

781845
/* So, what's going on in the loop below?
782846
*
@@ -1008,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
10081072
return -EINVAL;
10091073

10101074
inet->cork.length += size;
1075+
if ((sk->sk_protocol == IPPROTO_UDP) &&
1076+
(rt->u.dst.dev->features & NETIF_F_UFO))
1077+
skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
1078+
10111079

10121080
while (size > 0) {
10131081
int i;
10141082

1015-
/* Check if the remaining data fits into current packet. */
1016-
len = mtu - skb->len;
1017-
if (len < size)
1018-
len = maxfraglen - skb->len;
1083+
if (skb_shinfo(skb)->ufo_size)
1084+
len = size;
1085+
else {
1086+
1087+
/* Check if the remaining data fits into current packet. */
1088+
len = mtu - skb->len;
1089+
if (len < size)
1090+
len = maxfraglen - skb->len;
1091+
}
10191092
if (len <= 0) {
10201093
struct sk_buff *skb_prev;
10211094
char *data;

0 commit comments

Comments
 (0)