
Commit 3763a24

arjunroy authored and davem330 committed
net-zerocopy: use vm_insert_pages() for tcp rcv zerocopy
Use vm_insert_pages() for tcp receive zerocopy. Spin lock cycles (as reported by perf) drop from a couple of percentage points to a fraction of a percent. This results in a roughly 6% increase in efficiency, measured roughly as zerocopy receive count divided by CPU utilization.

The intention of this patchset is to reduce atomic ops for tcp zerocopy receives, which normally hit the same spinlock multiple times consecutively.

[[email protected]: suppress gcc-7.2.0 warning]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arjun Roy <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: Soheil Hassas Yeganeh <[email protected]>
Cc: David Miller <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Stephen Rothwell <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 0e6fbe3 commit 3763a24
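
For context: the path being optimized here backs the TCP_ZEROCOPY_RECEIVE getsockopt(), where userspace mmap()s part of a TCP socket and the kernel maps payload pages into that region instead of copying them. Below is a minimal sketch of the consumer side, assuming a kernel of this era; the struct is declared locally in case installed headers lack it, later kernels extend it, and error handling is elided.

#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/types.h>

#ifndef TCP_ZEROCOPY_RECEIVE
#define TCP_ZEROCOPY_RECEIVE 35
struct tcp_zerocopy_receive {
	__u64 address;        /* in: address of the mmap()ed region */
	__u32 length;         /* in: bytes wanted; out: bytes mapped */
	__u32 recv_skip_hint; /* out: bytes to recv() normally instead */
};
#endif

/* Map up to map_len bytes of payload at map; returns bytes mapped or -1. */
static ssize_t zerocopy_rcv(int fd, void *map, size_t map_len)
{
	struct tcp_zerocopy_receive zc;
	socklen_t zc_len = sizeof(zc);

	memset(&zc, 0, sizeof(zc));
	zc.address = (__u64)(unsigned long)map;
	zc.length = map_len;
	/* On success, the kernel's tcp_zerocopy_receive() has inserted
	 * payload pages into [map, map + zc.length); this commit batches
	 * those insertions. */
	if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len))
		return -1;
	return zc.length;
}

Here map would come from mmap(NULL, map_len, PROT_READ, MAP_SHARED, fd, 0) on the socket itself (the tcp_mmap() entry point visible at the top of the diff below), and any recv_skip_hint leftover is drained with an ordinary recv().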

File tree

1 file changed: +63 -7 lines changed

net/ipv4/tcp.c

Lines changed: 63 additions & 7 deletions
@@ -1742,14 +1742,48 @@ int tcp_mmap(struct file *file, struct socket *sock,
 }
 EXPORT_SYMBOL(tcp_mmap);
 
+static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
+					struct page **pages,
+					unsigned long pages_to_map,
+					unsigned long *insert_addr,
+					u32 *length_with_pending,
+					u32 *seq,
+					struct tcp_zerocopy_receive *zc)
+{
+	unsigned long pages_remaining = pages_to_map;
+	int bytes_mapped;
+	int ret;
+
+	ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining);
+	bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);
+	/* Even if vm_insert_pages fails, it may have partially succeeded in
+	 * mapping (some but not all of the pages).
+	 */
+	*seq += bytes_mapped;
+	*insert_addr += bytes_mapped;
+	if (ret) {
+		/* But if vm_insert_pages did fail, we have to unroll some state
+		 * we speculatively touched before.
+		 */
+		const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
+		*length_with_pending -= bytes_not_mapped;
+		zc->recv_skip_hint += bytes_not_mapped;
+	}
+	return ret;
+}
+
 static int tcp_zerocopy_receive(struct sock *sk,
 				struct tcp_zerocopy_receive *zc)
 {
 	unsigned long address = (unsigned long)zc->address;
 	u32 length = 0, seq, offset, zap_len;
+	#define PAGE_BATCH_SIZE 8
+	struct page *pages[PAGE_BATCH_SIZE];
 	const skb_frag_t *frags = NULL;
 	struct vm_area_struct *vma;
 	struct sk_buff *skb = NULL;
+	unsigned long pg_idx = 0;
+	unsigned long curr_addr;
 	struct tcp_sock *tp;
 	int inq;
 	int ret;
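
The bookkeeping in tcp_zerocopy_vm_insert_batch() is the subtle part: vm_insert_pages() can fail after mapping only some of the batch, reporting the shortfall through pages_remaining. The helper therefore commits seq and the insert address for whatever did map, and unwinds length and recv_skip_hint for what did not. A hedged userspace re-creation with concrete numbers follows; mock_vm_insert_pages() and all values are invented for illustration, not kernel API.

#include <assert.h>
#include <errno.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define PAGE_BATCH_SIZE 8

/* Mock of vm_insert_pages(): pretend 5 of the 8 pages were mapped
 * before an error hit; 3 remain unmapped. */
static int mock_vm_insert_pages(unsigned long *pages_remaining)
{
	*pages_remaining = 3;
	return -ENOMEM;
}

int main(void)
{
	unsigned long pages_to_map = PAGE_BATCH_SIZE;
	unsigned long pages_remaining = pages_to_map;
	unsigned long insert_addr = 0x700000000000UL;
	unsigned int seq = 1000;             /* tp->copied_seq cursor */
	unsigned int length = 8 * PAGE_SIZE; /* speculatively advanced */
	unsigned int recv_skip_hint = 0;     /* speculatively drained */

	int ret = mock_vm_insert_pages(&pages_remaining);
	unsigned int bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);

	/* Commit the part that really mapped... */
	seq += bytes_mapped;
	insert_addr += bytes_mapped;
	if (ret) {
		/* ...and unroll the part that did not. */
		unsigned int bytes_not_mapped = PAGE_SIZE * pages_remaining;
		length -= bytes_not_mapped;
		recv_skip_hint += bytes_not_mapped;
	}

	assert(seq == 1000 + 5 * PAGE_SIZE);     /* 5 pages consumed */
	assert(length == 5 * PAGE_SIZE);         /* 3 pages rolled back */
	assert(recv_skip_hint == 3 * PAGE_SIZE); /* caller must recv() these */
	printf("mapped %u bytes, unrolled %lu pages\n",
	       bytes_mapped, pages_remaining);
	return 0;
}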
@@ -1762,6 +1796,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
 
 	sock_rps_record_flow(sk);
 
+	tp = tcp_sk(sk);
+
 	down_read(&current->mm->mmap_sem);
 
 	vma = find_vma(current->mm, address);
@@ -1771,7 +1807,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	}
 	zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
 
-	tp = tcp_sk(sk);
 	seq = tp->copied_seq;
 	inq = tcp_inq(sk);
 	zc->length = min_t(u32, zc->length, inq);
@@ -1783,8 +1818,20 @@ static int tcp_zerocopy_receive(struct sock *sk,
 		zc->recv_skip_hint = zc->length;
 	}
 	ret = 0;
+	curr_addr = address;
 	while (length + PAGE_SIZE <= zc->length) {
 		if (zc->recv_skip_hint < PAGE_SIZE) {
+			/* If we're here, finish the current batch. */
+			if (pg_idx) {
+				ret = tcp_zerocopy_vm_insert_batch(vma, pages,
+								   pg_idx,
+								   &curr_addr,
+								   &length,
+								   &seq, zc);
+				if (ret)
+					goto out;
+				pg_idx = 0;
+			}
 			if (skb) {
 				if (zc->recv_skip_hint > 0)
 					break;
@@ -1793,7 +1840,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
 			} else {
 				skb = tcp_recv_skb(sk, seq, &offset);
 			}
-
 			zc->recv_skip_hint = skb->len - offset;
 			offset -= skb_headlen(skb);
 			if ((int)offset < 0 || skb_has_frag_list(skb))
@@ -1817,14 +1863,24 @@ static int tcp_zerocopy_receive(struct sock *sk,
 			zc->recv_skip_hint -= remaining;
 			break;
 		}
-		ret = vm_insert_page(vma, address + length,
-				     skb_frag_page(frags));
-		if (ret)
-			break;
+		pages[pg_idx] = skb_frag_page(frags);
+		pg_idx++;
 		length += PAGE_SIZE;
-		seq += PAGE_SIZE;
 		zc->recv_skip_hint -= PAGE_SIZE;
 		frags++;
+		if (pg_idx == PAGE_BATCH_SIZE) {
+			ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
+							   &curr_addr, &length,
+							   &seq, zc);
+			if (ret)
+				goto out;
+			pg_idx = 0;
+		}
+	}
+	if (pg_idx) {
+		ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
+						   &curr_addr, &length, &seq,
+						   zc);
 	}
 out:
 	up_read(&current->mm->mmap_sem);
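
The reason batching pays off is visible in the hunk above: the old loop called vm_insert_page() once per page, and each call takes the page-table spinlock, while the new code hands vm_insert_pages() up to PAGE_BATCH_SIZE pages that can be inserted under far fewer lock acquisitions. A back-of-the-envelope sketch of that amortization, modeling only the locking pattern and not kernel behavior:

#include <stdio.h>

/* Rough model: per-page insertion locks once per page; batched insertion
 * locks roughly once per batch. Real counts also depend on pmd boundaries,
 * so treat this as an illustration of the trend, not a measurement. */
int main(void)
{
	const unsigned int npages = 64; /* pages in one zerocopy receive */
	const unsigned int batch = 8;   /* PAGE_BATCH_SIZE from the patch */
	unsigned int per_page_locks = npages;
	unsigned int batched_locks = (npages + batch - 1) / batch;

	printf("vm_insert_page:  ~%u lock acquisitions\n", per_page_locks);
	printf("vm_insert_pages: ~%u lock acquisitions\n", batched_locks);
	return 0;
}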
