Commit 5704075

Paolo Abeni authored and davem330 committed
mptcp: allow collapsing consecutive sendpages on the same substream
If the current sendmsg() lands on the same subflow we used last, we can
try to collapse the data.

Signed-off-by: Paolo Abeni <[email protected]>
Signed-off-by: Christoph Paasch <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 7a6a6cb commit 5704075
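
Note for readers skimming the patch: the core eligibility test can be shown in isolation. Below is a minimal userspace sketch, not kernel code; struct ext and can_collapse() are simplified stand-ins for struct mptcp_ext and the new mptcp_skb_can_collapse_to() helper, reduced to the MPTCP-level sequence check (the real helper additionally requires tcp_skb_can_collapse_to() on the tail skb).

    /* Illustration only: simplified stand-ins for the kernel types. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct ext {                   /* stands in for struct mptcp_ext */
            uint64_t data_seq;     /* MPTCP-level seq of the mapping's first byte */
            uint32_t data_len;     /* bytes covered by this DSS mapping */
    };

    /* Collapsing is safe only if the tail mapping ends exactly where the
     * next write begins; then the mapping can simply grow by the bytes sent.
     */
    static bool can_collapse(const struct ext *mpext, uint64_t write_seq)
    {
            return mpext && mpext->data_seq + mpext->data_len == write_seq;
    }

    int main(void)
    {
            struct ext tail = { .data_seq = 1000, .data_len = 500 };

            printf("%d\n", can_collapse(&tail, 1500)); /* 1: contiguous, collapse */
            printf("%d\n", can_collapse(&tail, 1600)); /* 0: gap, needs a new mapping */
            return 0;
    }

When the check passes, the patch extends the existing DSS mapping (mpext->data_len += ret) instead of attaching a fresh extension to a new skb.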

File tree

1 file changed: +60 -15 lines changed

net/mptcp/protocol.c

Lines changed: 60 additions & 15 deletions
@@ -122,14 +122,27 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
 	return NULL;
 }
 
+static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
+					     const struct sk_buff *skb,
+					     const struct mptcp_ext *mpext)
+{
+	if (!tcp_skb_can_collapse_to(skb))
+		return false;
+
+	/* can collapse only if MPTCP level sequence is in order */
+	return mpext && mpext->data_seq + mpext->data_len == msk->write_seq;
+}
+
 static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
-			      struct msghdr *msg, long *timeo)
+			      struct msghdr *msg, long *timeo, int *pmss_now,
+			      int *ps_goal)
 {
-	int mss_now = 0, size_goal = 0, ret = 0;
+	int mss_now, avail_size, size_goal, ret;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_ext *mpext = NULL;
+	struct sk_buff *skb, *tail;
+	bool can_collapse = false;
 	struct page_frag *pfrag;
-	struct sk_buff *skb;
 	size_t psize;
 
 	/* use the mptcp page cache so that we can easily move the data
@@ -145,8 +158,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 
 	/* compute copy limit */
 	mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
-	psize = min_t(int, pfrag->size - pfrag->offset, size_goal);
+	*pmss_now = mss_now;
+	*ps_goal = size_goal;
+	avail_size = size_goal;
+	skb = tcp_write_queue_tail(ssk);
+	if (skb) {
+		mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+
+		/* Limit the write to the size available in the
+		 * current skb, if any, so that we create at most a new skb.
+		 * Explicitly tells TCP internals to avoid collapsing on later
+		 * queue management operation, to avoid breaking the ext <->
+		 * SSN association set here
+		 */
+		can_collapse = (size_goal - skb->len > 0) &&
+			       mptcp_skb_can_collapse_to(msk, skb, mpext);
+		if (!can_collapse)
+			TCP_SKB_CB(skb)->eor = 1;
+		else
+			avail_size = size_goal - skb->len;
+	}
+	psize = min_t(size_t, pfrag->size - pfrag->offset, avail_size);
 
+	/* Copy to page */
 	pr_debug("left=%zu", msg_data_left(msg));
 	psize = copy_page_from_iter(pfrag->page, pfrag->offset,
 				    min_t(size_t, msg_data_left(msg), psize),
@@ -155,21 +189,28 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	if (!psize)
 		return -EINVAL;
 
-	/* Mark the end of the previous write so the beginning of the
-	 * next write (with its own mptcp skb extension data) is not
-	 * collapsed.
+	/* tell the TCP stack to delay the push so that we can safely
+	 * access the skb after the sendpages call
 	 */
-	skb = tcp_write_queue_tail(ssk);
-	if (skb)
-		TCP_SKB_CB(skb)->eor = 1;
-
 	ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
 			       msg->msg_flags | MSG_SENDPAGE_NOTLAST);
 	if (ret <= 0)
 		return ret;
 	if (unlikely(ret < psize))
 		iov_iter_revert(&msg->msg_iter, psize - ret);
 
+	/* if the tail skb extension is still the cached one, collapsing
+	 * really happened. Note: we can't check for 'same skb' as the sk_buff
+	 * hdr on tail can be transmitted, freed and re-allocated by the
+	 * do_tcp_sendpages() call
+	 */
+	tail = tcp_write_queue_tail(ssk);
+	if (mpext && tail && mpext == skb_ext_find(tail, SKB_EXT_MPTCP)) {
+		WARN_ON_ONCE(!can_collapse);
+		mpext->data_len += ret;
+		goto out;
+	}
+
 	skb = tcp_write_queue_tail(ssk);
 	mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
 	msk->cached_ext = NULL;
@@ -185,11 +226,11 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		 mpext->data_seq, mpext->subflow_seq, mpext->data_len,
 		 mpext->dsn64);
 
+out:
 	pfrag->offset += ret;
 	msk->write_seq += ret;
 	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
 
-	tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal);
 	return ret;
 }
 
@@ -212,11 +253,11 @@ static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
 
 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
+	int mss_now = 0, size_goal = 0, ret = 0;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct socket *ssock;
 	size_t copied = 0;
 	struct sock *ssk;
-	int ret = 0;
 	long timeo;
 
 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
@@ -243,15 +284,19 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 	lock_sock(ssk);
 	while (msg_data_left(msg)) {
-		ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo);
+		ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo, &mss_now,
+					 &size_goal);
 		if (ret < 0)
			break;
 
 		copied += ret;
 	}
 
-	if (copied > 0)
+	if (copied) {
 		ret = copied;
+		tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
+			 size_goal);
+	}
 
 	ssk_check_wmem(msk, ssk);
 	release_sock(ssk);
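
The other half of the change moves the push out of the per-fragment path: mptcp_sendmsg_frag() now reports mss_now/size_goal back through out-parameters, and mptcp_sendmsg() issues a single tcp_push() once the whole message is queued. A minimal userspace sketch of that calling pattern follows; send_frag() and push_pending() are hypothetical stand-ins, not the kernel functions.

    /* Illustration only; names are hypothetical stand-ins. The caller,
     * not the fragment helper, issues the final push, using the
     * mss/size_goal values the last fragment reported back.
     */
    #include <stddef.h>
    #include <stdio.h>

    /* stands in for mptcp_sendmsg_frag(): queues up to size_goal bytes
     * and reports the current mss/size_goal via the out-parameters */
    static int send_frag(size_t *left, int *pmss_now, int *ps_goal)
    {
            *pmss_now = 1460;          /* pretend tcp_send_mss() said so */
            *ps_goal = 4 * 1460;
            size_t sent = *left < (size_t)*ps_goal ? *left : (size_t)*ps_goal;
            *left -= sent;
            return (int)sent;
    }

    /* stands in for tcp_push(): one flush per sendmsg() call */
    static void push_pending(int mss_now, int size_goal)
    {
            printf("push: mss=%d size_goal=%d\n", mss_now, size_goal);
    }

    int main(void)
    {
            int mss_now = 0, size_goal = 0, ret = 0;
            size_t copied = 0, left = 20000;

            while (left) {
                    ret = send_frag(&left, &mss_now, &size_goal);
                    if (ret < 0)
                            break;
                    copied += ret;
            }

            /* intermediate fragments stay queued (MSG_SENDPAGE_NOTLAST in
             * the patch), so they remain collapsible; push once at the end */
            if (copied)
                    push_pending(mss_now, size_goal);

            return 0;
    }

Deferring the push this way is what lets consecutive fragments land in the same tail skb before anything is handed to the TCP output path.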
