Skip to content

Commit b5b6ff7

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf-sockmap-fixes'
John Fastabend says: ==================== When I added the test_sockmap to selftests I mistakenly changed the test logic a bit. As a result, in redirect cases we ended up choosing the wrong sock from the BPF program and sending to a socket that had no receive handler. The actual receive handler, running on a different socket, then timed out and closed the socket. This results in errors (-EPIPE to be specific) on the sending side, typically when the sender does not complete the send before the receive side times out. So depending on timing and the size of the send we may get errors. This exposed some bugs in the sockmap error path handling. This series fixes the errors. The primary issue is we did not do proper memory accounting in these cases, which resulted in missing a sk_mem_uncharge(). This happened in the redirect path and in one case on the normal send path. See the three patches for the details. The other take-away from this is we need to fix the test_sockmap and also add more negative test cases. That will happen in bpf-next. Finally, I tested this using the existing test_sockmap program, the older sockmap sample test script, and a few real use cases with Cilium. All of these seem to be working correctly. v2: fix compiler warning, drop iterator variable 'i' that is no longer used in patch 3. ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 0f58e58 + abaeb09 commit b5b6ff7

File tree

1 file changed

+26
-22
lines changed

1 file changed

+26
-22
lines changed

kernel/bpf/sockmap.c

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -326,15 +326,16 @@ static int bpf_tcp_push(struct sock *sk, int apply_bytes,
326326
if (ret > 0) {
327327
if (apply)
328328
apply_bytes -= ret;
329+
330+
sg->offset += ret;
331+
sg->length -= ret;
329332
size -= ret;
330333
offset += ret;
331334
if (uncharge)
332335
sk_mem_uncharge(sk, ret);
333336
goto retry;
334337
}
335338

336-
sg->length = size;
337-
sg->offset = offset;
338339
return ret;
339340
}
340341

@@ -392,7 +393,8 @@ static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
392393
} while (i != md->sg_end);
393394
}
394395

395-
static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
396+
static void free_bytes_sg(struct sock *sk, int bytes,
397+
struct sk_msg_buff *md, bool charge)
396398
{
397399
struct scatterlist *sg = md->sg_data;
398400
int i = md->sg_start, free;
@@ -402,11 +404,13 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
402404
if (bytes < free) {
403405
sg[i].length -= bytes;
404406
sg[i].offset += bytes;
405-
sk_mem_uncharge(sk, bytes);
407+
if (charge)
408+
sk_mem_uncharge(sk, bytes);
406409
break;
407410
}
408411

409-
sk_mem_uncharge(sk, sg[i].length);
412+
if (charge)
413+
sk_mem_uncharge(sk, sg[i].length);
410414
put_page(sg_page(&sg[i]));
411415
bytes -= sg[i].length;
412416
sg[i].length = 0;
@@ -417,6 +421,7 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
417421
if (i == MAX_SKB_FRAGS)
418422
i = 0;
419423
}
424+
md->sg_start = i;
420425
}
421426

422427
static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
@@ -575,10 +580,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
575580
struct sk_msg_buff *md,
576581
int flags)
577582
{
583+
bool ingress = !!(md->flags & BPF_F_INGRESS);
578584
struct smap_psock *psock;
579585
struct scatterlist *sg;
580-
int i, err, free = 0;
581-
bool ingress = !!(md->flags & BPF_F_INGRESS);
586+
int err = 0;
582587

583588
sg = md->sg_data;
584589

@@ -606,16 +611,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
606611
out_rcu:
607612
rcu_read_unlock();
608613
out:
609-
i = md->sg_start;
610-
while (sg[i].length) {
611-
free += sg[i].length;
612-
put_page(sg_page(&sg[i]));
613-
sg[i].length = 0;
614-
i++;
615-
if (i == MAX_SKB_FRAGS)
616-
i = 0;
617-
}
618-
return free;
614+
free_bytes_sg(NULL, send, md, false);
615+
return err;
619616
}
620617

621618
static inline void bpf_md_init(struct smap_psock *psock)
@@ -700,19 +697,26 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
700697
err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
701698
lock_sock(sk);
702699

700+
if (unlikely(err < 0)) {
701+
free_start_sg(sk, m);
702+
psock->sg_size = 0;
703+
if (!cork)
704+
*copied -= send;
705+
} else {
706+
psock->sg_size -= send;
707+
}
708+
703709
if (cork) {
704710
free_start_sg(sk, m);
711+
psock->sg_size = 0;
705712
kfree(m);
706713
m = NULL;
714+
err = 0;
707715
}
708-
if (unlikely(err))
709-
*copied -= err;
710-
else
711-
psock->sg_size -= send;
712716
break;
713717
case __SK_DROP:
714718
default:
715-
free_bytes_sg(sk, send, m);
719+
free_bytes_sg(sk, send, m, true);
716720
apply_bytes_dec(psock, send);
717721
*copied -= send;
718722
psock->sg_size -= send;

0 commit comments

Comments
 (0)