Skip to content

Commit 2f48771

Browse files
Jon Maloy and davem330
authored and committed
tipc: guarantee that group broadcast doesn't bypass group unicast
We need a mechanism guaranteeing that group unicasts sent out from a socket are not bypassed by later sent broadcasts from the same socket. We do this as follows:

- Each time a unicast is sent, we set the broadcast method for the socket to "replicast" and "mandatory". This forces the first subsequent broadcast message to follow the same network and data path as the preceding unicast to a destination, hence preventing it from overtaking the latter.

- In order to make the 'same data path' statement above true, we let group unicasts pass through the multicast link input queue, instead of as previously through the unicast link input queue.

- In the first broadcast following a unicast, we set a new header flag, requiring all recipients to immediately acknowledge its reception.

- During the period before all the expected acknowledges are received, the socket refuses to accept any more broadcast attempts, i.e., by blocking or returning EAGAIN. This period should typically not be longer than a few microseconds.

- When all acknowledges have been received, the sending socket will open up for subsequent broadcasts, this time giving the link layer freedom to itself select the best transmission method.

- The forced and/or abrupt transmission method changes described above may lead to broadcasts arriving out of order to the recipients. We remedy this by introducing code that checks and if necessary re-orders such messages at the receiving end.

Signed-off-by: Jon Maloy <[email protected]>
Acked-by: Ying Xue <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent b87a5ea commit 2f48771

File tree

5 files changed

+94
-17
lines changed

5 files changed

+94
-17
lines changed

net/tipc/group.c

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ struct tipc_member {
7171
u16 advertised;
7272
u16 window;
7373
u16 bc_rcv_nxt;
74+
u16 bc_acked;
7475
bool usr_pending;
7576
};
7677

@@ -87,6 +88,7 @@ struct tipc_group {
8788
u32 portid;
8889
u16 member_cnt;
8990
u16 bc_snd_nxt;
91+
u16 bc_ackers;
9092
bool loopback;
9193
bool events;
9294
};
@@ -258,6 +260,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
258260
m->group = grp;
259261
m->node = node;
260262
m->port = port;
263+
m->bc_acked = grp->bc_snd_nxt - 1;
261264
grp->member_cnt++;
262265
tipc_group_add_to_tree(grp, m);
263266
tipc_nlist_add(&grp->dests, m->node);
@@ -275,6 +278,11 @@ static void tipc_group_delete_member(struct tipc_group *grp,
275278
{
276279
rb_erase(&m->tree_node, &grp->members);
277280
grp->member_cnt--;
281+
282+
/* Check if we were waiting for replicast ack from this member */
283+
if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
284+
grp->bc_ackers--;
285+
278286
list_del_init(&m->list);
279287
list_del_init(&m->congested);
280288

@@ -325,16 +333,23 @@ void tipc_group_update_member(struct tipc_member *m, int len)
325333
list_add_tail(&m->congested, &grp->congested);
326334
}
327335

328-
void tipc_group_update_bc_members(struct tipc_group *grp, int len)
336+
void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
329337
{
338+
u16 prev = grp->bc_snd_nxt - 1;
330339
struct tipc_member *m;
331340
struct rb_node *n;
332341

333342
for (n = rb_first(&grp->members); n; n = rb_next(n)) {
334343
m = container_of(n, struct tipc_member, tree_node);
335-
if (tipc_group_is_enabled(m))
344+
if (tipc_group_is_enabled(m)) {
336345
tipc_group_update_member(m, len);
346+
m->bc_acked = prev;
347+
}
337348
}
349+
350+
/* Mark number of acknowledges to expect, if any */
351+
if (ack)
352+
grp->bc_ackers = grp->member_cnt;
338353
grp->bc_snd_nxt++;
339354
}
340355

@@ -372,6 +387,10 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len)
372387
{
373388
struct tipc_member *m = NULL;
374389

390+
/* If prev bcast was replicast, reject until all receivers have acked */
391+
if (grp->bc_ackers)
392+
return true;
393+
375394
if (list_empty(&grp->congested))
376395
return false;
377396

@@ -391,7 +410,7 @@ static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
391410
struct sk_buff *_skb, *tmp;
392411
int mtyp = msg_type(hdr);
393412

394-
/* Bcast may be bypassed by unicast, - sort it in */
413+
/* Bcast may be bypassed by unicast or other bcast, - sort it in */
395414
if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
396415
skb_queue_walk_safe(defq, _skb, tmp) {
397416
_hdr = buf_msg(_skb);
@@ -412,10 +431,10 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
412431
struct sk_buff_head *xmitq)
413432
{
414433
struct sk_buff *skb = __skb_dequeue(inputq);
434+
bool ack, deliver, update;
415435
struct sk_buff_head *defq;
416436
struct tipc_member *m;
417437
struct tipc_msg *hdr;
418-
bool deliver, update;
419438
u32 node, port;
420439
int mtyp, blks;
421440

@@ -451,6 +470,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
451470
hdr = buf_msg(skb);
452471
mtyp = msg_type(hdr);
453472
deliver = true;
473+
ack = false;
454474
update = false;
455475

456476
if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
@@ -466,6 +486,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
466486
/* Fall thru */
467487
case TIPC_GRP_BCAST_MSG:
468488
m->bc_rcv_nxt++;
489+
ack = msg_grp_bc_ack_req(hdr);
469490
break;
470491
case TIPC_GRP_UCAST_MSG:
471492
break;
@@ -480,6 +501,9 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
480501
else
481502
kfree_skb(skb);
482503

504+
if (ack)
505+
tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
506+
483507
if (!update)
484508
continue;
485509

@@ -540,6 +564,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
540564
} else if (mtyp == GRP_ADV_MSG) {
541565
msg_set_adv_win(hdr, adv);
542566
m->advertised += adv;
567+
} else if (mtyp == GRP_ACK_MSG) {
568+
msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
543569
}
544570
__skb_queue_tail(xmitq, skb);
545571
}
@@ -593,7 +619,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
593619
}
594620
/* Otherwise deliver already received WITHDRAW event */
595621
__skb_queue_tail(inputq, m->event_msg);
596-
*usr_wakeup = m->usr_pending;
622+
*usr_wakeup = true;
597623
tipc_group_delete_member(grp, m);
598624
list_del_init(&m->congested);
599625
return;
@@ -605,6 +631,15 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
605631
m->usr_pending = false;
606632
list_del_init(&m->congested);
607633
return;
634+
case GRP_ACK_MSG:
635+
if (!m)
636+
return;
637+
m->bc_acked = msg_grp_bc_acked(hdr);
638+
if (--grp->bc_ackers)
639+
break;
640+
*usr_wakeup = true;
641+
m->usr_pending = false;
642+
return;
608643
default:
609644
pr_warn("Received unknown GROUP_PROTO message\n");
610645
}
@@ -678,7 +713,7 @@ void tipc_group_member_evt(struct tipc_group *grp,
678713

679714
TIPC_SKB_CB(skb)->orig_member = m->instance;
680715

681-
*usr_wakeup = m->usr_pending;
716+
*usr_wakeup = true;
682717
m->usr_pending = false;
683718

684719
/* Hold back event if more messages might be expected */

net/tipc/group.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,13 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
6161
struct tipc_msg *hdr,
6262
struct sk_buff_head *inputq,
6363
struct sk_buff_head *xmitq);
64-
void tipc_group_update_bc_members(struct tipc_group *grp, int len);
64+
void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
6565
bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
6666
int len, struct tipc_member **m);
6767
bool tipc_group_bc_cong(struct tipc_group *grp, int len);
6868
void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
6969
u32 port, struct sk_buff_head *xmitq);
7070
u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
7171
void tipc_group_update_member(struct tipc_member *m, int len);
72-
struct tipc_member *tipc_group_find_sender(struct tipc_group *grp,
73-
u32 node, u32 port);
7472
int tipc_group_size(struct tipc_group *grp);
7573
#endif

net/tipc/link.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,13 +1046,12 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
10461046
case TIPC_MEDIUM_IMPORTANCE:
10471047
case TIPC_HIGH_IMPORTANCE:
10481048
case TIPC_CRITICAL_IMPORTANCE:
1049-
if (unlikely(msg_mcast(hdr))) {
1049+
if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) {
10501050
skb_queue_tail(l->bc_rcvlink->inputq, skb);
10511051
return true;
10521052
}
1053-
case CONN_MANAGER:
10541053
case GROUP_PROTOCOL:
1055-
skb_queue_tail(inputq, skb);
1054+
case CONN_MANAGER:
10561055
return true;
10571056
case NAME_DISTRIBUTOR:
10581057
l->bc_rcvlink->state = LINK_ESTABLISHED;

net/tipc/msg.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,7 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
547547
#define GRP_JOIN_MSG 0
548548
#define GRP_LEAVE_MSG 1
549549
#define GRP_ADV_MSG 2
550+
#define GRP_ACK_MSG 3
550551

551552
/*
552553
* Word 1
@@ -839,6 +840,16 @@ static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n)
839840
msg_set_bits(m, 9, 16, 0xffff, n);
840841
}
841842

843+
static inline u16 msg_grp_bc_acked(struct tipc_msg *m)
844+
{
845+
return msg_bits(m, 9, 16, 0xffff);
846+
}
847+
848+
static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n)
849+
{
850+
msg_set_bits(m, 9, 16, 0xffff, n);
851+
}
852+
842853
/* Word 10
843854
*/
844855
static inline u16 msg_grp_evt(struct tipc_msg *m)
@@ -851,6 +862,16 @@ static inline void msg_set_grp_evt(struct tipc_msg *m, int n)
851862
msg_set_bits(m, 10, 0, 0x3, n);
852863
}
853864

865+
static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m)
866+
{
867+
return msg_bits(m, 10, 0, 0x1);
868+
}
869+
870+
static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n)
871+
{
872+
msg_set_bits(m, 10, 0, 0x1, n);
873+
}
874+
854875
static inline u16 msg_grp_bc_seqno(struct tipc_msg *m)
855876
{
856877
return msg_bits(m, 10, 16, 0xffff);

net/tipc/socket.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
831831
u32 dnode, u32 dport, int dlen)
832832
{
833833
u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
834+
struct tipc_mc_method *method = &tsk->mc_method;
834835
int blks = tsk_blocks(GROUP_H_SIZE + dlen);
835836
struct tipc_msg *hdr = &tsk->phdr;
836837
struct sk_buff_head pkts;
@@ -857,9 +858,12 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
857858
tsk->cong_link_cnt++;
858859
}
859860

860-
/* Update send window and sequence number */
861+
/* Update send window */
861862
tipc_group_update_member(mb, blks);
862863

864+
/* A broadcast sent within next EXPIRE period must follow same path */
865+
method->rcast = true;
866+
method->mandatory = true;
863867
return dlen;
864868
}
865869

@@ -1008,6 +1012,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
10081012
struct tipc_group *grp = tsk->group;
10091013
struct tipc_nlist *dsts = tipc_group_dests(grp);
10101014
struct tipc_mc_method *method = &tsk->mc_method;
1015+
bool ack = method->mandatory && method->rcast;
10111016
int blks = tsk_blocks(MCAST_H_SIZE + dlen);
10121017
struct tipc_msg *hdr = &tsk->phdr;
10131018
int mtu = tipc_bcast_get_mtu(net);
@@ -1036,20 +1041,27 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
10361041
msg_set_destnode(hdr, 0);
10371042
msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
10381043

1044+
/* Avoid getting stuck with repeated forced replicasts */
1045+
msg_set_grp_bc_ack_req(hdr, ack);
1046+
10391047
/* Build message as chain of buffers */
10401048
skb_queue_head_init(&pkts);
10411049
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
10421050
if (unlikely(rc != dlen))
10431051
return rc;
10441052

10451053
/* Send message */
1046-
rc = tipc_mcast_xmit(net, &pkts, method, dsts,
1047-
&tsk->cong_link_cnt);
1054+
rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
10481055
if (unlikely(rc))
10491056
return rc;
10501057

10511058
/* Update broadcast sequence number and send windows */
1052-
tipc_group_update_bc_members(tsk->group, blks);
1059+
tipc_group_update_bc_members(tsk->group, blks, ack);
1060+
1061+
/* Broadcast link is now free to choose method for next broadcast */
1062+
method->mandatory = false;
1063+
method->expires = jiffies;
1064+
10531065
return dlen;
10541066
}
10551067

@@ -1113,14 +1125,26 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
11131125
u32 portid, oport, onode;
11141126
struct list_head dports;
11151127
struct tipc_msg *msg;
1116-
int hsz;
1128+
int user, mtyp, hsz;
11171129

11181130
__skb_queue_head_init(&tmpq);
11191131
INIT_LIST_HEAD(&dports);
11201132

11211133
skb = tipc_skb_peek(arrvq, &inputq->lock);
11221134
for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
11231135
msg = buf_msg(skb);
1136+
user = msg_user(msg);
1137+
mtyp = msg_type(msg);
1138+
if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
1139+
spin_lock_bh(&inputq->lock);
1140+
if (skb_peek(arrvq) == skb) {
1141+
__skb_dequeue(arrvq);
1142+
__skb_queue_tail(inputq, skb);
1143+
}
1144+
refcount_dec(&skb->users);
1145+
spin_unlock_bh(&inputq->lock);
1146+
continue;
1147+
}
11241148
hsz = skb_headroom(skb) + msg_hdr_sz(msg);
11251149
oport = msg_origport(msg);
11261150
onode = msg_orignode(msg);

0 commit comments

Comments
 (0)