Skip to content

Commit 04d7b57

Browse files
Jon Maloy authored and davem330 committed
tipc: add multipoint-to-point flow control
We already have point-to-multipoint flow control within a group. But we also need the opposite: a scheme which can handle the case where potentially hundreds of sources try to send messages to the same destination simultaneously, without causing buffer overflow at the recipient. This commit adds such a mechanism.

The algorithm works as follows:

- When a member detects a new, joining member, it initially sets that member's state to JOINED and advertises a minimum window to the new member. This window is chosen so that the new member can send exactly one maximum-sized message, or several smaller ones, to the recipient before it must stop and wait for an additional advertisement. This minimum window ADV_IDLE is set to 65 1kB blocks.
- When a member receives the first data message from a JOINED member, it changes the state of the latter to ACTIVE, and advertises a larger window ADV_ACTIVE = 12 x ADV_IDLE blocks to the sender, so it can continue sending with minimal disturbances to the data flow.
- The active members are kept in a dedicated linked list. Each time a message is received from an active member, it will be moved to the tail of that list. This way, we keep a record of which members have been most (tail) and least (head) recently active.
- There is a maximum number of permitted simultaneous active senders per receiver (16 for small groups; the code scales it as member count / 8, clamped to the range 16..64). When this limit is reached, the receiver will not advertise anything immediately to a new sender, but instead put it in a PENDING state, and add it to a corresponding queue. At the same time, it will pick the least recently active member, send it an advertisement RECLAIM message, and set this member to state RECLAIMING.
- The reclaimee member has to respond with a REMIT message, meaning that it goes back to a send window of ADV_IDLE, and returns its unused advertised blocks beyond that value to the reclaiming member.
- When the reclaiming member receives the REMIT message, it unlinks the reclaimee from its active list, resets its state to JOINED, and notes that it is now back at ADV_IDLE advertised blocks to that member. If there are still unread data messages sent out by the reclaimee before the REMIT, the member goes into an intermediate state REMITTED, where it stays until the said messages have been consumed.
- The returned advertised blocks can now be re-advertised to the pending member, which is now set to state ACTIVE and added to the active member list.
- To be proactive, i.e., to minimize the risk that any member will end up in the pending queue, we start reclaiming resources already when the number of active members reaches 3/4 of the permitted maximum.

Signed-off-by: Jon Maloy <[email protected]>
Acked-by: Ying Xue <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent a3bada7 commit 04d7b57

File tree

2 files changed

+136
-5
lines changed

2 files changed

+136
-5
lines changed

net/tipc/group.c

Lines changed: 124 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ enum mbr_state {
5454
MBR_JOINING,
5555
MBR_PUBLISHED,
5656
MBR_JOINED,
57+
MBR_PENDING,
58+
MBR_ACTIVE,
59+
MBR_RECLAIMING,
60+
MBR_REMITTED,
5761
MBR_LEAVING
5862
};
5963

@@ -79,6 +83,9 @@ struct tipc_member {
7983
struct tipc_group {
8084
struct rb_root members;
8185
struct list_head congested;
86+
struct list_head pending;
87+
struct list_head active;
88+
struct list_head reclaiming;
8289
struct tipc_nlist dests;
8390
struct net *net;
8491
int subid;
@@ -88,6 +95,8 @@ struct tipc_group {
8895
u32 scope;
8996
u32 portid;
9097
u16 member_cnt;
98+
u16 active_cnt;
99+
u16 max_active;
91100
u16 bc_snd_nxt;
92101
u16 bc_ackers;
93102
bool loopback;
@@ -97,12 +106,29 @@ struct tipc_group {
97106
static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
98107
int mtyp, struct sk_buff_head *xmitq);
99108

109+
/*
 * tipc_group_decr_active - give back @m's slot in the group's active count
 * @grp: group whose active_cnt is adjusted
 * @m: member whose current state decides whether a slot is held
 *
 * Decrements grp->active_cnt only if @m is in state MBR_ACTIVE or
 * MBR_RECLAIMING; for every other state this is a no-op. It neither changes
 * m->state nor unlinks @m from any list, so it has to be called while
 * m->state still holds the old value (the callers visible in this diff call
 * it right before setting MBR_LEAVING or while deleting the member).
 */
static void tipc_group_decr_active(struct tipc_group *grp,
110+
struct tipc_member *m)
111+
{
112+
if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
113+
grp->active_cnt--;
114+
}
115+
100116
/*
 * tipc_group_rcvbuf_limit - compute the group's receive buffer limit in bytes
 * @grp: group to size the buffer for; grp->max_active is updated as a
 *       side effect
 *
 * Reserves ADV_ACTIVE blocks for each of max_active members and ADV_IDLE
 * blocks for each remaining member, then scales the block total by
 * FLOWCTL_BLK_SZ * 4.
 *
 * NOTE(review): this text is a rendered diff. The return statement under the
 * "105-" gutter marker appears to be the pre-commit line that is removed, and
 * the one under "131+" its replacement - confirm against the real file.
 */
static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
101117
{
118+
int max_active, active_pool, idle_pool;
102119
/* presumably the +1 counts the local member itself - TODO confirm */
int mcnt = grp->member_cnt + 1;
103120

121+
/* Limit simultaneous reception from other members */
122+
/* The next two statements clamp mcnt / 8 to the range 16..64 */
max_active = min(mcnt / 8, 64);
123+
max_active = max(max_active, 16);
124+
grp->max_active = max_active;
125+
126+
/* Reserve blocks for active and idle members */
127+
active_pool = max_active * ADV_ACTIVE;
128+
/* NOTE(review): negative whenever mcnt < max_active (i.e. mcnt < 16),
 * which reduces the sum below max_active * ADV_ACTIVE - confirm intended
 */
idle_pool = (mcnt - max_active) * ADV_IDLE;
129+
104130
/* Scale to bytes, considering worst-case truesize/msgsize ratio */
105-
return mcnt * ADV_ACTIVE * FLOWCTL_BLK_SZ * 4;
131+
return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4;
106132
}
107133

108134
u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
@@ -143,6 +169,9 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid,
143169
return NULL;
144170
tipc_nlist_init(&grp->dests, tipc_own_addr(net));
145171
INIT_LIST_HEAD(&grp->congested);
172+
INIT_LIST_HEAD(&grp->active);
173+
INIT_LIST_HEAD(&grp->pending);
174+
INIT_LIST_HEAD(&grp->reclaiming);
146175
grp->members = RB_ROOT;
147176
grp->net = net;
148177
grp->portid = portid;
@@ -286,6 +315,7 @@ static void tipc_group_delete_member(struct tipc_group *grp,
286315

287316
list_del_init(&m->list);
288317
list_del_init(&m->congested);
318+
tipc_group_decr_active(grp, m);
289319

290320
/* If last member on a node, remove node from dest list */
291321
if (!tipc_group_find_node(grp, m->node))
@@ -378,6 +408,10 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
378408
return true;
379409
if (state == MBR_JOINED && adv == ADV_IDLE)
380410
return true;
411+
if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
412+
return true;
413+
if (state == MBR_PENDING && adv == ADV_IDLE)
414+
return true;
381415
skb_queue_head_init(&xmitq);
382416
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
383417
tipc_node_distr_xmit(grp->net, &xmitq);
@@ -523,7 +557,11 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
523557
void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
524558
u32 port, struct sk_buff_head *xmitq)
525559
{
526-
struct tipc_member *m;
560+
struct list_head *active = &grp->active;
561+
int max_active = grp->max_active;
562+
int reclaim_limit = max_active * 3 / 4;
563+
int active_cnt = grp->active_cnt;
564+
struct tipc_member *m, *rm;
527565

528566
m = tipc_group_find_member(grp, node, port);
529567
if (!m)
@@ -533,9 +571,41 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
533571

534572
switch (m->state) {
535573
case MBR_JOINED:
536-
if (m->advertised <= (ADV_ACTIVE - ADV_UNIT))
574+
/* Reclaim advertised space from least active member */
575+
if (!list_empty(active) && active_cnt >= reclaim_limit) {
576+
rm = list_first_entry(active, struct tipc_member, list);
577+
rm->state = MBR_RECLAIMING;
578+
list_move_tail(&rm->list, &grp->reclaiming);
579+
tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
580+
}
581+
/* If max active, become pending and wait for reclaimed space */
582+
if (active_cnt >= max_active) {
583+
m->state = MBR_PENDING;
584+
list_add_tail(&m->list, &grp->pending);
585+
break;
586+
}
587+
/* Otherwise become active */
588+
m->state = MBR_ACTIVE;
589+
list_add_tail(&m->list, &grp->active);
590+
grp->active_cnt++;
591+
/* Fall through */
592+
case MBR_ACTIVE:
593+
if (!list_is_last(&m->list, &grp->active))
594+
list_move_tail(&m->list, &grp->active);
595+
if (m->advertised > (ADV_ACTIVE * 3 / 4))
596+
break;
597+
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
598+
break;
599+
case MBR_REMITTED:
600+
if (m->advertised > ADV_IDLE)
601+
break;
602+
m->state = MBR_JOINED;
603+
if (m->advertised < ADV_IDLE) {
604+
pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
537605
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
606+
}
538607
break;
608+
case MBR_RECLAIMING:
539609
case MBR_DISCOVERED:
540610
case MBR_JOINING:
541611
case MBR_LEAVING:
@@ -557,8 +627,10 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
557627
if (!skb)
558628
return;
559629

560-
if (m->state == MBR_JOINED)
630+
if (m->state == MBR_ACTIVE)
561631
adv = ADV_ACTIVE - m->advertised;
632+
else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
633+
adv = ADV_IDLE - m->advertised;
562634

563635
hdr = buf_msg(skb);
564636

@@ -573,6 +645,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
573645
m->advertised += adv;
574646
} else if (mtyp == GRP_ACK_MSG) {
575647
msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
648+
} else if (mtyp == GRP_REMIT_MSG) {
649+
msg_set_grp_remitted(hdr, m->window);
576650
}
577651
__skb_queue_tail(xmitq, skb);
578652
}
@@ -583,8 +657,9 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
583657
{
584658
u32 node = msg_orignode(hdr);
585659
u32 port = msg_origport(hdr);
586-
struct tipc_member *m;
660+
struct tipc_member *m, *pm;
587661
struct tipc_msg *ehdr;
662+
u16 remitted, in_flight;
588663

589664
if (!grp)
590665
return;
@@ -626,6 +701,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
626701

627702
/* Wait until WITHDRAW event is received */
628703
if (m->state != MBR_LEAVING) {
704+
tipc_group_decr_active(grp, m);
629705
m->state = MBR_LEAVING;
630706
return;
631707
}
@@ -653,6 +729,48 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
653729
*usr_wakeup = true;
654730
m->usr_pending = false;
655731
return;
732+
case GRP_RECLAIM_MSG:
733+
if (!m)
734+
return;
735+
*usr_wakeup = m->usr_pending;
736+
m->usr_pending = false;
737+
tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
738+
m->window = ADV_IDLE;
739+
return;
740+
case GRP_REMIT_MSG:
741+
if (!m || m->state != MBR_RECLAIMING)
742+
return;
743+
744+
list_del_init(&m->list);
745+
grp->active_cnt--;
746+
remitted = msg_grp_remitted(hdr);
747+
748+
/* Messages preceding the REMIT still in receive queue */
749+
if (m->advertised > remitted) {
750+
m->state = MBR_REMITTED;
751+
in_flight = m->advertised - remitted;
752+
}
753+
/* All messages preceding the REMIT have been read */
754+
if (m->advertised <= remitted) {
755+
m->state = MBR_JOINED;
756+
in_flight = 0;
757+
}
758+
/* ..and the REMIT overtaken by more messages => re-advertise */
759+
if (m->advertised < remitted)
760+
tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
761+
762+
m->advertised = ADV_IDLE + in_flight;
763+
764+
/* Set oldest pending member to active and advertise */
765+
if (list_empty(&grp->pending))
766+
return;
767+
pm = list_first_entry(&grp->pending, struct tipc_member, list);
768+
pm->state = MBR_ACTIVE;
769+
list_move_tail(&pm->list, &grp->active);
770+
grp->active_cnt++;
771+
if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
772+
tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
773+
return;
656774
default:
657775
pr_warn("Received unknown GROUP_PROTO message\n");
658776
}
@@ -735,6 +853,7 @@ void tipc_group_member_evt(struct tipc_group *grp,
735853
/* Hold back event if more messages might be expected */
736854
if (m->state != MBR_LEAVING && node_up) {
737855
m->event_msg = skb;
856+
tipc_group_decr_active(grp, m);
738857
m->state = MBR_LEAVING;
739858
} else {
740859
if (node_up)

net/tipc/msg.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,8 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
548548
#define GRP_LEAVE_MSG 1
549549
#define GRP_ADV_MSG 2
550550
#define GRP_ACK_MSG 3
551+
#define GRP_RECLAIM_MSG 4
552+
#define GRP_REMIT_MSG 5
551553

552554
/*
553555
* Word 1
@@ -850,6 +852,16 @@ static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n)
850852
msg_set_bits(m, 9, 16, 0xffff, n);
851853
}
852854

855+
/*
 * msg_grp_remitted - read the "remitted" value carried in a message header
 * @m: message header to read
 *
 * Returns msg_bits(m, 9, 16, 0xffff) as a u16. The (9, 16, 0xffff) arguments
 * are the same ones msg_set_grp_bc_acked() passes to msg_set_bits(), so the
 * remitted and bc_acked values presumably share one header field and are
 * told apart by message type - TODO confirm.
 */
static inline u16 msg_grp_remitted(struct tipc_msg *m)
856+
{
857+
return msg_bits(m, 9, 16, 0xffff);
858+
}
859+
860+
/*
 * msg_set_grp_remitted - store the "remitted" value in a message header
 * @m: message header to modify
 * @n: 16-bit value to store
 *
 * Writes @n via msg_set_bits(m, 9, 16, 0xffff, n), the same arguments that
 * msg_set_grp_bc_acked() uses. NOTE(review): the two setters presumably
 * overwrite the same header bits, so only one of them should be applied to
 * a given message - confirm.
 */
static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n)
861+
{
862+
msg_set_bits(m, 9, 16, 0xffff, n);
863+
}
864+
853865
/* Word 10
854866
*/
855867
static inline u16 msg_grp_evt(struct tipc_msg *m)

0 commit comments

Comments
 (0)