Skip to content

Commit 1a20cc2

Browse files
Jon Paul Maloy authored and davem330 committed
tipc: introduce node contact FSM
The logic for determining when a node is permitted to establish and maintain contact with its peer node becomes non-trivial in the presence of multiple parallel links that may come and go independently.

A known failure scenario is that one endpoint registers both its links to the peer lost, cleans up its binding table, and prepares for a table update once contact is re-established, while the other endpoint may see its links reset and re-established one by one, hence seeing no need to re-synchronize the binding table. To avoid this, a node must not allow re-establishing contact until it has confirmation that even the peer has lost both links.

Currently, the mechanism for handling this consists of setting and resetting two state flags from different locations in the code. This solution is hard to understand and maintain. A closer analysis even reveals that it is not completely safe.

In this commit we instead introduce an FSM that keeps track of the conditions for when the node can establish and maintain links. It has six states and four events, and is strictly based on explicit knowledge about the own node's and the peer node's contact states. Only events leading to a state change are shown as edges in the figure below.
                             +--------------+
                             | SELF_UP/     |
           +---------------->| PEER_COMING  |-----------------+
    SELF_  |                 +--------------+                 |PEER_
    ESTBL_ |                        |                         |ESTBL_
    CONTACT|      SELF_LOST_CONTACT |                         |CONTACT
           |                        v                         |
           |                 +--------------+                 |
           |      PEER_      | SELF_DOWN/   |     SELF_       |
           |      LOST_   +--| PEER_LEAVING |<--+ LOST_       v
+-------------+   CONTACT |  +--------------+   | CONTACT  +-----------+
| SELF_DOWN/  |<----------+                     +----------| SELF_UP/  |
| PEER_DOWN   |<----------+                     +----------| PEER_UP   |
+-------------+   SELF_   |  +--------------+   | PEER_    +-----------+
           |      LOST_   +--| SELF_LEAVING/|<--+ LOST_       A
           |      CONTACT    | PEER_DOWN    |     CONTACT     |
           |                 +--------------+                 |
           |                        A                         |
    PEER_  |      PEER_LOST_CONTACT |                         |SELF_
    ESTBL_ |                        |                         |ESTBL_
    CONTACT|                 +--------------+                 |CONTACT
           +---------------->| PEER_UP/     |-----------------+
                             | SELF_COMING  |
                             +--------------+

Reviewed-by: Ying Xue <[email protected]>
Signed-off-by: Jon Maloy <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 8a1577c commit 1a20cc2

File tree

4 files changed

+185
-54
lines changed

4 files changed

+185
-54
lines changed

net/tipc/link.c

Lines changed: 32 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -911,9 +911,13 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
911911

912912
if (l_ptr->addr) {
913913
/* Handle failure on standard link */
914-
link_print(l_ptr, "Resetting link\n");
914+
link_print(l_ptr, "Resetting link ");
915+
pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
916+
msg_user(msg), msg_type(msg), msg_size(msg),
917+
msg_errcode(msg));
918+
pr_info("sqno %u, prev: %x, src: %x\n",
919+
msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg));
915920
tipc_link_reset(l_ptr);
916-
917921
} else {
918922
/* Handle failure on broadcast link */
919923
struct tipc_node *n_ptr;
@@ -1067,15 +1071,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
10671071
if (unlikely(!l_ptr))
10681072
goto unlock;
10691073

1070-
/* Verify that communication with node is currently allowed */
1071-
if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) &&
1072-
msg_user(msg) == LINK_PROTOCOL &&
1073-
(msg_type(msg) == RESET_MSG ||
1074-
msg_type(msg) == ACTIVATE_MSG) &&
1075-
!msg_redundant_link(msg))
1076-
n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN;
1077-
1078-
if (tipc_node_blocked(n_ptr))
1074+
/* Is reception of this pkt permitted at the moment ? */
1075+
if (!tipc_node_filter_skb(n_ptr, msg))
10791076
goto unlock;
10801077

10811078
/* Validate message sequence number info */
@@ -1371,15 +1368,6 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
13711368
if (less_eq(msg_session(msg), l_ptr->peer_session))
13721369
break; /* duplicate or old reset: ignore */
13731370
}
1374-
1375-
if (!msg_redundant_link(msg) && (link_working(l_ptr) ||
1376-
link_probing(l_ptr))) {
1377-
/* peer has lost contact -- don't allow peer's links
1378-
* to reactivate before we recognize loss & clean up
1379-
*/
1380-
l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN;
1381-
}
1382-
13831371
link_state_event(l_ptr, RESET_MSG);
13841372

13851373
/* fall thru' */
@@ -1408,6 +1396,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
14081396
l_ptr->peer_session = msg_session(msg);
14091397
l_ptr->peer_bearer_id = msg_bearer_id(msg);
14101398

1399+
if (!msg_peer_is_up(msg))
1400+
tipc_node_fsm_evt(l_ptr->owner, PEER_LOST_CONTACT_EVT);
14111401
if (msg_type(msg) == ACTIVATE_MSG)
14121402
link_state_event(l_ptr, ACTIVATE_MSG);
14131403
break;
@@ -1419,11 +1409,11 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
14191409

14201410
if (msg_linkprio(msg) &&
14211411
(msg_linkprio(msg) != l_ptr->priority)) {
1422-
pr_debug("%s<%s>, priority change %u->%u\n",
1423-
link_rst_msg, l_ptr->name,
1424-
l_ptr->priority, msg_linkprio(msg));
1412+
pr_info("%s<%s>, priority change %u->%u\n",
1413+
link_rst_msg, l_ptr->name,
1414+
l_ptr->priority, msg_linkprio(msg));
14251415
l_ptr->priority = msg_linkprio(msg);
1426-
tipc_link_reset(l_ptr); /* Enforce change to take effect */
1416+
tipc_link_reset(l_ptr);
14271417
break;
14281418
}
14291419

@@ -1446,15 +1436,18 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
14461436
tipc_bclink_update_link_state(l_ptr->owner,
14471437
msg_last_bcast(msg));
14481438

1449-
if (rec_gap || (msg_probe(msg))) {
1439+
if (rec_gap || (msg_probe(msg)))
14501440
tipc_link_proto_xmit(l_ptr, STATE_MSG, 0,
14511441
rec_gap, 0, 0);
1452-
}
1442+
14531443
if (msg_seq_gap(msg)) {
14541444
l_ptr->stats.recv_nacks++;
14551445
tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq),
14561446
msg_seq_gap(msg));
14571447
}
1448+
if (tipc_link_is_up(l_ptr))
1449+
tipc_node_fsm_evt(l_ptr->owner,
1450+
PEER_ESTABL_CONTACT_EVT);
14581451
break;
14591452
}
14601453
exit:
@@ -1478,10 +1471,6 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
14781471
if (l->exec_mode == TIPC_LINK_BLOCKED)
14791472
return;
14801473

1481-
/* Abort non-RESET send if communication with node is prohibited */
1482-
if ((tipc_node_blocked(l->owner)) && (mtyp != RESET_MSG))
1483-
return;
1484-
14851474
msg_set_type(hdr, mtyp);
14861475
msg_set_net_plane(hdr, l->net_plane);
14871476
msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
@@ -1799,27 +1788,28 @@ static void link_reset_statistics(struct tipc_link *l_ptr)
17991788
l_ptr->stats.recv_info = l_ptr->rcv_nxt;
18001789
}
18011790

1802-
static void link_print(struct tipc_link *l_ptr, const char *str)
1791+
static void link_print(struct tipc_link *l, const char *str)
18031792
{
1804-
struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id);
1805-
struct tipc_bearer *b_ptr;
1793+
struct sk_buff *hskb = skb_peek(&l->transmq);
1794+
u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt;
1795+
u16 tail = l->snd_nxt - 1;
18061796

1807-
rcu_read_lock();
1808-
b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]);
1809-
if (b_ptr)
1810-
pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name);
1811-
rcu_read_unlock();
1797+
pr_info("%s Link <%s>:", str, l->name);
18121798

1813-
if (link_probing(l_ptr))
1799+
if (link_probing(l))
18141800
pr_cont(":P\n");
1815-
else if (link_establishing(l_ptr))
1801+
else if (link_establishing(l))
18161802
pr_cont(":E\n");
1817-
else if (link_resetting(l_ptr))
1803+
else if (link_resetting(l))
18181804
pr_cont(":R\n");
1819-
else if (link_working(l_ptr))
1805+
else if (link_working(l))
18201806
pr_cont(":W\n");
18211807
else
18221808
pr_cont("\n");
1809+
1810+
pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n",
1811+
skb_queue_len(&l->transmq), head, tail,
1812+
skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt);
18231813
}
18241814

18251815
/* Parse and validate nested (link) properties valid for media, bearer and link

net/tipc/msg.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,13 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
766766
msg_set_bits(m, 9, 0, 0xffff, n);
767767
}
768768

769+
static inline bool msg_peer_is_up(struct tipc_msg *m)
770+
{
771+
if (likely(msg_user(m) != LINK_PROTOCOL) || (msg_type(m) == STATE_MSG))
772+
return true;
773+
return msg_redundant_link(m);
774+
}
775+
769776
struct sk_buff *tipc_buf_acquire(u32 size);
770777
bool tipc_msg_validate(struct sk_buff *skb);
771778
bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode,

net/tipc/node.c

Lines changed: 125 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr)
141141
break;
142142
}
143143
list_add_tail_rcu(&n_ptr->list, &temp_node->list);
144-
n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
144+
n_ptr->state = SELF_DOWN_PEER_DOWN;
145145
n_ptr->signature = INVALID_NODE_SIG;
146146
n_ptr->active_links[0] = INVALID_BEARER_ID;
147147
n_ptr->active_links[1] = INVALID_BEARER_ID;
@@ -421,8 +421,131 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
421421
}
422422
}
423423

424+
/* tipc_node_fsm_evt - node finite state machine
425+
* Determines when contact is allowed with peer node
426+
*/
427+
void tipc_node_fsm_evt(struct tipc_node *n, int evt)
428+
{
429+
int state = n->state;
430+
431+
switch (state) {
432+
case SELF_DOWN_PEER_DOWN:
433+
switch (evt) {
434+
case SELF_ESTABL_CONTACT_EVT:
435+
state = SELF_UP_PEER_COMING;
436+
break;
437+
case PEER_ESTABL_CONTACT_EVT:
438+
state = SELF_COMING_PEER_UP;
439+
break;
440+
case SELF_LOST_CONTACT_EVT:
441+
case PEER_LOST_CONTACT_EVT:
442+
break;
443+
default:
444+
pr_err("Unknown node fsm evt %x/%x\n", state, evt);
445+
}
446+
break;
447+
case SELF_UP_PEER_UP:
448+
switch (evt) {
449+
case SELF_LOST_CONTACT_EVT:
450+
state = SELF_DOWN_PEER_LEAVING;
451+
break;
452+
case PEER_LOST_CONTACT_EVT:
453+
state = SELF_LEAVING_PEER_DOWN;
454+
break;
455+
case SELF_ESTABL_CONTACT_EVT:
456+
case PEER_ESTABL_CONTACT_EVT:
457+
break;
458+
default:
459+
pr_err("Unknown node fsm evt %x/%x\n", state, evt);
460+
}
461+
break;
462+
case SELF_DOWN_PEER_LEAVING:
463+
switch (evt) {
464+
case PEER_LOST_CONTACT_EVT:
465+
state = SELF_DOWN_PEER_DOWN;
466+
break;
467+
case SELF_ESTABL_CONTACT_EVT:
468+
case PEER_ESTABL_CONTACT_EVT:
469+
case SELF_LOST_CONTACT_EVT:
470+
break;
471+
default:
472+
pr_err("Unknown node fsm evt %x/%x\n", state, evt);
473+
}
474+
break;
475+
case SELF_UP_PEER_COMING:
476+
switch (evt) {
477+
case PEER_ESTABL_CONTACT_EVT:
478+
state = SELF_UP_PEER_UP;
479+
break;
480+
case SELF_LOST_CONTACT_EVT:
481+
state = SELF_DOWN_PEER_LEAVING;
482+
break;
483+
case SELF_ESTABL_CONTACT_EVT:
484+
case PEER_LOST_CONTACT_EVT:
485+
break;
486+
default:
487+
pr_err("Unknown node fsm evt %x/%x\n", state, evt);
488+
}
489+
break;
490+
case SELF_COMING_PEER_UP:
491+
switch (evt) {
492+
case SELF_ESTABL_CONTACT_EVT:
493+
state = SELF_UP_PEER_UP;
494+
break;
495+
case PEER_LOST_CONTACT_EVT:
496+
state = SELF_LEAVING_PEER_DOWN;
497+
break;
498+
case SELF_LOST_CONTACT_EVT:
499+
case PEER_ESTABL_CONTACT_EVT:
500+
break;
501+
default:
502+
pr_err("Unknown node fsm evt %x/%x\n", state, evt);
503+
}
504+
break;
505+
case SELF_LEAVING_PEER_DOWN:
506+
switch (evt) {
507+
case SELF_LOST_CONTACT_EVT:
508+
state = SELF_DOWN_PEER_DOWN;
509+
break;
510+
case SELF_ESTABL_CONTACT_EVT:
511+
case PEER_ESTABL_CONTACT_EVT:
512+
case PEER_LOST_CONTACT_EVT:
513+
break;
514+
default:
515+
pr_err("Unknown node fsm evt %x/%x\n", state, evt);
516+
}
517+
break;
518+
default:
519+
pr_err("Unknown node fsm state %x\n", state);
520+
break;
521+
}
522+
523+
n->state = state;
524+
}
525+
526+
bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_msg *hdr)
527+
{
528+
int state = n->state;
529+
530+
if (likely(state == SELF_UP_PEER_UP))
531+
return true;
532+
if (state == SELF_DOWN_PEER_DOWN)
533+
return true;
534+
if (state == SELF_UP_PEER_COMING)
535+
return true;
536+
if (state == SELF_COMING_PEER_UP)
537+
return true;
538+
if (state == SELF_LEAVING_PEER_DOWN)
539+
return false;
540+
if (state == SELF_DOWN_PEER_LEAVING)
541+
if (!msg_peer_is_up(hdr))
542+
return true;
543+
return false;
544+
}
545+
424546
static void node_established_contact(struct tipc_node *n_ptr)
425547
{
548+
tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT);
426549
n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
427550
n_ptr->bclink.oos_state = 0;
428551
n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net);
@@ -468,11 +591,8 @@ static void node_lost_contact(struct tipc_node *n_ptr)
468591
l_ptr->failover_skb = NULL;
469592
tipc_link_reset_fragments(l_ptr);
470593
}
471-
472-
n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN;
473-
474594
/* Prevent re-contact with node until cleanup is done */
475-
n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN;
595+
tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT);
476596

477597
/* Notify publications from this node */
478598
n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;

net/tipc/node.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,24 @@
4747

4848
#define INVALID_BEARER_ID -1
4949

50+
/* Node FSM states.
 * Each name gives this node's own contact state first and the peer
 * node's contact state second.
 */
enum {
	/* Both ends agree */
	SELF_DOWN_PEER_DOWN    = 0xdd,
	SELF_UP_PEER_UP        = 0xaa,

	/* Contact being established */
	SELF_UP_PEER_COMING    = 0xac,
	SELF_COMING_PEER_UP    = 0xca,

	/* Contact being torn down */
	SELF_DOWN_PEER_LEAVING = 0xd1,
	SELF_LEAVING_PEER_DOWN = 0x1d,
};
60+
61+
/* Node FSM events, accepted by tipc_node_fsm_evt() */
enum {
	/* Contact established */
	SELF_ESTABL_CONTACT_EVT = 0xec,
	PEER_ESTABL_CONTACT_EVT = 0xfec,

	/* Contact lost */
	SELF_LOST_CONTACT_EVT   = 0x1c,
	PEER_LOST_CONTACT_EVT   = 0xf1c,
};
67+
5068
/* Flags used to take different actions according to flag type
5169
* TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down
5270
* TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
@@ -56,8 +74,6 @@
5674
*/
5775
enum {
5876
TIPC_MSG_EVT = 1,
59-
TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1),
60-
TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2),
6177
TIPC_NOTIFY_NODE_DOWN = (1 << 3),
6278
TIPC_NOTIFY_NODE_UP = (1 << 4),
6379
TIPC_WAKEUP_BCAST_USERS = (1 << 5),
@@ -133,6 +149,7 @@ struct tipc_node {
133149
int action_flags;
134150
struct tipc_node_bclink bclink;
135151
struct list_head list;
152+
int state;
136153
int link_cnt;
137154
u16 working_links;
138155
u16 capabilities;
@@ -176,11 +193,8 @@ static inline void tipc_node_lock(struct tipc_node *node)
176193
spin_lock_bh(&node->lock);
177194
}
178195

179-
static inline bool tipc_node_blocked(struct tipc_node *node)
180-
{
181-
return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN |
182-
TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN));
183-
}
196+
void tipc_node_fsm_evt(struct tipc_node *n, int evt);
197+
bool tipc_node_filter_skb(struct tipc_node *n, struct tipc_msg *hdr);
184198

185199
static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel)
186200
{

0 commit comments

Comments
 (0)