Skip to content

Commit 40501f9

Browse files
Jon Paul Maloy authored and davem330 committed
tipc: don't reset stale broadcast send link
When the broadcast send link after 100 attempts has failed to transfer a packet to all peers, we consider it stale, and reset it. Thereafter it needs to re-synchronize with the peers, something currently done by just resetting and re-establishing all links to all peers. This has turned out to be overkill, with potentially unwanted consequences for the remaining cluster.

A closer analysis reveals that this can be done much more simply. When this kind of failure happens, for reasons that may lie outside the TIPC protocol, it is typically only one peer which is failing to receive and acknowledge packets. It is hence sufficient to identify and reset the links only to that peer to resolve the situation, without having to reset the broadcast link at all. This solution entails a much lower risk of negative consequences for our own node as well as for the overall cluster.

We implement this change in this commit.

Reviewed-by: Parthasarathy Bhuvaragan <[email protected]>
Acked-by: Ying Xue <[email protected]>
Signed-off-by: Jon Maloy <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent e65a495 commit 40501f9

File tree

4 files changed

+17
-45
lines changed

4 files changed

+17
-45
lines changed

net/tipc/bearer.c

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -365,30 +365,6 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
365365
return 0;
366366
}
367367

368-
/* tipc_bearer_reset_all - reset all links on all bearers
369-
*/
370-
void tipc_bearer_reset_all(struct net *net)
371-
{
372-
struct tipc_bearer *b;
373-
int i;
374-
375-
for (i = 0; i < MAX_BEARERS; i++) {
376-
b = bearer_get(net, i);
377-
if (b)
378-
clear_bit_unlock(0, &b->up);
379-
}
380-
for (i = 0; i < MAX_BEARERS; i++) {
381-
b = bearer_get(net, i);
382-
if (b)
383-
tipc_reset_bearer(net, b);
384-
}
385-
for (i = 0; i < MAX_BEARERS; i++) {
386-
b = bearer_get(net, i);
387-
if (b)
388-
test_and_set_bit_lock(0, &b->up);
389-
}
390-
}
391-
392368
/**
393369
* bearer_disable
394370
*

net/tipc/bearer.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,6 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest);
210210
struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name);
211211
int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id);
212212
struct tipc_media *tipc_media_find(const char *name);
213-
void tipc_bearer_reset_all(struct net *net);
214213
int tipc_bearer_setup(void);
215214
void tipc_bearer_cleanup(void);
216215
void tipc_bearer_stop(struct net *net);

net/tipc/link.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -978,15 +978,15 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
978978
struct tipc_msg *hdr = buf_msg(skb);
979979

980980
pr_warn("Retransmission failure on link <%s>\n", l->name);
981-
link_print(l, "Resetting link ");
981+
link_print(l, "State of link ");
982982
pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
983983
msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
984984
pr_info("sqno %u, prev: %x, src: %x\n",
985985
msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
986986
}
987987

988-
int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
989-
struct sk_buff_head *xmitq)
988+
int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
989+
u16 from, u16 to, struct sk_buff_head *xmitq)
990990
{
991991
struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
992992
struct tipc_msg *hdr;
@@ -997,11 +997,14 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
997997
return 0;
998998

999999
/* Detect repeated retransmit failures on same packet */
1000-
if (likely(l->last_retransm != buf_seqno(skb))) {
1001-
l->last_retransm = buf_seqno(skb);
1002-
l->stale_count = 1;
1003-
} else if (++l->stale_count > 100) {
1000+
if (nacker->last_retransm != buf_seqno(skb)) {
1001+
nacker->last_retransm = buf_seqno(skb);
1002+
nacker->stale_count = 1;
1003+
} else if (++nacker->stale_count > 100) {
10041004
link_retransmit_failure(l, skb);
1005+
nacker->stale_count = 0;
1006+
if (link_is_bc_sndlink(l))
1007+
return TIPC_LINK_DOWN_EVT;
10051008
return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
10061009
}
10071010

@@ -1528,7 +1531,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
15281531

15291532
/* If NACK, retransmit will now start at right position */
15301533
if (gap) {
1531-
rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq);
1534+
rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq);
15321535
l->stats.recv_nacks++;
15331536
}
15341537

@@ -1680,7 +1683,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
16801683
return rc;
16811684

16821685
if (link_bc_retr_eval(snd_l, &from, &to))
1683-
rc = tipc_link_retrans(snd_l, from, to, xmitq);
1686+
rc = tipc_link_retrans(snd_l, l, from, to, xmitq);
16841687

16851688
l->snd_nxt = peers_snd_nxt;
16861689
if (link_bc_rcv_gap(l))
@@ -1775,7 +1778,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
17751778

17761779
if (dnode == tipc_own_addr(l->net)) {
17771780
tipc_link_bc_ack_rcv(l, acked, xmitq);
1778-
rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
1781+
rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq);
17791782
l->stats.recv_nacks++;
17801783
return rc;
17811784
}

net/tipc/node.c

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,7 +1284,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
12841284
rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
12851285

12861286
if (rc & TIPC_LINK_DOWN_EVT) {
1287-
tipc_bearer_reset_all(n->net);
1287+
tipc_node_reset_links(n);
12881288
return;
12891289
}
12901290

@@ -1351,15 +1351,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
13511351
if (!skb_queue_empty(&be->inputq1))
13521352
tipc_node_mcast_rcv(n);
13531353

1354-
if (rc & TIPC_LINK_DOWN_EVT) {
1355-
/* Reception reassembly failure => reset all links to peer */
1356-
if (!tipc_link_is_up(be->link))
1357-
tipc_node_reset_links(n);
1358-
1359-
/* Retransmission failure => reset all links to all peers */
1360-
if (!tipc_link_is_up(tipc_bc_sndlink(net)))
1361-
tipc_bearer_reset_all(net);
1362-
}
1354+
/* If reassembly or retransmission failure => reset all links to peer */
1355+
if (rc & TIPC_LINK_DOWN_EVT)
1356+
tipc_node_reset_links(n);
13631357

13641358
tipc_node_put(n);
13651359
}

0 commit comments

Comments
 (0)