@@ -75,6 +75,7 @@ struct sockaddr_pair {
75
75
* @conn_instance: TIPC instance used when connection was established
76
76
* @published: non-zero if port has one or more associated names
77
77
* @max_pkt: maximum packet size "hint" used when building messages sent by port
78
+ * @maxnagle: maximum size of msg which can be subject to nagle
78
79
* @portid: unique port identity in TIPC socket hash table
79
80
* @phdr: preformatted message header used when sending messages
80
81
* @cong_links: list of congested links
@@ -97,6 +98,7 @@ struct tipc_sock {
97
98
u32 conn_instance ;
98
99
int published ;
99
100
u32 max_pkt ;
101
+ u32 maxnagle ;
100
102
u32 portid ;
101
103
struct tipc_msg phdr ;
102
104
struct list_head cong_links ;
@@ -116,6 +118,10 @@ struct tipc_sock {
116
118
struct tipc_mc_method mc_method ;
117
119
struct rcu_head rcu ;
118
120
struct tipc_group * group ;
121
+ u32 oneway ;
122
+ u16 snd_backlog ;
123
+ bool expect_ack ;
124
+ bool nodelay ;
119
125
bool group_is_open ;
120
126
};
121
127
@@ -137,6 +143,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk);
137
143
static void tipc_sk_remove (struct tipc_sock * tsk );
138
144
static int __tipc_sendstream (struct socket * sock , struct msghdr * m , size_t dsz );
139
145
static int __tipc_sendmsg (struct socket * sock , struct msghdr * m , size_t dsz );
146
+ static void tipc_sk_push_backlog (struct tipc_sock * tsk );
140
147
141
148
static const struct proto_ops packet_ops ;
142
149
static const struct proto_ops stream_ops ;
@@ -227,6 +234,26 @@ static u16 tsk_inc(struct tipc_sock *tsk, int msglen)
227
234
return 1 ;
228
235
}
229
236
237
+ /* tsk_set_nagle - enable/disable nagle property by manipulating maxnagle
238
+ */
239
+ static void tsk_set_nagle (struct tipc_sock * tsk )
240
+ {
241
+ struct sock * sk = & tsk -> sk ;
242
+
243
+ tsk -> maxnagle = 0 ;
244
+ if (sk -> sk_type != SOCK_STREAM )
245
+ return ;
246
+ if (tsk -> nodelay )
247
+ return ;
248
+ if (!(tsk -> peer_caps & TIPC_NAGLE ))
249
+ return ;
250
+ /* Limit node local buffer size to avoid receive queue overflow */
251
+ if (tsk -> max_pkt == MAX_MSG_SIZE )
252
+ tsk -> maxnagle = 1500 ;
253
+ else
254
+ tsk -> maxnagle = tsk -> max_pkt ;
255
+ }
256
+
230
257
/**
231
258
* tsk_advance_rx_queue - discard first buffer in socket receive queue
232
259
*
@@ -446,6 +473,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
446
473
447
474
tsk = tipc_sk (sk );
448
475
tsk -> max_pkt = MAX_PKT_DEFAULT ;
476
+ tsk -> maxnagle = 0 ;
449
477
INIT_LIST_HEAD (& tsk -> publications );
450
478
INIT_LIST_HEAD (& tsk -> cong_links );
451
479
msg = & tsk -> phdr ;
@@ -512,8 +540,12 @@ static void __tipc_shutdown(struct socket *sock, int error)
512
540
tipc_wait_for_cond (sock , & timeout , (!tsk -> cong_link_cnt &&
513
541
!tsk_conn_cong (tsk )));
514
542
515
- /* Remove any pending SYN message */
516
- __skb_queue_purge (& sk -> sk_write_queue );
543
+ /* Push out unsent messages or remove if pending SYN */
544
+ skb = skb_peek (& sk -> sk_write_queue );
545
+ if (skb && !msg_is_syn (buf_msg (skb )))
546
+ tipc_sk_push_backlog (tsk );
547
+ else
548
+ __skb_queue_purge (& sk -> sk_write_queue );
517
549
518
550
/* Reject all unreceived messages, except on an active connection
519
551
* (which disconnects locally & sends a 'FIN+' to peer).
@@ -1208,6 +1240,27 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
1208
1240
tipc_sk_rcv (net , inputq );
1209
1241
}
1210
1242
1243
+ /* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
1244
+ * when socket is in Nagle mode
1245
+ */
1246
+ static void tipc_sk_push_backlog (struct tipc_sock * tsk )
1247
+ {
1248
+ struct sk_buff_head * txq = & tsk -> sk .sk_write_queue ;
1249
+ struct net * net = sock_net (& tsk -> sk );
1250
+ u32 dnode = tsk_peer_node (tsk );
1251
+ int rc ;
1252
+
1253
+ if (skb_queue_empty (txq ) || tsk -> cong_link_cnt )
1254
+ return ;
1255
+
1256
+ tsk -> snt_unacked += tsk -> snd_backlog ;
1257
+ tsk -> snd_backlog = 0 ;
1258
+ tsk -> expect_ack = true;
1259
+ rc = tipc_node_xmit (net , txq , dnode , tsk -> portid );
1260
+ if (rc == - ELINKCONG )
1261
+ tsk -> cong_link_cnt = 1 ;
1262
+ }
1263
+
1211
1264
/**
1212
1265
* tipc_sk_conn_proto_rcv - receive a connection mng protocol message
1213
1266
* @tsk: receiving socket
@@ -1221,7 +1274,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
1221
1274
u32 onode = tsk_own_node (tsk );
1222
1275
struct sock * sk = & tsk -> sk ;
1223
1276
int mtyp = msg_type (hdr );
1224
- bool conn_cong ;
1277
+ bool was_cong ;
1225
1278
1226
1279
/* Ignore if connection cannot be validated: */
1227
1280
if (!tsk_peer_msg (tsk , hdr )) {
@@ -1254,11 +1307,13 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
1254
1307
__skb_queue_tail (xmitq , skb );
1255
1308
return ;
1256
1309
} else if (mtyp == CONN_ACK ) {
1257
- conn_cong = tsk_conn_cong (tsk );
1310
+ was_cong = tsk_conn_cong (tsk );
1311
+ tsk -> expect_ack = false;
1312
+ tipc_sk_push_backlog (tsk );
1258
1313
tsk -> snt_unacked -= msg_conn_ack (hdr );
1259
1314
if (tsk -> peer_caps & TIPC_BLOCK_FLOWCTL )
1260
1315
tsk -> snd_win = msg_adv_win (hdr );
1261
- if (conn_cong )
1316
+ if (was_cong && ! tsk_conn_cong ( tsk ) )
1262
1317
sk -> sk_write_space (sk );
1263
1318
} else if (mtyp != CONN_PROBE_REPLY ) {
1264
1319
pr_warn ("Received unknown CONN_PROTO msg\n" );
@@ -1437,15 +1492,15 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
1437
1492
struct sock * sk = sock -> sk ;
1438
1493
DECLARE_SOCKADDR (struct sockaddr_tipc * , dest , m -> msg_name );
1439
1494
long timeout = sock_sndtimeo (sk , m -> msg_flags & MSG_DONTWAIT );
1495
+ struct sk_buff_head * txq = & sk -> sk_write_queue ;
1440
1496
struct tipc_sock * tsk = tipc_sk (sk );
1441
1497
struct tipc_msg * hdr = & tsk -> phdr ;
1442
1498
struct net * net = sock_net (sk );
1443
- struct sk_buff_head pkts ;
1444
1499
u32 dnode = tsk_peer_node (tsk );
1500
+ int maxnagle = tsk -> maxnagle ;
1501
+ int maxpkt = tsk -> max_pkt ;
1445
1502
int send , sent = 0 ;
1446
- int rc = 0 ;
1447
-
1448
- __skb_queue_head_init (& pkts );
1503
+ int blocks , rc = 0 ;
1449
1504
1450
1505
if (unlikely (dlen > INT_MAX ))
1451
1506
return - EMSGSIZE ;
@@ -1467,21 +1522,35 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
1467
1522
tipc_sk_connected (sk )));
1468
1523
if (unlikely (rc ))
1469
1524
break ;
1470
-
1471
1525
send = min_t (size_t , dlen - sent , TIPC_MAX_USER_MSG_SIZE );
1472
- rc = tipc_msg_build (hdr , m , sent , send , tsk -> max_pkt , & pkts );
1473
- if (unlikely (rc != send ))
1474
- break ;
1475
-
1476
- trace_tipc_sk_sendstream (sk , skb_peek (& pkts ),
1526
+ blocks = tsk -> snd_backlog ;
1527
+ if (tsk -> oneway ++ >= 4 && send <= maxnagle ) {
1528
+ rc = tipc_msg_append (hdr , m , send , maxnagle , txq );
1529
+ if (unlikely (rc < 0 ))
1530
+ break ;
1531
+ blocks += rc ;
1532
+ if (blocks <= 64 && tsk -> expect_ack ) {
1533
+ tsk -> snd_backlog = blocks ;
1534
+ sent += send ;
1535
+ break ;
1536
+ }
1537
+ tsk -> expect_ack = true;
1538
+ } else {
1539
+ rc = tipc_msg_build (hdr , m , sent , send , maxpkt , txq );
1540
+ if (unlikely (rc != send ))
1541
+ break ;
1542
+ blocks += tsk_inc (tsk , send + MIN_H_SIZE );
1543
+ }
1544
+ trace_tipc_sk_sendstream (sk , skb_peek (txq ),
1477
1545
TIPC_DUMP_SK_SNDQ , " " );
1478
- rc = tipc_node_xmit (net , & pkts , dnode , tsk -> portid );
1546
+ rc = tipc_node_xmit (net , txq , dnode , tsk -> portid );
1479
1547
if (unlikely (rc == - ELINKCONG )) {
1480
1548
tsk -> cong_link_cnt = 1 ;
1481
1549
rc = 0 ;
1482
1550
}
1483
1551
if (likely (!rc )) {
1484
- tsk -> snt_unacked += tsk_inc (tsk , send + MIN_H_SIZE );
1552
+ tsk -> snt_unacked += blocks ;
1553
+ tsk -> snd_backlog = 0 ;
1485
1554
sent += send ;
1486
1555
}
1487
1556
} while (sent < dlen && !rc );
@@ -1528,6 +1597,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
1528
1597
tipc_node_add_conn (net , peer_node , tsk -> portid , peer_port );
1529
1598
tsk -> max_pkt = tipc_node_get_mtu (net , peer_node , tsk -> portid , true);
1530
1599
tsk -> peer_caps = tipc_node_get_capabilities (net , peer_node );
1600
+ tsk_set_nagle (tsk );
1531
1601
__skb_queue_purge (& sk -> sk_write_queue );
1532
1602
if (tsk -> peer_caps & TIPC_BLOCK_FLOWCTL )
1533
1603
return ;
@@ -1848,6 +1918,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
1848
1918
bool peek = flags & MSG_PEEK ;
1849
1919
int offset , required , copy , copied = 0 ;
1850
1920
int hlen , dlen , err , rc ;
1921
+ bool ack = false;
1851
1922
long timeout ;
1852
1923
1853
1924
/* Catch invalid receive attempts */
@@ -1892,6 +1963,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
1892
1963
1893
1964
/* Copy data if msg ok, otherwise return error/partial data */
1894
1965
if (likely (!err )) {
1966
+ ack = msg_ack_required (hdr );
1895
1967
offset = skb_cb -> bytes_read ;
1896
1968
copy = min_t (int , dlen - offset , buflen - copied );
1897
1969
rc = skb_copy_datagram_msg (skb , hlen + offset , m , copy );
@@ -1919,7 +1991,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
1919
1991
1920
1992
/* Send connection flow control advertisement when applicable */
1921
1993
tsk -> rcv_unacked += tsk_inc (tsk , hlen + dlen );
1922
- if (unlikely ( tsk -> rcv_unacked >= tsk -> rcv_win / TIPC_ACK_RATE ) )
1994
+ if (ack || tsk -> rcv_unacked >= tsk -> rcv_win / TIPC_ACK_RATE )
1923
1995
tipc_sk_send_ack (tsk );
1924
1996
1925
1997
/* Exit if all requested data or FIN/error received */
@@ -1990,6 +2062,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
1990
2062
smp_wmb ();
1991
2063
tsk -> cong_link_cnt -- ;
1992
2064
wakeup = true;
2065
+ tipc_sk_push_backlog (tsk );
1993
2066
break ;
1994
2067
case GROUP_PROTOCOL :
1995
2068
tipc_group_proto_rcv (grp , & wakeup , hdr , inputq , xmitq );
@@ -2029,6 +2102,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
2029
2102
2030
2103
if (unlikely (msg_mcast (hdr )))
2031
2104
return false;
2105
+ tsk -> oneway = 0 ;
2032
2106
2033
2107
switch (sk -> sk_state ) {
2034
2108
case TIPC_CONNECTING :
@@ -2074,6 +2148,8 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
2074
2148
return true;
2075
2149
return false;
2076
2150
case TIPC_ESTABLISHED :
2151
+ if (!skb_queue_empty (& sk -> sk_write_queue ))
2152
+ tipc_sk_push_backlog (tsk );
2077
2153
/* Accept only connection-based messages sent by peer */
2078
2154
if (likely (con_msg && !err && pport == oport && pnode == onode ))
2079
2155
return true;
@@ -2959,6 +3035,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
2959
3035
case TIPC_SRC_DROPPABLE :
2960
3036
case TIPC_DEST_DROPPABLE :
2961
3037
case TIPC_CONN_TIMEOUT :
3038
+ case TIPC_NODELAY :
2962
3039
if (ol < sizeof (value ))
2963
3040
return - EINVAL ;
2964
3041
if (get_user (value , (u32 __user * )ov ))
@@ -3007,6 +3084,10 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
3007
3084
case TIPC_GROUP_LEAVE :
3008
3085
res = tipc_sk_leave (tsk );
3009
3086
break ;
3087
+ case TIPC_NODELAY :
3088
+ tsk -> nodelay = !!value ;
3089
+ tsk_set_nagle (tsk );
3090
+ break ;
3010
3091
default :
3011
3092
res = - EINVAL ;
3012
3093
}
0 commit comments