Skip to content

Commit 78b9f35

Browse files
committed
Fix long-standing race in net-tokio reading after a disconnect event
If rust-lightning tells us to disconnect a socket after we read some bytes from the socket, but before we actually give those bytes to rust-lightning, we may end up calling rust-lightning with a Descriptor that isn't registered anymore. Sadly, there really isn't a good way to solve this, and it should be a pretty quick event, so we just busy-wait.
1 parent bf177e1 commit 78b9f35

File tree

1 file changed

+35
-15
lines changed

1 file changed

+35
-15
lines changed

lightning-net-tokio/src/lib.rs

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ use lightning::ln::peer_handler;
5252
use lightning::ln::peer_handler::SocketDescriptor as LnSocketTrait;
5353
use lightning::ln::msgs::ChannelMessageHandler;
5454

55-
use std::task;
55+
use std::{task, thread};
5656
use std::net::SocketAddr;
5757
use std::sync::{Arc, Mutex};
5858
use std::sync::atomic::{AtomicU64, Ordering};
@@ -82,6 +82,11 @@ struct Connection {
8282
// so by setting read_paused. If the read thread thereafter reads some data, it will place a
8383
// Sender here and then block on it.
8484
read_blocker: Option<oneshot::Sender<()>>,
85+
// When we are told by rust-lightning to disconnect, we can't return to rust-lightning until we
86+
// are sure we won't call any more read/write PeerManager functions with the same connection.
87+
// This is set to true if we're in such a condition (with disconnect checked before with the
88+
// top-level mutex held) and false when we can return.
89+
disconnect_block: bool,
8590
read_paused: bool,
8691
// If we get disconnected via SocketDescriptor::disconnect_socket(), we don't call
8792
// disconnect_event(), but if we get an Err return value out of PeerManager, in general, we do.
@@ -102,19 +107,28 @@ impl Connection {
102107
} }
103108
}
104109

110+
macro_rules! prepare_read_write_call {
111+
() => { {
112+
let mut us_lock = us.lock().unwrap();
113+
if us_lock.disconnect {
114+
shutdown_socket!("disconnect_socket() call from RL");
115+
}
116+
us_lock.disconnect_block = true;
117+
} }
118+
}
119+
105120
// Whenever we want to block, we have to at least select with the write_event Receiver,
106121
// which is used by the SocketDescriptor to wake us up if we need to shut down the
107122
// socket or if we need to generate a write_event.
108123
macro_rules! select_write_ev {
109124
($v: expr) => { {
110125
assert!($v.is_some()); // We can't have dropped the sending end, its in the us Arc!
111-
if us.lock().unwrap().disconnect {
112-
shutdown_socket!("disconnect_socket() call from RL");
113-
}
126+
prepare_read_write_call!();
114127
if let Err(e) = peer_manager.write_buffer_space_avail(&mut SocketDescriptor::new(us.clone())) {
115128
us.lock().unwrap().need_disconnect_event = false;
116129
shutdown_socket!(e);
117130
}
131+
us.lock().unwrap().disconnect_block = false;
118132
} }
119133
}
120134

@@ -147,6 +161,7 @@ impl Connection {
147161
v = write_event.recv() => select_write_ev!(v),
148162
}
149163
}
164+
prepare_read_write_call!();
150165
match peer_manager.read_event(&mut SocketDescriptor::new(Arc::clone(&us)), &buf[0..len]) {
151166
Ok(pause_read) => {
152167
if pause_read {
@@ -168,6 +183,7 @@ impl Connection {
168183
shutdown_socket!(e)
169184
},
170185
}
186+
us.lock().unwrap().disconnect_block = false;
171187
},
172188
Err(e) => {
173189
println!("Connection closed: {}", e);
@@ -176,6 +192,7 @@ impl Connection {
176192
},
177193
}
178194
}
195+
us.lock().unwrap().disconnect_block = false;
179196
let writer_option = us.lock().unwrap().writer.take();
180197
if let Some(mut writer) = writer_option {
181198
// If the socket is already closed, shutdown() will fail, so just ignore it.
@@ -205,7 +222,7 @@ impl Connection {
205222

206223
(reader, receiver,
207224
Arc::new(Mutex::new(Self {
208-
writer: Some(writer), event_notify, write_avail,
225+
writer: Some(writer), event_notify, write_avail, disconnect_block: false,
209226
read_blocker: None, read_paused: false, need_disconnect_event: true, disconnect: false,
210227
id: ID_COUNTER.fetch_add(1, Ordering::AcqRel)
211228
})))
@@ -371,16 +388,19 @@ impl peer_handler::SocketDescriptor for SocketDescriptor {
371388
}
372389

373390
fn disconnect_socket(&mut self) {
374-
let mut us = self.conn.lock().unwrap();
375-
us.need_disconnect_event = false;
376-
us.disconnect = true;
377-
us.read_paused = true;
378-
// Wake up the sending thread, assuming its still alive
379-
let _ = us.write_avail.try_send(());
380-
// TODO: There's a race where we don't meet the requirements of disconnect_socket if the
381-
// read task is about to call a PeerManager function (eg read_event or write_event).
382-
// Ideally we need to release the us lock and block until we have confirmation from the
383-
// read task that it has broken out of its main loop.
391+
{
392+
let mut us = self.conn.lock().unwrap();
393+
us.need_disconnect_event = false;
394+
us.disconnect = true;
395+
us.read_paused = true;
396+
// Wake up the sending thread, assuming its still alive
397+
let _ = us.write_avail.try_send(());
398+
// Happy-path return:
399+
if !us.disconnect_block { return; }
400+
}
401+
while self.conn.lock().unwrap().disconnect_block {
402+
thread::yield_now();
403+
}
384404
}
385405
}
386406
impl Clone for SocketDescriptor {

0 commit comments

Comments
 (0)