Skip to content

Commit c4ba3ff

Browse files
committed
Fix long-standing race in net-tokio reading after a disconnect event
If rust-lightning tells us to disconnect a socket after we read some bytes from the socket, but before we actually give those bytes to rust-lightning, we may end up calling rust-lightning with a Descriptor that isn't registered anymore. Sadly, there really isn't a good way to solve this, and it should be a pretty quick event, so we just busy-wait.
1 parent 62e7b00 commit c4ba3ff

File tree

1 file changed

+35
-18
lines changed

1 file changed

+35
-18
lines changed

lightning-net-tokio/src/lib.rs

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ use lightning::ln::peer_handler;
7070
use lightning::ln::peer_handler::SocketDescriptor as LnSocketTrait;
7171
use lightning::ln::msgs::ChannelMessageHandler;
7272

73-
use std::task;
73+
use std::{task, thread};
7474
use std::net::SocketAddr;
7575
use std::sync::{Arc, Mutex};
7676
use std::sync::atomic::{AtomicU64, Ordering};
@@ -111,6 +111,11 @@ struct Connection {
111111
// socket. To wake it up (without otherwise changing its state, we can push a value into this
112112
// Sender.
113113
read_waker: mpsc::Sender<()>,
114+
// When we are told by rust-lightning to disconnect, we can't return to rust-lightning until we
115+
// are sure we won't call any more read/write PeerManager functions with the same connection.
116+
// This is set to true if we're in such a condition (with disconnect checked before with the
117+
// top-level mutex held) and false when we can return.
118+
block_disconnect_socket: bool,
114119
read_paused: bool,
115120
disconnect_state: DisconnectionState,
116121
id: u64,
@@ -128,17 +133,26 @@ impl Connection {
128133
} }
129134
}
130135

136+
macro_rules! prepare_read_write_call {
137+
() => { {
138+
let mut us_lock = us.lock().unwrap();
139+
if us_lock.disconnect_state == DisconnectionState::RLTriggeredDisconnect {
140+
shutdown_socket!("disconnect_socket() call from RL");
141+
}
142+
us_lock.block_disconnect_socket = true;
143+
} }
144+
}
145+
131146
let read_paused = us.lock().unwrap().read_paused;
132147
tokio::select! {
133148
v = write_avail_receiver.recv() => {
134149
assert!(v.is_some()); // We can't have dropped the sending end, its in the us Arc!
135-
if us.lock().unwrap().disconnect_state == DisconnectionState::RLTriggeredDisconnect {
136-
shutdown_socket!("disconnect_socket() call from RL");
137-
}
150+
prepare_read_write_call!();
138151
if let Err(e) = peer_manager.write_buffer_space_avail(&mut SocketDescriptor::new(us.clone())) {
139152
us.lock().unwrap().disconnect_state = DisconnectionState::RLTriggeredDisconnect;
140153
shutdown_socket!(e);
141154
}
155+
us.lock().unwrap().block_disconnect_socket = false;
142156
},
143157
_ = read_wake_receiver.recv() => {},
144158
read = reader.read(&mut buf), if !read_paused => match read {
@@ -147,9 +161,7 @@ impl Connection {
147161
break;
148162
},
149163
Ok(len) => {
150-
if us.lock().unwrap().disconnect_state == DisconnectionState::RLTriggeredDisconnect {
151-
shutdown_socket!("disconnect_socket() call from RL");
152-
}
164+
prepare_read_write_call!();
153165
match peer_manager.read_event(&mut SocketDescriptor::new(Arc::clone(&us)), &buf[0..len]) {
154166
Ok(pause_read) => {
155167
if pause_read {
@@ -171,6 +183,7 @@ impl Connection {
171183
shutdown_socket!(e)
172184
},
173185
}
186+
us.lock().unwrap().block_disconnect_socket = false;
174187
},
175188
Err(e) => {
176189
println!("Connection closed: {}", e);
@@ -179,6 +192,7 @@ impl Connection {
179192
},
180193
}
181194
}
195+
us.lock().unwrap().block_disconnect_socket = false;
182196
let writer_option = us.lock().unwrap().writer.take();
183197
if let Some(mut writer) = writer_option {
184198
// If the socket is already closed, shutdown() will fail, so just ignore it.
@@ -212,8 +226,8 @@ impl Connection {
212226

213227
(reader, write_receiver, read_receiver,
214228
Arc::new(Mutex::new(Self {
215-
writer: Some(writer), event_notify, write_avail, read_waker,
216-
read_paused: false, disconnect_state: DisconnectionState::NeedDisconnectEvent,
229+
writer: Some(writer), event_notify, write_avail, read_waker, read_paused: false,
230+
block_disconnect_socket: false, disconnect_state: DisconnectionState::NeedDisconnectEvent,
217231
id: ID_COUNTER.fetch_add(1, Ordering::AcqRel)
218232
})))
219233
}
@@ -400,15 +414,18 @@ impl peer_handler::SocketDescriptor for SocketDescriptor {
400414
}
401415

402416
fn disconnect_socket(&mut self) {
403-
let mut us = self.conn.lock().unwrap();
404-
us.disconnect_state = DisconnectionState::RLTriggeredDisconnect;
405-
us.read_paused = true;
406-
// Wake up the sending thread, assuming it is still alive
407-
let _ = us.write_avail.try_send(());
408-
// TODO: There's a race where we don't meet the requirements of disconnect_socket if the
409-
// read task is about to call a PeerManager function (eg read_event or write_event).
410-
// Ideally we need to release the us lock and block until we have confirmation from the
411-
// read task that it has broken out of its main loop.
417+
{
418+
let mut us = self.conn.lock().unwrap();
419+
us.disconnect_state = DisconnectionState::RLTriggeredDisconnect;
420+
us.read_paused = true;
421+
// Wake up the sending thread, assuming it is still alive
422+
let _ = us.write_avail.try_send(());
423+
// Happy-path return:
424+
if !us.block_disconnect_socket { return; }
425+
}
426+
while self.conn.lock().unwrap().block_disconnect_socket {
427+
thread::yield_now();
428+
}
412429
}
413430
}
414431
impl Clone for SocketDescriptor {

0 commit comments

Comments
 (0)