[Concurrency runtime] Don't read from the actor after transitioning state

DougGregor · DougGregor · commit 3f0d61cc5d97 · 2023-05-18T15:57:27.000-07:00
Once we have transitioned the actor into a new state, we report the
state change as a trace event so it can be noted by tools (e.g.,
Instruments). However, the act of transitioning to a new state can mean
that there is an opportunity for another thread to deallocate the
actor. This means that the tracing call cannot depend on dereferencing
the actor pointer.

A refactoring a few months ago to move the bit that indicates when a
distributed actor is remote from inside the atomic actor state out to a
separate field (because it's constant for a given actor instance),
which introduced a dereference of the actor instance in forming the
tracing call. This introduced a narrow window in which a race
condition could occur: the actor transitions to an idle state, and is
then deallocate before the trace event for the actor transition occurs,
leading to a use-after-free.

Fetch this bit of information earlier in the process, before any state
changes and when we know the actor is still allocated, and pass it
through to the tracing code.

Fixes rdar://108497870.
diff --git a/stdlib/public/Concurrency/Actor.cpp b/stdlib/public/Concurrency/Actor.cpp
@@ -794,7 +794,7 @@ class alignas(sizeof(void *) * 2) ActiveActorStatus {
   }
 #endif
 
-  void traceStateChanged(HeapObject *actor) {
+  void traceStateChanged(HeapObject *actor, bool distributedActorIsRemote) {
     // Convert our state to a consistent raw value. These values currently match
     // the enum values, but this explicit conversion provides room for change.
     uint8_t traceState = 255;
@@ -814,7 +814,7 @@ class alignas(sizeof(void *) * 2) ActiveActorStatus {
     }
     concurrency::trace::actor_state_changed(
         actor, getFirstJob().getRawJob(), getFirstJob().needsPreprocessing(),
-        traceState, swift_distributed_actor_is_remote((HeapObject *) actor),
+        traceState, distributedActorIsRemote,
         isMaxPriorityEscalated(), static_cast<uint8_t>(getMaxPriority()));
   }
 };
@@ -1176,12 +1176,12 @@ static void traceJobQueue(DefaultActorImpl *actor, Job *first) {
 }
 
 static SWIFT_ATTRIBUTE_ALWAYS_INLINE void traceActorStateTransition(DefaultActorImpl *actor,
-    ActiveActorStatus oldState, ActiveActorStatus newState) {
+    ActiveActorStatus oldState, ActiveActorStatus newState, bool distributedActorIsRemote) {
 
   SWIFT_TASK_DEBUG_LOG("Actor %p transitioned from %#x to %#x (%s)", actor,
                        oldState.getOpaqueFlags(), newState.getOpaqueFlags(),
                        __FUNCTION__);
-  newState.traceStateChanged(actor);
+  newState.traceStateChanged(actor, distributedActorIsRemote);
 }
 
 #if SWIFT_CONCURRENCY_ENABLE_PRIORITY_ESCALATION
@@ -1206,6 +1206,7 @@ void DefaultActorImpl::enqueue(Job *job, JobPriority priority) {
   SWIFT_TASK_DEBUG_LOG("Enqueueing job %p onto actor %p at priority %#zx", job,
                        this, priority);
   concurrency::trace::actor_enqueue(this, job);
+  bool distributedActorIsRemote = swift_distributed_actor_is_remote(this);
   auto oldState = _status().load(std::memory_order_relaxed);
   while (true) {
     auto newState = oldState;
@@ -1233,7 +1234,7 @@ void DefaultActorImpl::enqueue(Job *job, JobPriority priority) {
     if (_status().compare_exchange_weak(oldState, newState,
                    /* success */ std::memory_order_release,
                    /* failure */ std::memory_order_relaxed)) {
-      traceActorStateTransition(this, oldState, newState);
+      traceActorStateTransition(this, oldState, newState, distributedActorIsRemote);
 
       if (!oldState.isScheduled() && newState.isScheduled()) {
         // We took responsibility to schedule the actor for the first time. See
@@ -1276,6 +1277,7 @@ void DefaultActorImpl::enqueueStealer(Job *job, JobPriority priority) {
 
   SWIFT_TASK_DEBUG_LOG("[Override] Escalating an actor %p due to job that is enqueued being escalated", this);
 
+  bool distributedActorIsRemote = swift_distributed_actor_is_remote(this);
   auto oldState = _status().load(std::memory_order_relaxed);
   while (true) {
     // Until we figure out how to safely enqueue a stealer and rendevouz with
@@ -1314,7 +1316,7 @@ void DefaultActorImpl::enqueueStealer(Job *job, JobPriority priority) {
     if (_status().compare_exchange_weak(oldState, newState,
                    /* success */ std::memory_order_relaxed,
                    /* failure */ std::memory_order_relaxed)) {
-      traceActorStateTransition(this, oldState, newState);
+      traceActorStateTransition(this, oldState, newState, distributedActorIsRemote);
 #if SWIFT_CONCURRENCY_ENABLE_PRIORITY_ESCALATION
       if (newState.isRunning()) {
         // Actor is running on a thread, escalate the thread running it
@@ -1344,6 +1346,7 @@ Job * DefaultActorImpl::drainOne() {
   SWIFT_TASK_DEBUG_LOG("Draining one job from default actor %p", this);
 
   // Pairs with the store release in DefaultActorImpl::enqueue
+  bool distributedActorIsRemote = swift_distributed_actor_is_remote(this);
   auto oldState = _status().load(SWIFT_MEMORY_ORDER_CONSUME);
   _swift_tsan_consume(this);
 
@@ -1367,7 +1370,7 @@ Job * DefaultActorImpl::drainOne() {
                             /* success */ std::memory_order_relaxed,
                             /* failure */ std::memory_order_relaxed)) {
       SWIFT_TASK_DEBUG_LOG("Drained first job %p from actor %p", firstJob, this);
-      traceActorStateTransition(this, oldState, newState);
+      traceActorStateTransition(this, oldState, newState, distributedActorIsRemote);
       concurrency::trace::actor_dequeue(this, firstJob);
       return firstJob;
     }
@@ -1556,6 +1559,7 @@ retry:;
   SWIFT_TASK_DEBUG_LOG("Thread attempting to jump onto %p, as drainer = %d", this, asDrainer);
 #endif
 
+  bool distributedActorIsRemote = swift_distributed_actor_is_remote(this);
   auto oldState = _status().load(std::memory_order_relaxed);
   while (true) {
 
@@ -1612,7 +1616,7 @@ retry:;
                                  std::memory_order_acquire,
                                  std::memory_order_relaxed)) {
       _swift_tsan_acquire(this);
-      traceActorStateTransition(this, oldState, newState);
+      traceActorStateTransition(this, oldState, newState, distributedActorIsRemote);
       return true;
     }
   }
@@ -1635,6 +1639,7 @@ bool DefaultActorImpl::unlock(bool forceUnlock)
   this->drainLock.unlock();
   return true;
 #else
+  bool distributedActorIsRemote = swift_distributed_actor_is_remote(this);
   auto oldState = _status().load(std::memory_order_relaxed);
   SWIFT_TASK_DEBUG_LOG("Try unlock-ing actor %p with forceUnlock = %d", this, forceUnlock);
 
@@ -1693,7 +1698,7 @@ bool DefaultActorImpl::unlock(bool forceUnlock)
                       /* success */ std::memory_order_release,
                       /* failure */ std::memory_order_relaxed)) {
       _swift_tsan_release(this);
-      traceActorStateTransition(this, oldState, newState);
+      traceActorStateTransition(this, oldState, newState, distributedActorIsRemote);
 
       if (newState.isScheduled()) {
         // See ownership rule (6) in DefaultActorImpl