Skip to content

Commit 9a94afc

Browse files
committed
[Runtime] Eliminate stack frames in swift_retain and swift_bridgeObjectRetain on ARM64.
Rearrange the slow paths a bit to make them tail calls, which allows the compiler to emit these functions without frames. Clang is happy to emit frameless functions on ARM64 if no stack space is needed on any execution path. However, when there's a fast path which doesn't need stack space, and a slow path which does, clang emits code that pushes a stack frame and then decides which path to take. This is fine, but it means we're paying more than we'd like to on the fast path. We can work around that by manually outlining the slow path, and ensuring that it's invoked with a tail call. Then the original function doesn't need a stack frame on any path and clang omits the stack frame. We tweak RefCounts::increment to return the object it's being called on, which allows `swift_retain` to tail-call it. We manually outline the objc_retain call in swift_bridgeObjectRetain, which allows the swift_retain path to be frameless. rdar://101764509
1 parent 8a95701 commit 9a94afc

File tree

5 files changed

+44
-14
lines changed

5 files changed

+44
-14
lines changed

stdlib/public/SwiftShims/swift/shims/RefCount.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,7 @@ class RefCounts {
703703
// Out-of-line slow paths.
704704

705705
SWIFT_NOINLINE
706-
void incrementSlow(RefCountBits oldbits, uint32_t inc) SWIFT_CC(PreserveMost);
706+
HeapObject *incrementSlow(RefCountBits oldbits, uint32_t inc) SWIFT_CC(PreserveMost);
707707

708708
SWIFT_NOINLINE
709709
void incrementNonAtomicSlow(RefCountBits oldbits, uint32_t inc);
@@ -799,14 +799,18 @@ class RefCounts {
799799
}
800800

801801
// Increment the reference count.
802+
//
803+
// This returns the enclosing HeapObject so that the result of this call
804+
// can be directly returned from swift_retain. This makes the call to
805+
// incrementSlow() a tail call.
802806
SWIFT_ALWAYS_INLINE
803-
void increment(uint32_t inc = 1) {
807+
HeapObject *increment(uint32_t inc = 1) {
804808
auto oldbits = refCounts.load(SWIFT_MEMORY_ORDER_CONSUME);
805809

806810
// constant propagation will remove this in swift_retain, it should only
807811
// be present in swift_retain_n
808812
if (inc != 1 && oldbits.isImmortal(true)) {
809-
return;
813+
return getHeapObject();
810814
}
811815

812816
RefCountBits newbits;
@@ -815,11 +819,12 @@ class RefCounts {
815819
bool fast = newbits.incrementStrongExtraRefCount(inc);
816820
if (SWIFT_UNLIKELY(!fast)) {
817821
if (oldbits.isImmortal(false))
818-
return;
822+
return getHeapObject();
819823
return incrementSlow(oldbits, inc);
820824
}
821825
} while (!refCounts.compare_exchange_weak(oldbits, newbits,
822826
std::memory_order_relaxed));
827+
return getHeapObject();
823828
}
824829

825830
SWIFT_ALWAYS_INLINE

stdlib/public/SwiftShims/swift/shims/Visibility.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,12 @@
109109
#define SWIFT_WEAK_IMPORT
110110
#endif
111111

112+
#if __has_attribute(musttail)
113+
#define SWIFT_MUSTTAIL __attribute__((musttail))
114+
#else
115+
#define SWIFT_MUSTTAIL
116+
#endif
117+
112118
// Define the appropriate attributes for sharing symbols across
113119
// image (executable / shared-library) boundaries.
114120
//

stdlib/public/runtime/HeapObject.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,12 @@ _swift_release_dealloc(HeapObject *object);
342342
SWIFT_ALWAYS_INLINE
343343
static HeapObject *_swift_retain_(HeapObject *object) {
344344
SWIFT_RT_TRACK_INVOCATION(object, swift_retain);
345-
if (isValidPointerForNativeRetain(object))
346-
object->refCounts.increment(1);
345+
if (isValidPointerForNativeRetain(object)) {
346+
// Return the result of increment() to make the eventual call to
347+
// incrementSlow a tail call, which avoids pushing a stack frame on the fast
348+
// path on ARM64.
349+
return object->refCounts.increment(1);
350+
}
347351
return object;
348352
}
349353

stdlib/public/runtime/RefCount.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
namespace swift {
1616

1717
template <typename RefCountBits>
18-
void RefCounts<RefCountBits>::incrementSlow(RefCountBits oldbits,
18+
HeapObject *RefCounts<RefCountBits>::incrementSlow(RefCountBits oldbits,
1919
uint32_t n) {
2020
if (oldbits.isImmortal(false)) {
21-
return;
21+
return getHeapObject();
2222
}
2323
else if (oldbits.hasSideTable()) {
2424
// Out-of-line slow path.
@@ -29,9 +29,10 @@ void RefCounts<RefCountBits>::incrementSlow(RefCountBits oldbits,
2929
// Retain count overflow.
3030
swift::swift_abortRetainOverflow();
3131
}
32+
return getHeapObject();
3233
}
33-
template void RefCounts<InlineRefCountBits>::incrementSlow(InlineRefCountBits oldbits, uint32_t n);
34-
template void RefCounts<SideTableRefCountBits>::incrementSlow(SideTableRefCountBits oldbits, uint32_t n);
34+
template HeapObject *RefCounts<InlineRefCountBits>::incrementSlow(InlineRefCountBits oldbits, uint32_t n);
35+
template HeapObject *RefCounts<SideTableRefCountBits>::incrementSlow(SideTableRefCountBits oldbits, uint32_t n);
3536

3637
template <typename RefCountBits>
3738
void RefCounts<RefCountBits>::incrementNonAtomicSlow(RefCountBits oldbits,

stdlib/public/runtime/SwiftObject.mm

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,18 @@ static bool isBridgeObjectTaggedPointer(void *object) {
574574
return (void*)(uintptr_t(object) & ~unTaggedNonNativeBridgeObjectBits);
575575
}
576576

577+
#if __arm64__
578+
// Marking this as noinline allows swift_bridgeObjectRetain to avoid emitting
579+
// a stack frame for the swift_retain path on ARM64. It makes for worse codegen
580+
// on x86-64, though, so limit it to ARM64.
581+
SWIFT_NOINLINE
582+
#endif
583+
static void *objcRetainAndReturn(void *object) {
584+
auto const objectRef = toPlainObject_unTagged_bridgeObject(object);
585+
objc_retain(static_cast<id>(objectRef));
586+
return object;
587+
}
588+
577589
void *swift::swift_bridgeObjectRetain(void *object) {
578590
#if SWIFT_OBJC_INTEROP
579591
if (isObjCTaggedPointer(object) || isBridgeObjectTaggedPointer(object))
@@ -584,11 +596,13 @@ static bool isBridgeObjectTaggedPointer(void *object) {
584596

585597
#if SWIFT_OBJC_INTEROP
586598
if (!isNonNative_unTagged_bridgeObject(object)) {
587-
swift_retain(static_cast<HeapObject *>(objectRef));
588-
return object;
599+
return swift_retain(static_cast<HeapObject *>(objectRef));
589600
}
590-
objc_retain(static_cast<id>(objectRef));
591-
return object;
601+
602+
// Put the call to objc_retain in a separate function, tail-called here. This
603+
// allows the fast path of swift_bridgeObjectRetain to avoid creating a stack
604+
// frame on ARM64.
605+
SWIFT_MUSTTAIL return objcRetainAndReturn(object);
592606
#else
593607
swift_retain(static_cast<HeapObject *>(objectRef));
594608
return object;

0 commit comments

Comments
 (0)