Skip to content

Commit 724a9a7

Browse files
committed
[Runtime] Eliminate stack frames in swift_retain and swift_bridgeObjectRetain on ARM64.
Rearrange the slow paths a bit to make them tail calls, which allows the compiler to emit these functions without frames. Clang is happy to emit frameless functions on ARM64 if no stack space is needed on all execution paths. However, when there's a fast path which doesn't need stack space, and a slow path which does, clang emits code that pushes a stack frame and then decides which path to take. This is fine, but it means we're paying more than we'd like to on the fast path. We can work around that by manually outlining the slow path, and ensuring that it's invoked with a tail call. Then the original function doesn't need a stack frame on any path and clang omits the stack frame. We tweak RefCounts::increment to return the object it's being called on, which allows `swift_retain` to tail-call it. We manually outline the objc_retain call in swift_bridgeObjectRetain, which allows the swift_retain path to be frameless. rdar://101764509
1 parent 8a95701 commit 724a9a7

File tree

5 files changed

+55
-17
lines changed

5 files changed

+55
-17
lines changed

stdlib/public/SwiftShims/swift/shims/RefCount.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,7 @@ class RefCounts {
703703
// Out-of-line slow paths.
704704

705705
SWIFT_NOINLINE
706-
void incrementSlow(RefCountBits oldbits, uint32_t inc) SWIFT_CC(PreserveMost);
706+
HeapObject *incrementSlow(RefCountBits oldbits, uint32_t inc);
707707

708708
SWIFT_NOINLINE
709709
void incrementNonAtomicSlow(RefCountBits oldbits, uint32_t inc);
@@ -799,14 +799,18 @@ class RefCounts {
799799
}
800800

801801
// Increment the reference count.
802+
//
803+
// This returns the enclosing HeapObject so that the result of this call
804+
// can be directly returned from swift_retain. This makes the call to
805+
// incrementSlow() a tail call.
802806
SWIFT_ALWAYS_INLINE
803-
void increment(uint32_t inc = 1) {
807+
HeapObject *increment(uint32_t inc = 1) {
804808
auto oldbits = refCounts.load(SWIFT_MEMORY_ORDER_CONSUME);
805809

806810
// constant propagation will remove this in swift_retain, it should only
807811
// be present in swift_retain_n
808812
if (inc != 1 && oldbits.isImmortal(true)) {
809-
return;
813+
return getHeapObject();
810814
}
811815

812816
RefCountBits newbits;
@@ -815,11 +819,12 @@ class RefCounts {
815819
bool fast = newbits.incrementStrongExtraRefCount(inc);
816820
if (SWIFT_UNLIKELY(!fast)) {
817821
if (oldbits.isImmortal(false))
818-
return;
822+
return getHeapObject();
819823
return incrementSlow(oldbits, inc);
820824
}
821825
} while (!refCounts.compare_exchange_weak(oldbits, newbits,
822826
std::memory_order_relaxed));
827+
return getHeapObject();
823828
}
824829

825830
SWIFT_ALWAYS_INLINE

stdlib/public/SwiftShims/swift/shims/Visibility.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,12 @@
109109
#define SWIFT_WEAK_IMPORT
110110
#endif
111111

112+
#if __has_attribute(musttail)
113+
#define SWIFT_MUSTTAIL [[clang::musttail]]
114+
#else
115+
#define SWIFT_MUSTTAIL
116+
#endif
117+
112118
// Define the appropriate attributes for sharing symbols across
113119
// image (executable / shared-library) boundaries.
114120
//

stdlib/public/runtime/HeapObject.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,12 @@ _swift_release_dealloc(HeapObject *object);
342342
SWIFT_ALWAYS_INLINE
343343
static HeapObject *_swift_retain_(HeapObject *object) {
344344
SWIFT_RT_TRACK_INVOCATION(object, swift_retain);
345-
if (isValidPointerForNativeRetain(object))
346-
object->refCounts.increment(1);
345+
if (isValidPointerForNativeRetain(object)) {
346+
// Return the result of increment() to make the eventual call to
347+
// incrementSlow a tail call, which avoids pushing a stack frame on the fast
348+
// path on ARM64.
349+
return object->refCounts.increment(1);
350+
}
347351
return object;
348352
}
349353

stdlib/public/runtime/RefCount.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
namespace swift {
1616

1717
template <typename RefCountBits>
18-
void RefCounts<RefCountBits>::incrementSlow(RefCountBits oldbits,
19-
uint32_t n) {
18+
HeapObject *RefCounts<RefCountBits>::incrementSlow(RefCountBits oldbits,
19+
uint32_t n) {
2020
if (oldbits.isImmortal(false)) {
21-
return;
21+
return getHeapObject();
2222
}
2323
else if (oldbits.hasSideTable()) {
2424
// Out-of-line slow path.
@@ -29,9 +29,14 @@ void RefCounts<RefCountBits>::incrementSlow(RefCountBits oldbits,
2929
// Retain count overflow.
3030
swift::swift_abortRetainOverflow();
3131
}
32+
return getHeapObject();
3233
}
33-
template void RefCounts<InlineRefCountBits>::incrementSlow(InlineRefCountBits oldbits, uint32_t n);
34-
template void RefCounts<SideTableRefCountBits>::incrementSlow(SideTableRefCountBits oldbits, uint32_t n);
34+
template HeapObject *
35+
RefCounts<InlineRefCountBits>::incrementSlow(InlineRefCountBits oldbits,
36+
uint32_t n);
37+
template HeapObject *
38+
RefCounts<SideTableRefCountBits>::incrementSlow(SideTableRefCountBits oldbits,
39+
uint32_t n);
3540

3641
template <typename RefCountBits>
3742
void RefCounts<RefCountBits>::incrementNonAtomicSlow(RefCountBits oldbits,

stdlib/public/runtime/SwiftObject.mm

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,20 @@ static bool isBridgeObjectTaggedPointer(void *object) {
574574
return (void*)(uintptr_t(object) & ~unTaggedNonNativeBridgeObjectBits);
575575
}
576576

577+
#if SWIFT_OBJC_INTEROP
578+
#if __arm64__
579+
// Marking this as noinline allows swift_bridgeObjectRetain to avoid emitting
580+
// a stack frame for the swift_retain path on ARM64. It makes for worse codegen
581+
// on x86-64, though, so limit it to ARM64.
582+
SWIFT_NOINLINE
583+
#endif
584+
static void *objcRetainAndReturn(void *object) {
585+
auto const objectRef = toPlainObject_unTagged_bridgeObject(object);
586+
objc_retain(static_cast<id>(objectRef));
587+
return object;
588+
}
589+
#endif
590+
577591
void *swift::swift_bridgeObjectRetain(void *object) {
578592
#if SWIFT_OBJC_INTEROP
579593
if (isObjCTaggedPointer(object) || isBridgeObjectTaggedPointer(object))
@@ -584,14 +598,18 @@ static bool isBridgeObjectTaggedPointer(void *object) {
584598

585599
#if SWIFT_OBJC_INTEROP
586600
if (!isNonNative_unTagged_bridgeObject(object)) {
587-
swift_retain(static_cast<HeapObject *>(objectRef));
588-
return object;
601+
return swift_retain(static_cast<HeapObject *>(objectRef));
589602
}
590-
objc_retain(static_cast<id>(objectRef));
591-
return object;
603+
604+
// Put the call to objc_retain in a separate function, tail-called here. This
605+
// allows the fast path of swift_bridgeObjectRetain to avoid creating a stack
606+
// frame on ARM64. We can't directly tail-call objc_retain, because
607+
// swift_bridgeObjectRetain returns the pointer with objectPointerIsObjCBit
608+
// set, so we have to make a non-tail call and then return the value with the
609+
// bit set.
610+
SWIFT_MUSTTAIL return objcRetainAndReturn(object);
592611
#else
593-
swift_retain(static_cast<HeapObject *>(objectRef));
594-
return object;
612+
return swift_retain(static_cast<HeapObject *>(objectRef));
595613
#endif
596614
}
597615

0 commit comments

Comments
 (0)