Skip to content

Commit b9391c0

Browse files
authored
Merge pull request #61794 from mikeash/retain-stack-frames
[Runtime] Eliminate stack frames in swift_retain and swift_bridgeObjectRetain on ARM64.
2 parents 819809b + 724a9a7 commit b9391c0

File tree

5 files changed

+55
-17
lines changed

5 files changed

+55
-17
lines changed

stdlib/public/SwiftShims/swift/shims/RefCount.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,7 @@ class RefCounts {
703703
// Out-of-line slow paths.
704704

705705
SWIFT_NOINLINE
706-
void incrementSlow(RefCountBits oldbits, uint32_t inc) SWIFT_CC(PreserveMost);
706+
HeapObject *incrementSlow(RefCountBits oldbits, uint32_t inc);
707707

708708
SWIFT_NOINLINE
709709
void incrementNonAtomicSlow(RefCountBits oldbits, uint32_t inc);
@@ -799,14 +799,18 @@ class RefCounts {
799799
}
800800

801801
// Increment the reference count.
802+
//
803+
// This returns the enclosing HeapObject so that the result of this call
804+
// can be directly returned from swift_retain. This makes the call to
805+
// incrementSlow() a tail call.
802806
SWIFT_ALWAYS_INLINE
803-
void increment(uint32_t inc = 1) {
807+
HeapObject *increment(uint32_t inc = 1) {
804808
auto oldbits = refCounts.load(SWIFT_MEMORY_ORDER_CONSUME);
805809

806810
// constant propagation will remove this in swift_retain, it should only
807811
// be present in swift_retain_n
808812
if (inc != 1 && oldbits.isImmortal(true)) {
809-
return;
813+
return getHeapObject();
810814
}
811815

812816
RefCountBits newbits;
@@ -815,11 +819,12 @@ class RefCounts {
815819
bool fast = newbits.incrementStrongExtraRefCount(inc);
816820
if (SWIFT_UNLIKELY(!fast)) {
817821
if (oldbits.isImmortal(false))
818-
return;
822+
return getHeapObject();
819823
return incrementSlow(oldbits, inc);
820824
}
821825
} while (!refCounts.compare_exchange_weak(oldbits, newbits,
822826
std::memory_order_relaxed));
827+
return getHeapObject();
823828
}
824829

825830
SWIFT_ALWAYS_INLINE

stdlib/public/SwiftShims/swift/shims/Visibility.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,12 @@
109109
#define SWIFT_WEAK_IMPORT
110110
#endif
111111

112+
#if __has_attribute(musttail)
113+
#define SWIFT_MUSTTAIL [[clang::musttail]]
114+
#else
115+
#define SWIFT_MUSTTAIL
116+
#endif
117+
112118
// Define the appropriate attributes for sharing symbols across
113119
// image (executable / shared-library) boundaries.
114120
//

stdlib/public/runtime/HeapObject.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,12 @@ _swift_release_dealloc(HeapObject *object);
342342
SWIFT_ALWAYS_INLINE
343343
static HeapObject *_swift_retain_(HeapObject *object) {
344344
SWIFT_RT_TRACK_INVOCATION(object, swift_retain);
345-
if (isValidPointerForNativeRetain(object))
346-
object->refCounts.increment(1);
345+
if (isValidPointerForNativeRetain(object)) {
346+
// Return the result of increment() to make the eventual call to
347+
// incrementSlow a tail call, which avoids pushing a stack frame on the fast
348+
// path on ARM64.
349+
return object->refCounts.increment(1);
350+
}
347351
return object;
348352
}
349353

stdlib/public/runtime/RefCount.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
namespace swift {
1616

1717
template <typename RefCountBits>
18-
void RefCounts<RefCountBits>::incrementSlow(RefCountBits oldbits,
19-
uint32_t n) {
18+
HeapObject *RefCounts<RefCountBits>::incrementSlow(RefCountBits oldbits,
19+
uint32_t n) {
2020
if (oldbits.isImmortal(false)) {
21-
return;
21+
return getHeapObject();
2222
}
2323
else if (oldbits.hasSideTable()) {
2424
// Out-of-line slow path.
@@ -29,9 +29,14 @@ void RefCounts<RefCountBits>::incrementSlow(RefCountBits oldbits,
2929
// Retain count overflow.
3030
swift::swift_abortRetainOverflow();
3131
}
32+
return getHeapObject();
3233
}
33-
template void RefCounts<InlineRefCountBits>::incrementSlow(InlineRefCountBits oldbits, uint32_t n);
34-
template void RefCounts<SideTableRefCountBits>::incrementSlow(SideTableRefCountBits oldbits, uint32_t n);
34+
template HeapObject *
35+
RefCounts<InlineRefCountBits>::incrementSlow(InlineRefCountBits oldbits,
36+
uint32_t n);
37+
template HeapObject *
38+
RefCounts<SideTableRefCountBits>::incrementSlow(SideTableRefCountBits oldbits,
39+
uint32_t n);
3540

3641
template <typename RefCountBits>
3742
void RefCounts<RefCountBits>::incrementNonAtomicSlow(RefCountBits oldbits,

stdlib/public/runtime/SwiftObject.mm

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,20 @@ static bool isBridgeObjectTaggedPointer(void *object) {
574574
return (void*)(uintptr_t(object) & ~unTaggedNonNativeBridgeObjectBits);
575575
}
576576

577+
#if SWIFT_OBJC_INTEROP
578+
#if __arm64__
579+
// Marking this as noinline allows swift_bridgeObjectRetain to avoid emitting
580+
// a stack frame for the swift_retain path on ARM64. It makes for worse codegen
581+
// on x86-64, though, so limit it to ARM64.
582+
SWIFT_NOINLINE
583+
#endif
584+
static void *objcRetainAndReturn(void *object) {
585+
auto const objectRef = toPlainObject_unTagged_bridgeObject(object);
586+
objc_retain(static_cast<id>(objectRef));
587+
return object;
588+
}
589+
#endif
590+
577591
void *swift::swift_bridgeObjectRetain(void *object) {
578592
#if SWIFT_OBJC_INTEROP
579593
if (isObjCTaggedPointer(object) || isBridgeObjectTaggedPointer(object))
@@ -584,14 +598,18 @@ static bool isBridgeObjectTaggedPointer(void *object) {
584598

585599
#if SWIFT_OBJC_INTEROP
586600
if (!isNonNative_unTagged_bridgeObject(object)) {
587-
swift_retain(static_cast<HeapObject *>(objectRef));
588-
return object;
601+
return swift_retain(static_cast<HeapObject *>(objectRef));
589602
}
590-
objc_retain(static_cast<id>(objectRef));
591-
return object;
603+
604+
// Put the call to objc_retain in a separate function, tail-called here. This
605+
// allows the fast path of swift_bridgeObjectRetain to avoid creating a stack
606+
// frame on ARM64. We can't directly tail-call objc_retain, because
607+
// swift_bridgeObjectRetain returns the pointer with objectPointerIsObjCBit
608+
// set, so we have to make a non-tail call and then return the value with the
609+
// bit set.
610+
SWIFT_MUSTTAIL return objcRetainAndReturn(object);
592611
#else
593-
swift_retain(static_cast<HeapObject *>(objectRef));
594-
return object;
612+
return swift_retain(static_cast<HeapObject *>(objectRef));
595613
#endif
596614
}
597615

0 commit comments

Comments
 (0)