Skip to content

[Runtime] Add register-specific entrypoints for retain/release calls on ARM64. #62103

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions include/swift/Runtime/CustomRRABI.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//===--- CustomRRABI.h - Custom retain/release ABI support ----------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
//
// Utilities for creating register-specific retain/release entrypoints.
//
//===----------------------------------------------------------------------===//

#ifndef SWIFT_RUNTIME_CUSTOMRRABI_H
#define SWIFT_RUNTIME_CUSTOMRRABI_H

namespace swift {

#if __arm64__ || defined(_M_ARM64)

// Invoke the macro X on the number of each register we support for a custom ABI
// entrypoint, along with a custom parameter. We don't support all 31 registers:
// - x0 is already covered by the standard entrypoints.
// - x16/x17 are scratch registers that can be used by procedure call glue.
// - x18 is reserved.
// - x29 is the frame pointer.
// - x30 is the link register and gets overwritten when making a call.
// That leaves the 25 registers x1-x15 and x19-x28. Each expansion has the form
// X(N, param), with N written as a bare decimal number so that X can both
// token-paste and stringify it.
#define CUSTOM_RR_ENTRYPOINTS_FOREACH_REG(X, param) \
X(1, param) \
X(2, param) \
X(3, param) \
X(4, param) \
X(5, param) \
X(6, param) \
X(7, param) \
X(8, param) \
X(9, param) \
X(10, param) \
X(11, param) \
X(12, param) \
X(13, param) \
X(14, param) \
X(15, param) \
X(19, param) \
X(20, param) \
X(21, param) \
X(22, param) \
X(23, param) \
X(24, param) \
X(25, param) \
X(26, param) \
X(27, param) \
X(28, param)

// Helper template for deducing the parameter type of a one-parameter function.
// Despite its name, the type this function "returns" is the *parameter* type
// of the function pointer passed to it, so decltype(returnTypeHelper(f)) names
// the type of f's single parameter.
//
// This is deliberately a declaration with no definition: it is only meant to
// be used inside decltype(), an unevaluated context, so no body is needed. A
// body that falls off the end of a non-void function would be undefined
// behavior if it were ever called; leaving it undefined turns any accidental
// call into a link error instead.
template <typename Ret, typename Param>
Param returnTypeHelper(Ret (*)(Param));

// Helper macro that defines one entrypoint that takes the parameter in reg and
// calls through to function.
//
// The generated function is named `<function>_x<reg>`, has C linkage, declares
// no parameters, and has the same return type as `function` (obtained with
// decltype on a call to it). Its argument type is the parameter type of
// `function`, obtained through returnTypeHelper.
//
// The inline asm copies x<reg> into whichever register the compiler picked for
// `ptr` (%0). The `.ifnc` assembler directive compares the two register names
// as text and skips the `mov` when the compiler already chose x<reg>, so no
// redundant self-move is emitted.
//
// NOTE(review): the asm statement has no input operands, so it relies on
// x<reg> still holding the caller's value at the point where it executes;
// nothing in the operand list tells the compiler about that read. Confirm this
// holds for every compiler/optimization level the runtime is built with.
#define CUSTOM_RR_ENTRYPOINTS_DEFINE_ONE_ENTRYPOINT(reg, function) \
extern "C" SWIFT_RUNTIME_EXPORT decltype(function( \
nullptr)) function##_x##reg() { \
decltype(returnTypeHelper(function)) ptr; \
asm(".ifnc %0, x" #reg "\n" \
"mov %0, x" #reg "\n" \
".endif" \
: "=r"(ptr)); \
return function(ptr); \
}

// A macro that defines all register-specific entrypoints for the given
// retain/release function. It applies
// CUSTOM_RR_ENTRYPOINTS_DEFINE_ONE_ENTRYPOINT once per register listed in
// CUSTOM_RR_ENTRYPOINTS_FOREACH_REG, passing `function` through as the custom
// parameter, so each expansion defines one `<function>_xN` symbol.
#define CUSTOM_RR_ENTRYPOINTS_DEFINE_ENTRYPOINTS(function) \
CUSTOM_RR_ENTRYPOINTS_FOREACH_REG( \
CUSTOM_RR_ENTRYPOINTS_DEFINE_ONE_ENTRYPOINT, function)

#else

// No custom entrypoints on other architectures.
#define CUSTOM_RR_ENTRYPOINTS_DEFINE_ENTRYPOINTS(function)

#endif

} // namespace swift

#endif // SWIFT_RUNTIME_CUSTOMRRABI_H
5 changes: 5 additions & 0 deletions stdlib/public/runtime/HeapObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "RuntimeInvocationsTracking.h"
#include "WeakReference.h"
#include "swift/Runtime/Debug.h"
#include "swift/Runtime/CustomRRABI.h"
#include "swift/Runtime/InstrumentsSupport.h"
#include "swift/shims/GlobalObjects.h"
#include "swift/shims/RuntimeShims.h"
Expand Down Expand Up @@ -359,6 +360,8 @@ HeapObject *swift::swift_retain(HeapObject *object) {
#endif
}

CUSTOM_RR_ENTRYPOINTS_DEFINE_ENTRYPOINTS(swift_retain)

SWIFT_RUNTIME_EXPORT
HeapObject *(*SWIFT_RT_DECLARE_ENTRY _swift_retain)(HeapObject *object) =
_swift_retain_;
Expand Down Expand Up @@ -412,6 +415,8 @@ void swift::swift_release(HeapObject *object) {
#endif
}

CUSTOM_RR_ENTRYPOINTS_DEFINE_ENTRYPOINTS(swift_release)

SWIFT_RUNTIME_EXPORT
void (*SWIFT_RT_DECLARE_ENTRY _swift_release)(HeapObject *object) =
_swift_release_;
Expand Down
111 changes: 111 additions & 0 deletions test/Runtime/Inputs/custom_rr_abi_utilities.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#define NUM_REGS 30

// Apply `macro` to "all" registers. Skip x18 since it's reserved, and x30 since
// it's the link register.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to worry about other -ffixed-x?? options, and anything like x29 being reserved for FP?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it matters so much for ALL_REGS as it does for FUNCTION_REGS; I think even the ones we skip would be OK in the former. The latter has to be a little more careful. We don't need to worry about -ffixed-x??, I don't think, because this is just for testing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, this is just for testing, so unless someone is going to build the tests that way, it's fine.

For the actual runtime entrypoints, any entrypoint corresponding to a reserved register wouldn't be called, so it would be harmless other than a bit of wasted code space.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, given that this is about code size, that would be kinda nice to handle. I don't see a good way to do this though as there is no macro that we have to check that and I am loathe to add a user controlled define for this. I suppose that if you are truly concerned about space, LTO is a thing, and at that point the DCE should kick in if you are statically linking.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds about right. If somebody really needs to squeeze every byte out of a dynamically-linked runtime with extra reserved registers, we can figure it out then.

// Expands to `macro(N)` for every N in 0-17 and 19-29; 18 and 30 are omitted.
// The single-digit numbers are written with a leading space purely for
// alignment.
#define ALL_REGS(macro) \
macro( 0) \
macro( 1) \
macro( 2) \
macro( 3) \
macro( 4) \
macro( 5) \
macro( 6) \
macro( 7) \
macro( 8) \
macro( 9) \
macro(10) \
macro(11) \
macro(12) \
macro(13) \
macro(14) \
macro(15) \
macro(16) \
macro(17) \
macro(19) \
macro(20) \
macro(21) \
macro(22) \
macro(23) \
macro(24) \
macro(25) \
macro(26) \
macro(27) \
macro(28) \
macro(29)

// Apply `macro` with the given parameters to all registers that have
// specialized entrypoints. That's the same as ALL_REGS, minus x0 (the standard
// entrypoint covers that), x16/x17 (temporary registers used as linker glue),
// and x29 (the frame pointer). Each expansion has the form
// `macro(N, <extra arguments>)`.
#define FUNCTION_REGS(macro, ...) \
macro( 1, __VA_ARGS__) \
macro( 2, __VA_ARGS__) \
macro( 3, __VA_ARGS__) \
macro( 4, __VA_ARGS__) \
macro( 5, __VA_ARGS__) \
macro( 6, __VA_ARGS__) \
macro( 7, __VA_ARGS__) \
macro( 8, __VA_ARGS__) \
macro( 9, __VA_ARGS__) \
macro(10, __VA_ARGS__) \
macro(11, __VA_ARGS__) \
macro(12, __VA_ARGS__) \
macro(13, __VA_ARGS__) \
macro(14, __VA_ARGS__) \
macro(15, __VA_ARGS__) \
macro(19, __VA_ARGS__) \
macro(20, __VA_ARGS__) \
macro(21, __VA_ARGS__) \
macro(22, __VA_ARGS__) \
macro(23, __VA_ARGS__) \
macro(24, __VA_ARGS__) \
macro(25, __VA_ARGS__) \
macro(26, __VA_ARGS__) \
macro(27, __VA_ARGS__) \
macro(28, __VA_ARGS__)

// Apply `macro` to each function that gets specialized entrypoints. Also pass
// 1 if the function is a retain variant, and 0 if it's a release variant.
//
// NOTE(review): every function listed here must have matching `<function>_xN`
// symbols exported by the runtime, or the test will fail to link. Only the
// swift_retain and swift_release definitions are visible from here — confirm
// the bridgeObject variants are defined elsewhere in the runtime.
#define ALL_FUNCTIONS(macro) \
macro(swift_retain, 1) \
macro(swift_release, 0) \
macro(swift_bridgeObjectRetain, 1) \
macro(swift_bridgeObjectRelease, 0)

// Emit declarations for variables called xN stored in xN, initialized with
// regs[N]. The expansion site must have an array named `regs` in scope whose
// element at index N is readable. The `register ... asm("xN")` form asks the
// compiler to keep the variable in that specific machine register.
#define PASS_REGS_HELPER(num) \
register void *x ## num asm ("x" #num) = regs[num];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`register` is deprecated; we should avoid that. Do we want to use uintptr_t instead of void *?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can't use asm("<register>") on a variable without also using register.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As for the type, I don't think it matters whether you use void * or uintptr_t or, actually, uint64_t here, but void * is convenient because it lets us declare regs on the Swift side as [UnsafeMutableRawPointer?], which then also lets us put object references into it without having to do an unsafeBitCast().

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I do slightly worry that @compnerd might have a point here and that we should contemplate what happens if someone makes register ... asm("<register>") not work in the future. But in that case, the test will fail, so we'll notice that it's happened and can rewrite this code.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, we'll notice. Worst case we can rewrite this whole thing in inline assembly instead of convincing the compiler to load the registers for us.

#define PASS_REGS ALL_REGS(PASS_REGS_HELPER)

// Emit an entry in an asm inputs list containing "r" (xN).
// Every entry, including the last one, ends with a comma, so whoever uses
// REG_INPUTS must follow it with one more input operand to keep the operand
// list well-formed.
#define REG_INPUTS_HELPER(num) \
"r" (x ## num),
// The full inputs list: one "r" (xN) entry for each register in ALL_REGS.
#define REG_INPUTS ALL_REGS(REG_INPUTS_HELPER)

// Turn the expansion of a macro into a string literal.
#define CUSTOM_RR_STRINGIFY_HELPER(x) #x
#define CUSTOM_RR_STRINGIFY(x) CUSTOM_RR_STRINGIFY_HELPER(x)

// The prefix the target prepends to C symbol names in assembly, as a string
// literal: "_" on Mach-O, empty on ELF. Using the compiler-provided value
// instead of a hard-coded "_" keeps the `bl` target correct on every platform
// that can run this test.
#define CUSTOM_RR_SYMBOL_PREFIX CUSTOM_RR_STRINGIFY(__USER_LABEL_PREFIX__)

// Make a function called call_function_xN that calls function_xN with registers
// set to the contents of the given registers array.
//
// PASS_REGS loads every register from `regs`, and REG_INPUTS lists them all as
// asm inputs so the compiler keeps those values live up to the call. The final
// "i"(0) operand is a dummy that terminates the comma-ended REG_INPUTS list.
//
// The `bl` overwrites the link register and the callee modifies reference
// counts in memory, so both are declared as clobbers; otherwise the compiler
// is free to assume x30 still holds this function's own return address.
#define MAKE_CALL_FUNC(reg, func) \
  static inline void call_##func##_x##reg(void **regs) { \
    PASS_REGS \
    asm volatile("bl " CUSTOM_RR_SYMBOL_PREFIX #func "_x" #reg \
                 : \
                 : REG_INPUTS "i"(0) \
                 : "x30", "memory"); \
  }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we can ensure that this doesn't setup a frame? This is also a guaranteed tail call right? Can we attribute that somehow?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure it's important, given that this is just for a test. I suppose if you wanted to avoid using the register ... asm("<reg>") annotation, you could write this bit entirely in assembly language, in which case it could do a tail call. It couldn't be inline then though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't actually care about whether this sets up a frame or does a tail call. We will overwrite the frame pointer, but that's OK. Otherwise, we just need it to call the requested runtime function with the registers set to the appropriate values. Whether it does that as a tail or standard call, and with or without a frame, is fine.


// Make a call_function_xN for each specialized function and register.
// `isRetain` is accepted only so this macro matches the two-argument shape
// that ALL_FUNCTIONS invokes its callback with; it is not used here.
#define MAKE_ALL_CALL_FUNCS(function, isRetain) \
FUNCTION_REGS(MAKE_CALL_FUNC, function)
ALL_FUNCTIONS(MAKE_ALL_CALL_FUNCS)

// Emits a single invocation of the `call` callback for one
// (function, register) pair: the generated call_<function>_x<reg> thunk, the
// base function name as a string, the register number, and the retain flag.
#define CALL_ONE_FUNCTION(regNumber, baseFunction, retainFlag) \
  call(call_##baseFunction##_x##regNumber, #baseFunction, regNumber, retainFlag);

// Emits the callback invocations for every register that has a specialized
// entrypoint of one base function.
#define CALL_WITH_FUNCTIONS(baseFunction, retainFlag) \
  FUNCTION_REGS(CALL_ONE_FUNCTION, baseFunction, retainFlag)

// Invoke `call` once for every call_function_xN thunk created above. Each
// invocation receives the thunk itself, the name of the base function, the
// number of the register the entrypoint reads, and a flag that is nonzero for
// retain functions and zero for release functions.
static inline void foreachRRFunction(void (*call)(void (*)(void **regs),
                                                  const char *name, int reg,
                                                  int isRetain)) {
  ALL_FUNCTIONS(CALL_WITH_FUNCTIONS)
}
86 changes: 86 additions & 0 deletions test/Runtime/custom_rr_abi.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// RUN: %target-run-simple-swift(-import-objc-header %S/Inputs/custom_rr_abi_utilities.h)

// REQUIRES: CPU=arm64 || CPU=arm64e

// REQUIRES: executable_test
// UNSUPPORTED: use_os_stdlib
// UNSUPPORTED: back_deployment_runtime

import StdlibUnittest

// A class that can provide a retainable pointer and determine whether it's
// been retained or released. This creates a helper object that will be retained
// or released. We don't attempt to clean up the helper so it leaks if not released,
// but this is only used for this one test so that's OK.
class RetainReleaseChecker {
// Opaque pointer to the helper object. It carries the +1 retain added by
// Unmanaged.passRetained in init, which is what keeps the helper alive.
var pointerValue: UnsafeMutableRawPointer

private class Helper {}

// Weak reference used to observe the helper without keeping it alive.
private weak var weakRef: Helper?

// The helper's retain count as measured at the end of init.
private let originalRetainCount: UInt

init() {
do {
// Make a helper object, retain it so it stays alive, and put it into
// pointerValue and weakRef.
let helper = Helper()
pointerValue = Unmanaged.passRetained(helper).toOpaque()
weakRef = helper
}
// Record the original retain count before anything happens. Then we can
// detect changes without needing to know exactly what the count is supposed
// to be.
originalRetainCount = _getRetainCount(weakRef!)
}

// If helper was retained, then weakRef will still point to it, and the retain
// count will have increased.
var retained: Bool {
weakRef != nil && _getRetainCount(weakRef!) > originalRetainCount
}

// weakRef is the only reference we had to the helper, aside from the retain we put
// on it to create pointerValue. If helper was released, then it will be destroyed
// and weakRef will be nil.
var released: Bool {
weakRef == nil
}
}

var CustomRRABITestSuite = TestSuite("CustomRRABI")

// Calls every register-specific retain/release entrypoint with a distinct
// checker object in each register slot, then verifies that only the object in
// the entrypoint's own register was retained or released.
CustomRRABITestSuite.test("retain") {
  foreachRRFunction { function, cname, register, isRetain in
    let name = String(cString: cname!)
    let fullname = "\(name)_x\(register)"

    // Create a set of RR checker objects. The array itself is never mutated.
    let checkers = (0..<NUM_REGS).map { _ in RetainReleaseChecker() }

    // Fill out a registers array with the pointers from the RR checkers.
    var regs: [UnsafeMutableRawPointer?] = checkers.map { $0.pointerValue }

    // Call the RR function.
    function!(&regs)

    // Make sure all the checkers report what they're supposed to. All registers
    // aside from `register` should be untouched, and `register` should have been
    // either retained or released.
    for (i, checker) in checkers.enumerated() {
      if i == register {
        if isRetain != 0 {
          expectTrue(checker.retained, "\(fullname) must retain x\(i)")
        } else {
          expectTrue(checker.released, "\(fullname) must release x\(i)")
        }
      } else {
        expectFalse(checker.retained, "\(fullname) must not retain x\(i)")
        // Report an unexpected release as a release, not as a retain, so a
        // failure points at the right operation.
        expectFalse(checker.released, "\(fullname) must not release x\(i)")
      }
    }
  }
}

runAllTests()