-
Notifications
You must be signed in to change notification settings - Fork 10.5k
[Runtime] Add register-specific entrypoints for retain/release calls on ARM64. #62103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
//===--- CustomRRABI.h - Custom retain/release ABI support ----------------===// | ||
// | ||
// This source file is part of the Swift.org open source project | ||
// | ||
// Copyright (c) 2022 Apple Inc. and the Swift project authors | ||
// Licensed under Apache License v2.0 with Runtime Library Exception | ||
// | ||
// See https://swift.org/LICENSE.txt for license information | ||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Utilities for creating register-specific retain/release entrypoints. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef SWIFT_RUNTIME_CUSTOMRRABI_H | ||
#define SWIFT_RUNTIME_CUSTOMRRABI_H | ||
|
||
namespace swift { | ||
|
||
#if __arm64__ || defined(_M_ARM64) | ||
|
||
// Invoke the macro X on the number of each register we support for a custom ABI | ||
// entrypoint, along with a custom parameter. We don't support all 31 registers: | ||
// - x0 is already covered by the standard entrypoints. | ||
// - x16/x17 are scratch registers that can be used by procedure call glue. | ||
// - x18 is reserved. | ||
// - x29 is the frame pointer. | ||
// - x30 is the link register and gets overwritten when making a call. | ||
// Each expansion has the form X(N, param), where N is the bare register number | ||
// (1-15 and 19-28), so X can token-paste or stringize N to build names. | ||
#define CUSTOM_RR_ENTRYPOINTS_FOREACH_REG(X, param) \ | ||
X(1, param) \ | ||
X(2, param) \ | ||
X(3, param) \ | ||
X(4, param) \ | ||
X(5, param) \ | ||
X(6, param) \ | ||
X(7, param) \ | ||
X(8, param) \ | ||
X(9, param) \ | ||
X(10, param) \ | ||
X(11, param) \ | ||
X(12, param) \ | ||
X(13, param) \ | ||
X(14, param) \ | ||
X(15, param) \ | ||
X(19, param) \ | ||
X(20, param) \ | ||
X(21, param) \ | ||
X(22, param) \ | ||
X(23, param) \ | ||
X(24, param) \ | ||
X(25, param) \ | ||
X(26, param) \ | ||
X(27, param) \ | ||
X(28, param) | ||
|
||
// Helper template for deducing the parameter type of a one-parameter function.
// Given a pointer to a function of type Ret(Param), the declared return type of
// this helper is Param, so decltype(returnTypeHelper(f)) names f's parameter
// type.
//
// This is intentionally a declaration with no definition: it is only meant to
// appear in unevaluated decltype operands. An empty body on a non-void function
// would trigger -Wreturn-type and be undefined behavior if it were ever called.
template <typename Ret, typename Param>
Param returnTypeHelper(Ret (*)(Param));
|
||
// Helper macro that defines one entrypoint that takes the parameter in reg and | ||
// calls through to function. | ||
// | ||
// The generated function is named function_x<reg>, declares no formal | ||
// parameters, and has the type of function(nullptr) as its return type. | ||
// The local `ptr` has function's parameter type (via returnTypeHelper) and is | ||
// the asm output operand %0. The .ifnc/.endif pair assembles the | ||
// `mov %0, x<reg>` only when the register chosen for %0 is not textually | ||
// x<reg> already, so no self-move is emitted. | ||
#define CUSTOM_RR_ENTRYPOINTS_DEFINE_ONE_ENTRYPOINT(reg, function) \ | ||
extern "C" SWIFT_RUNTIME_EXPORT decltype(function( \ | ||
nullptr)) function##_x##reg() { \ | ||
decltype(returnTypeHelper(function)) ptr; \ | ||
asm(".ifnc %0, x" #reg "\n" \ | ||
"mov %0, x" #reg "\n" \ | ||
".endif" \ | ||
: "=r"(ptr)); \ | ||
return function(ptr); \ | ||
} | ||
|
||
// A macro that defines all register-specific entrypoints for the given | ||
// retain/release function. | ||
// Expands to one CUSTOM_RR_ENTRYPOINTS_DEFINE_ONE_ENTRYPOINT(N, function) per | ||
// register N listed in CUSTOM_RR_ENTRYPOINTS_FOREACH_REG. | ||
#define CUSTOM_RR_ENTRYPOINTS_DEFINE_ENTRYPOINTS(function) \ | ||
CUSTOM_RR_ENTRYPOINTS_FOREACH_REG( \ | ||
CUSTOM_RR_ENTRYPOINTS_DEFINE_ONE_ENTRYPOINT, function) | ||
|
||
#else | ||
|
||
// No custom entrypoints on other architectures. | ||
// The macro expands to nothing here, so uses of it need no architecture check. | ||
#define CUSTOM_RR_ENTRYPOINTS_DEFINE_ENTRYPOINTS(function) | ||
|
||
#endif | ||
|
||
} // namespace swift | ||
|
||
#endif // SWIFT_RUNTIME_CUSTOMRRABI_H |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
// Number of slots in the `regs` array handed to the call_function_xN helpers: | ||
// one per register index 0 through 29 (regs[N] is loaded into xN). | ||
#define NUM_REGS 30 | ||
|
||
// Apply `macro` to "all" registers. Skip x18 since it's reserved, and x30 since | ||
// it's the link register. | ||
// `macro` is invoked with a single argument, the register number (0-17 and | ||
// 19-29). | ||
#define ALL_REGS(macro) \ | ||
macro( 0) \ | ||
macro( 1) \ | ||
macro( 2) \ | ||
macro( 3) \ | ||
macro( 4) \ | ||
macro( 5) \ | ||
macro( 6) \ | ||
macro( 7) \ | ||
macro( 8) \ | ||
macro( 9) \ | ||
macro(10) \ | ||
macro(11) \ | ||
macro(12) \ | ||
macro(13) \ | ||
macro(14) \ | ||
macro(15) \ | ||
macro(16) \ | ||
macro(17) \ | ||
macro(19) \ | ||
macro(20) \ | ||
macro(21) \ | ||
macro(22) \ | ||
macro(23) \ | ||
macro(24) \ | ||
macro(25) \ | ||
macro(26) \ | ||
macro(27) \ | ||
macro(28) \ | ||
macro(29) | ||
|
||
// Apply `macro` with the given parameters to all registers that have | ||
// specialized entrypoints. That's the same as ALL_REGS, minus x0 (the standard | ||
// entrypoint covers that), x16/x17 (temporary registers used as linker glue), | ||
// and x29 (the frame pointer). | ||
// `macro` is invoked as macro(N, <extra args>) for N in 1-15 and 19-28. | ||
#define FUNCTION_REGS(macro, ...) \ | ||
macro( 1, __VA_ARGS__) \ | ||
macro( 2, __VA_ARGS__) \ | ||
macro( 3, __VA_ARGS__) \ | ||
macro( 4, __VA_ARGS__) \ | ||
macro( 5, __VA_ARGS__) \ | ||
macro( 6, __VA_ARGS__) \ | ||
macro( 7, __VA_ARGS__) \ | ||
macro( 8, __VA_ARGS__) \ | ||
macro( 9, __VA_ARGS__) \ | ||
macro(10, __VA_ARGS__) \ | ||
macro(11, __VA_ARGS__) \ | ||
macro(12, __VA_ARGS__) \ | ||
macro(13, __VA_ARGS__) \ | ||
macro(14, __VA_ARGS__) \ | ||
macro(15, __VA_ARGS__) \ | ||
macro(19, __VA_ARGS__) \ | ||
macro(20, __VA_ARGS__) \ | ||
macro(21, __VA_ARGS__) \ | ||
macro(22, __VA_ARGS__) \ | ||
macro(23, __VA_ARGS__) \ | ||
macro(24, __VA_ARGS__) \ | ||
macro(25, __VA_ARGS__) \ | ||
macro(26, __VA_ARGS__) \ | ||
macro(27, __VA_ARGS__) \ | ||
macro(28, __VA_ARGS__) | ||
|
||
// Apply `macro` to each function that gets specialized entrypoints. Also pass | ||
// 1 if the function is a retain variant, and 0 if it's a release variant. | ||
// `macro` is therefore invoked as macro(functionName, isRetain). | ||
#define ALL_FUNCTIONS(macro) \ | ||
macro(swift_retain, 1) \ | ||
macro(swift_release, 0) \ | ||
macro(swift_bridgeObjectRetain, 1) \ | ||
macro(swift_bridgeObjectRelease, 0) | ||
|
||
// Emit declarations for variables called xN stored in xN, initialized with | ||
// regs[N]. | ||
// Expects an array named `regs` to be in scope at the expansion site, indexed | ||
// by register number. | ||
#define PASS_REGS_HELPER(num) \ | ||
register void *x ## num asm ("x" #num) = regs[num]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can't use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As for the type, I don't think it matters whether you use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (I do slightly worry that @compnerd might have a point here and that we should contemplate what happens if someone makes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, we'll notice. Worst case we can rewrite this whole thing in inline assembly instead of convincing the compiler to load the registers for us. |
||
// Declare and initialize one xN register variable (from regs[N]) for every | ||
// register listed in ALL_REGS. | ||
#define PASS_REGS ALL_REGS(PASS_REGS_HELPER) | ||
|
||
// Emit an entry in an asm inputs list containing "r" (xN). | ||
// Every entry ends with a comma, so whatever follows REG_INPUTS in the asm | ||
// statement must supply one more input operand. | ||
#define REG_INPUTS_HELPER(num) \ | ||
"r" (x ## num), | ||
#define REG_INPUTS ALL_REGS(REG_INPUTS_HELPER) | ||
|
||
// Make a function called call_function_xN that calls function_xN with registers | ||
// set to the contents of the given registers array. | ||
// PASS_REGS declares the xN register variables from `regs`, and listing them | ||
// all as asm inputs via REG_INPUTS asks the compiler to have them loaded when | ||
// the `bl` executes. The trailing "i"(0) is a dummy operand that absorbs the | ||
// comma REG_INPUTS leaves behind. The branch target is spelled with a leading | ||
// underscore (_function_xN). | ||
#define MAKE_CALL_FUNC(reg, func) \ | ||
static inline void call_##func##_x##reg(void **regs) { \ | ||
PASS_REGS \ | ||
asm("bl _" #func "_x" #reg : : REG_INPUTS "i"(0)); \ | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we can ensure that this doesn't setup a frame? This is also a guaranteed tail call right? Can we attribute that somehow? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure it's important, given that this is just for a test. I suppose if you wanted to avoid using the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't actually care about whether this sets up a frame or does a tail call. We will overwrite the frame pointer, but that's OK. Otherwise, we just need it to call the requested runtime function with the registers set to the appropriate values. Whether it does that as a tail or standard call, and with or without a frame, is fine. |
||
|
||
// Make a call_function_xN for each specialized function and register. | ||
// isRetain is accepted only so this macro matches the (name, isRetain) shape | ||
// that ALL_FUNCTIONS passes; it is not used in the expansion. | ||
#define MAKE_ALL_CALL_FUNCS(function, isRetain) \ | ||
FUNCTION_REGS(MAKE_CALL_FUNC, function) | ||
ALL_FUNCTIONS(MAKE_ALL_CALL_FUNCS) | ||
|
||
// Call `call` with each call_function_xN function created above, as well as the | ||
// base function name, the register it operates on, and whether it's a retain | ||
// function. | ||
static inline void foreachRRFunction(void (*call)(void (*)(void **regs), | ||
const char *name, int reg, | ||
int isRetain)) { | ||
#define CALL_ONE_FUNCTION(reg, function, isRetain) \ | ||
call(call_##function##_x##reg, #function, reg, isRetain); | ||
#define CALL_WITH_FUNCTIONS(function, isRetain) \ | ||
FUNCTION_REGS(CALL_ONE_FUNCTION, function, isRetain) | ||
|
||
ALL_FUNCTIONS(CALL_WITH_FUNCTIONS) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
// RUN: %target-run-simple-swift(-import-objc-header %S/Inputs/custom_rr_abi_utilities.h) | ||
|
||
// REQUIRES: CPU=arm64 || CPU=arm64e | ||
|
||
// REQUIRES: executable_test | ||
// UNSUPPORTED: use_os_stdlib | ||
// UNSUPPORTED: back_deployment_runtime | ||
|
||
import StdlibUnittest | ||
|
||
// A class that can provide a retainable pointer and determine whether it's | ||
// been retained or released. This creates a helper object that will be retained | ||
// or released. We don't attempt to clean up the helper so it leaks if not released, | ||
// but this is only used for this one test so that's OK. | ||
class RetainReleaseChecker { | ||
// Opaque pointer to the helper object, carrying the +1 from passRetained. | ||
var pointerValue: UnsafeMutableRawPointer | ||
|
||
// The object whose retain count is observed. | ||
private class Helper {} | ||
|
||
// Weak so that it does not keep the helper alive; becomes nil once the | ||
// helper is destroyed. | ||
private weak var weakRef: Helper? | ||
|
||
// Retain count of the helper as observed at the end of init(). | ||
private let originalRetainCount: UInt | ||
|
||
init() { | ||
do { | ||
// Make a helper object, retain it so it stays alive, and put it into | ||
// pointerValue and weakRef. | ||
let helper = Helper() | ||
pointerValue = Unmanaged.passRetained(helper).toOpaque() | ||
weakRef = helper | ||
} | ||
// Record the original retain count before anything happens. Then we can | ||
// detect changes without needing to know exactly what the count is supposed | ||
// to be. | ||
originalRetainCount = _getRetainCount(weakRef!) | ||
} | ||
|
||
// If helper was retained, then weakRef will still point to it, and the retain | ||
// count will have increased. | ||
var retained: Bool { | ||
weakRef != nil && _getRetainCount(weakRef!) > originalRetainCount | ||
} | ||
|
||
// weakRef is the only reference we had to the helper, aside from the retain we put | ||
// on it to create pointerValue. If helper was released, then it will be destroyed | ||
// and weakRef will be nil. | ||
var released: Bool { | ||
weakRef == nil | ||
} | ||
} | ||
|
||
// Test suite that the "retain" test for the register-specific entrypoints is | ||
// registered on. | ||
var CustomRRABITestSuite = TestSuite("CustomRRABI") | ||
|
||
// Exercise every register-specific retain/release entrypoint: load a distinct
// checker object into each register slot, call the entrypoint, and verify that
// only the object in the entrypoint's own register was retained or released.
CustomRRABITestSuite.test("retain") {
  foreachRRFunction { function, cname, register, isRetain in
    let name = String(cString: cname!)
    let fullname = "\(name)_x\(register)"

    // Create a set of RR checker objects, one per slot in the registers array.
    let checkers = (0..<NUM_REGS).map{ _ in RetainReleaseChecker() }

    // Fill out a registers array with the pointers from the RR checkers.
    var regs: [UnsafeMutableRawPointer?] = checkers.map{ $0.pointerValue }

    // Call the RR function.
    function!(&regs)

    // Make sure all the checkers report what they're supposed to. All registers
    // aside from `register` should be untouched, and `register` should have been
    // either retained or released.
    for (i, checker) in checkers.enumerated() {
      if i == register {
        if isRetain != 0 {
          expectTrue(checker.retained, "\(fullname) must retain x\(i)")
        } else {
          expectTrue(checker.released, "\(fullname) must release x\(i)")
        }
      } else {
        expectFalse(checker.retained, "\(fullname) must not retain x\(i)")
        expectFalse(checker.released, "\(fullname) must not release x\(i)")
      }
    }
  }
}
|
||
runAllTests() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to worry about other `-ffixed-x??` flags, and anything like x29 being reserved for FP?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it matters so much for `ALL_REGS` as it does for `FUNCTION_REGS`; I think even the ones we skip would be OK in the former. The latter has to be a little more careful. We don't need to worry about `-ffixed-x??`, I don't think, because this is just for testing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, this is just for testing, so unless someone is going to build the tests that way, it's fine.
For the actual runtime entrypoints, any entrypoint corresponding to a reserved register wouldn't be called, so it would be harmless other than a bit of wasted code space.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, given that this is about code size, that would be kinda nice to handle. I don't see a good way to do this, though, as there is no macro we can check for that, and I am loath to add a user-controlled define for this. I suppose that if you are truly concerned about space, LTO is a thing, and at that point DCE should kick in if you are statically linking.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds about right. If somebody really needs to squeeze every byte out of a dynamically-linked runtime with extra reserved registers, we can figure it out then.