Commit d150523

lephilousophe authored and tstellar committed
[AArch64] Use correct calling convention for each vararg
While checking whether tail call optimization is possible, the calling convention applied to fixed arguments is not the correct one. For DarwinPCS this means that all arguments of a vararg function are assigned to the stack, even though the fixed ones could go in registers. This prevents non-virtual thunks from being tail-call optimized, even though they are marked musttail.

Differential Revision: https://reviews.llvm.org/D120622

(cherry picked from commit 26cd258)
1 parent fd98b0f commit d150523
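
For context, a hypothetical C++ reduction of the failing pattern (reconstructed from the mangled names in the new test below; it is not part of the commit): overriding a variadic virtual method inherited from a non-primary base makes Clang emit a non-virtual, this-adjusting thunk, and such a thunk can only forward its variadic arguments through a musttail call.

// Hypothetical reduction, assuming the Itanium C++ ABI on arm64-apple: B is
// the second base of C, at offset 16, so C::add gets the thunk
// _ZThn16_N1C3addEPKcz, which adjusts `this` and musttail-calls
// _ZN1C3addEPKcz. Before this fix, DarwinPCS assigned even the fixed
// arguments of the variadic callee to the stack, so the call could not be
// lowered as a tail call and the musttail marker could not be honored.
struct A {
  virtual ~A() {}
  int a;
};
struct B {
  virtual void add(const char *fmt, ...);
  int b;
};
struct C : A, B {
  void add(const char *fmt, ...) override; // thunk for the B-in-C subobject
};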

File tree

3 files changed: +113 -76 lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 75 additions & 71 deletions
@@ -5843,14 +5843,62 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
   }
 }
 
+static void analyzeCallOperands(const AArch64TargetLowering &TLI,
+                                const AArch64Subtarget *Subtarget,
+                                const TargetLowering::CallLoweringInfo &CLI,
+                                CCState &CCInfo) {
+  const SelectionDAG &DAG = CLI.DAG;
+  CallingConv::ID CalleeCC = CLI.CallConv;
+  bool IsVarArg = CLI.IsVarArg;
+  const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
+
+  unsigned NumArgs = Outs.size();
+  for (unsigned i = 0; i != NumArgs; ++i) {
+    MVT ArgVT = Outs[i].VT;
+    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+
+    bool UseVarArgCC = false;
+    if (IsVarArg) {
+      // On Windows, the fixed arguments in a vararg call are passed in GPRs
+      // too, so use the vararg CC to force them to integer registers.
+      if (IsCalleeWin64) {
+        UseVarArgCC = true;
+      } else {
+        UseVarArgCC = !Outs[i].IsFixed;
+      }
+    } else {
+      // Get type of the original argument.
+      EVT ActualVT =
+          TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty,
+                           /*AllowUnknown*/ true);
+      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT;
+      // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+        ArgVT = MVT::i8;
+      else if (ActualMVT == MVT::i16)
+        ArgVT = MVT::i16;
+    }
+
+    CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CalleeCC, UseVarArgCC);
+    bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+    assert(!Res && "Call operand has unhandled type");
+    (void)Res;
+  }
+}
+
 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
-    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
-    const SmallVectorImpl<ISD::OutputArg> &Outs,
-    const SmallVectorImpl<SDValue> &OutVals,
-    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+    const CallLoweringInfo &CLI) const {
+  CallingConv::ID CalleeCC = CLI.CallConv;
   if (!mayTailCallThisCC(CalleeCC))
     return false;
 
+  SDValue Callee = CLI.Callee;
+  bool IsVarArg = CLI.IsVarArg;
+  const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+  const SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+  const SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+  const SelectionDAG &DAG = CLI.DAG;
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &CallerF = MF.getFunction();
   CallingConv::ID CallerCC = CallerF.getCallingConv();

@@ -5915,30 +5963,14 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
 
   // I want anyone implementing a new calling convention to think long and hard
   // about this assert.
-  assert((!isVarArg || CalleeCC == CallingConv::C) &&
+  assert((!IsVarArg || CalleeCC == CallingConv::C) &&
          "Unexpected variadic calling convention");
 
   LLVMContext &C = *DAG.getContext();
-  if (isVarArg && !Outs.empty()) {
-    // At least two cases here: if caller is fastcc then we can't have any
-    // memory arguments (we'd be expected to clean up the stack afterwards). If
-    // caller is C then we could potentially use its argument area.
-
-    // FIXME: for now we take the most conservative of these in both cases:
-    // disallow all variadic memory operands.
-    SmallVector<CCValAssign, 16> ArgLocs;
-    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
-
-    CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
-    for (const CCValAssign &ArgLoc : ArgLocs)
-      if (!ArgLoc.isRegLoc())
-        return false;
-  }
-
   // Check that the call results are passed in the same way.
   if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
-                                  CCAssignFnForCall(CalleeCC, isVarArg),
-                                  CCAssignFnForCall(CallerCC, isVarArg)))
+                                  CCAssignFnForCall(CalleeCC, IsVarArg),
+                                  CCAssignFnForCall(CallerCC, IsVarArg)))
     return false;
   // The callee has to preserve all registers the caller needs to preserve.
   const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();

@@ -5958,9 +5990,22 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
     return true;
 
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
+  CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);
+
+  analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
+
+  if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
+    // When we are musttail, additional checks have been done and we can safely
+    // ignore this check.
+    // At least two cases here: if caller is fastcc then we can't have any
+    // memory arguments (we'd be expected to clean up the stack afterwards). If
+    // caller is C then we could potentially use its argument area.
 
-  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
+    // FIXME: for now we take the most conservative of these in both cases:
+    // disallow all variadic memory operands.
+    for (const CCValAssign &ArgLoc : ArgLocs)
+      if (!ArgLoc.isRegLoc())
+        return false;
+  }
 
   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 

@@ -6051,7 +6096,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   SDValue Chain = CLI.Chain;
   SDValue Callee = CLI.Callee;
   bool &IsTailCall = CLI.IsTailCall;
-  CallingConv::ID CallConv = CLI.CallConv;
+  CallingConv::ID &CallConv = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
 
   MachineFunction &MF = DAG.getMachineFunction();

@@ -6061,7 +6106,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
   bool IsSibCall = false;
-  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
 
   // Check callee args/returns for SVE registers and set calling convention
   // accordingly.

@@ -6079,8 +6123,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   if (IsTailCall) {
     // Check if it's really possible to do a tail call.
-    IsTailCall = isEligibleForTailCallOptimization(
-        Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
+    IsTailCall = isEligibleForTailCallOptimization(CLI);
 
     // A sibling call is one where we're under the usual C ABI and not planning
     // to change that but can still do a tail call:

@@ -6101,56 +6144,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
 
   if (IsVarArg) {
-    // Handle fixed and variable vector arguments differently.
-    // Variable vector arguments always go into memory.
     unsigned NumArgs = Outs.size();
 
     for (unsigned i = 0; i != NumArgs; ++i) {
-      MVT ArgVT = Outs[i].VT;
-      if (!Outs[i].IsFixed && ArgVT.isScalableVector())
+      if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector())
         report_fatal_error("Passing SVE types to variadic functions is "
                            "currently not supported");
-
-      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
-      bool UseVarArgCC = !Outs[i].IsFixed;
-      // On Windows, the fixed arguments in a vararg call are passed in GPRs
-      // too, so use the vararg CC to force them to integer registers.
-      if (IsCalleeWin64)
-        UseVarArgCC = true;
-      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
-      bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
-      assert(!Res && "Call operand has unhandled type");
-      (void)Res;
-    }
-  } else {
-    // At this point, Outs[].VT may already be promoted to i32. To correctly
-    // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
-    // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
-    // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
-    // we use a special version of AnalyzeCallOperands to pass in ValVT and
-    // LocVT.
-    unsigned NumArgs = Outs.size();
-    for (unsigned i = 0; i != NumArgs; ++i) {
-      MVT ValVT = Outs[i].VT;
-      // Get type of the original argument.
-      EVT ActualVT = getValueType(DAG.getDataLayout(),
-                                  CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
-                                  /*AllowUnknown*/ true);
-      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
-      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
-      // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
-      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
-        ValVT = MVT::i8;
-      else if (ActualMVT == MVT::i16)
-        ValVT = MVT::i16;
-
-      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
-      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
-      assert(!Res && "Call operand has unhandled type");
-      (void)Res;
     }
   }
 
+  analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
+
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();

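The crux of the change is that analyzeCallOperands now chooses the assignment function per operand: on Darwin, only the actual variadic arguments use the vararg rules, while fixed arguments keep the regular convention. As a rough sketch of what CCAssignFnForCall resolves to for CallingConv::C (the helper name below is made up for illustration; the real dispatcher in this file covers more conventions):

// Simplified sketch, not the verbatim LLVM code. CCAssignFn comes from
// llvm/CodeGen/CallingConvLower.h; the CC_AArch64_* assignment functions are
// declared in llvm/lib/Target/AArch64/AArch64CallingConvention.h.
static CCAssignFn *pickAssignFn(bool IsWin64, bool IsDarwin, bool IsFixed) {
  // On Win64 every operand of a vararg call uses the vararg CC; elsewhere
  // only the actual variadic operands do.
  bool UseVarArgCC = IsWin64 || !IsFixed;
  if (!UseVarArgCC)
    return IsDarwin ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
  if (IsWin64)
    return CC_AArch64_Win64_VarArg;
  // DarwinPCS: fixed args go to registers first, the variadic tail always
  // goes to the stack; ELF AAPCS64 treats fixed and variadic args alike.
  return IsDarwin ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_AAPCS;
}
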
llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 5 deletions
@@ -898,11 +898,8 @@ class AArch64TargetLowering : public TargetLowering {
   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
 
-  bool isEligibleForTailCallOptimization(
-      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
-      const SmallVectorImpl<ISD::OutputArg> &Outs,
-      const SmallVectorImpl<SDValue> &OutVals,
-      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
+  bool
+  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
 
   /// Finds the incoming stack arguments which overlap the given fixed stack
   /// object and incorporates their load into the current chain. This prevents
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+; With Darwin PCS, non-virtual thunks are generated with musttail
+; and are expected to build.
+; In general, Darwin PCS should be tail optimized.
+; RUN: llc -mtriple=arm64-apple-ios5.0.0 < %s | FileCheck %s
+
+; CHECK-LABEL: __ZThn16_N1C3addEPKcz:
+; CHECK: b __ZN1C3addEPKcz
+; CHECK-LABEL: _tailTest:
+; CHECK: b __ZN1C3addEPKcz
+; CHECK-LABEL: __ZThn8_N1C1fEiiiiiiiiiz:
+; CHECK: ldr w9, [sp, #4]
+; CHECK: str w9, [sp, #4]
+; CHECK: b __ZN1C1fEiiiiiiiiiz
+
+%class.C = type { %class.A.base, [4 x i8], %class.B.base, [4 x i8] }
+%class.A.base = type <{ i32 (...)**, i32 }>
+%class.B.base = type <{ i32 (...)**, i32 }>
+
+declare void @_ZN1C3addEPKcz(%class.C*, i8*, ...) unnamed_addr #0 align 2
+
+define void @_ZThn16_N1C3addEPKcz(%class.C* %0, i8* %1, ...) unnamed_addr #0 align 2 {
+  musttail call void (%class.C*, i8*, ...) @_ZN1C3addEPKcz(%class.C* noundef nonnull align 8 dereferenceable(28) undef, i8* noundef %1, ...)
+  ret void
+}
+
+define void @tailTest(%class.C* %0, i8* %1, ...) unnamed_addr #0 align 2 {
+  tail call void (%class.C*, i8*, ...) @_ZN1C3addEPKcz(%class.C* noundef nonnull align 8 dereferenceable(28) undef, i8* noundef %1)
+  ret void
+}
+
+declare void @_ZN1C1fEiiiiiiiiiz(%class.C* %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 noundef %9, ...) unnamed_addr #1 align 2
+
+define void @_ZThn8_N1C1fEiiiiiiiiiz(%class.C* %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 noundef %9, ...) unnamed_addr #1 align 2 {
+  musttail call void (%class.C*, i32, i32, i32, i32, i32, i32, i32, i32, i32, ...) @_ZN1C1fEiiiiiiiiiz(%class.C* nonnull align 8 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 noundef %9, ...)
+  ret void
+}
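
For reference, the RUN line above expands to roughly this manual invocation (the file name is hypothetical, since this page does not show the new test's path):

  llc -mtriple=arm64-apple-ios5.0.0 < darwinpcs-tail.ll | FileCheck darwinpcs-tail.ll

The first two CHECK blocks require each call to collapse to a bare `b` branch, i.e. a genuine tail call; the third accepts that a fixed argument which already lives on the stack (the tenth i32) is reloaded and stored back in place before the branch.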
