Skip to content

Commit 549e4ea

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.4
1 parent d023044 commit 549e4ea

File tree

8 files changed

+93
-60
lines changed

8 files changed

+93
-60
lines changed

llvm/lib/CodeGen/GlobalISel/CallLowering.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
144144
// Try looking through a bitcast from one function type to another.
145145
// Commonly happens with calls to objc_msgSend().
146146
const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
147-
if (const Function *F = dyn_cast<Function>(CalleeV))
148-
Info.Callee = MachineOperand::CreateGA(F, 0);
149-
else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
147+
if (const Function *F = dyn_cast<Function>(CalleeV)) {
148+
if (F->hasFnAttribute(Attribute::NonLazyBind)) {
149+
auto Reg =
150+
MRI.createGenericVirtualRegister(getLLTForType(*F->getType(), DL));
151+
MIRBuilder.buildGlobalValue(Reg, F);
152+
Info.Callee = MachineOperand::CreateReg(Reg, false);
153+
} else {
154+
Info.Callee = MachineOperand::CreateGA(F, 0);
155+
}
156+
} else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
150157
// IR IFuncs and Aliases can't be forward declared (only defined), so the
151158
// callee must be in the same TU and therefore we can direct-call it without
152159
// worrying about it being out of range.

llvm/lib/Target/AArch64/AArch64FastISel.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3202,6 +3202,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
32023202
if (Callee && !computeCallAddress(Callee, Addr))
32033203
return false;
32043204

3205+
// MO_GOT is not handled here, so bail out. Intrinsic calls compiled with
3206+
// -fno-plt do not have the nonlazybind attribute, so also check "RtLibUseGOT".
3207+
if ((Subtarget->classifyGlobalFunctionReference(Addr.getGlobalValue(), TM) !=
3208+
AArch64II::MO_NO_FLAG) ||
3209+
MF->getFunction().getParent()->getRtLibUseGOT())
3210+
return false;
3211+
32053212
// The weak function target may be zero; in that case we must use indirect
32063213
// addressing via a stub on windows as it may be out of range for a
32073214
// PC-relative jump.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7969,13 +7969,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
79697969
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
79707970
}
79717971
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
7972-
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
7973-
Subtarget->isTargetMachO()) {
7974-
const char *Sym = S->getSymbol();
7972+
bool UseGot = (getTargetMachine().getCodeModel() == CodeModel::Large &&
7973+
Subtarget->isTargetMachO()) ||
7974+
MF.getFunction().getParent()->getRtLibUseGOT();
7975+
const char *Sym = S->getSymbol();
7976+
if (UseGot) {
79757977
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
79767978
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
79777979
} else {
7978-
const char *Sym = S->getSymbol();
79797980
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
79807981
}
79817982
}

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@ static cl::opt<bool>
4343
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
4444
"an address is ignored"), cl::init(false), cl::Hidden);
4545

46-
static cl::opt<bool>
47-
UseNonLazyBind("aarch64-enable-nonlazybind",
48-
cl::desc("Call nonlazybind functions via direct GOT load"),
49-
cl::init(false), cl::Hidden);
46+
static cl::opt<bool> MachOUseNonLazyBind(
47+
"aarch64-macho-enable-nonlazybind",
48+
cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
49+
cl::Hidden);
5050

5151
static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
5252
cl::desc("Enable the use of AA during codegen."));
@@ -434,7 +434,8 @@ unsigned AArch64Subtarget::classifyGlobalFunctionReference(
434434

435435
// NonLazyBind goes via GOT unless we know it's available locally.
436436
auto *F = dyn_cast<Function>(GV);
437-
if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
437+
if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
438+
F->hasFnAttribute(Attribute::NonLazyBind) &&
438439
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
439440
return AArch64II::MO_GOT;
440441

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1273,8 +1273,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
12731273
!Subtarget.noBTIAtReturnTwice() &&
12741274
MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
12751275
Opc = AArch64::BLR_BTI;
1276-
else
1276+
else {
1277+
// For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
1278+
// is set.
1279+
if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
1280+
auto Reg =
1281+
MRI.createGenericVirtualRegister(getLLTForType(*F.getType(), DL));
1282+
auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
1283+
DstOp(Reg).addDefToMIB(MRI, MIB);
1284+
MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
1285+
Info.Callee = MachineOperand::CreateReg(Reg, false);
1286+
}
12771287
Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
1288+
}
12781289

12791290
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
12801291
unsigned CalleeOpNo = 0;

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2841,11 +2841,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
28412841
}
28422842

28432843
case TargetOpcode::G_GLOBAL_VALUE: {
2844-
auto GV = I.getOperand(1).getGlobal();
2845-
if (GV->isThreadLocal())
2846-
return selectTLSGlobalValue(I, MRI);
2844+
const GlobalValue *GV = nullptr;
2845+
unsigned OpFlags;
2846+
if (I.getOperand(1).isSymbol()) {
2847+
OpFlags = I.getOperand(1).getTargetFlags();
2848+
// Currently only used by "RtLibUseGOT".
2849+
assert(OpFlags == AArch64II::MO_GOT);
2850+
} else {
2851+
GV = I.getOperand(1).getGlobal();
2852+
if (GV->isThreadLocal())
2853+
return selectTLSGlobalValue(I, MRI);
2854+
OpFlags = STI.ClassifyGlobalReference(GV, TM);
2855+
}
28472856

2848-
unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
28492857
if (OpFlags & AArch64II::MO_GOT) {
28502858
I.setDesc(TII.get(AArch64::LOADgot));
28512859
I.getOperand(1).setTargetFlags(OpFlags);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,6 +1314,9 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
13141314
// By splitting this here, we can optimize accesses in the small code model by
13151315
// folding in the G_ADD_LOW into the load/store offset.
13161316
auto &GlobalOp = MI.getOperand(1);
1317+
// Don't modify an external symbol operand (e.g. an intrinsic call's callee).
1318+
if (GlobalOp.isSymbol())
1319+
return true;
13171320
const auto* GV = GlobalOp.getGlobal();
13181321
if (GV->isThreadLocal())
13191322
return true; // Don't want to modify TLS vars.
Lines changed: 38 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2-
; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
2+
; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-macho-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
33
; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=MACHO-NORMAL
44
; RUN: llc -mtriple=aarch64 -fast-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-FI
55
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-GI
@@ -19,13 +19,18 @@ define void @test_laziness(ptr %a) nounwind {
1919
; MACHO-NEXT: Lloh1:
2020
; MACHO-NEXT: ldr x8, [x8, _external@GOTPAGEOFF]
2121
; MACHO-NEXT: blr x8
22+
; MACHO-NEXT: Lloh2:
23+
; MACHO-NEXT: adrp x8, _memset@GOTPAGE
2224
; MACHO-NEXT: mov x0, x19
2325
; MACHO-NEXT: mov w1, #1 ; =0x1
26+
; MACHO-NEXT: Lloh3:
27+
; MACHO-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
2428
; MACHO-NEXT: mov w2, #1000 ; =0x3e8
25-
; MACHO-NEXT: bl _memset
29+
; MACHO-NEXT: blr x8
2630
; MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
2731
; MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
2832
; MACHO-NEXT: ret
33+
; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
2934
; MACHO-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
3035
;
3136
; MACHO-NORMAL-LABEL: test_laziness:
@@ -34,50 +39,34 @@ define void @test_laziness(ptr %a) nounwind {
3439
; MACHO-NORMAL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
3540
; MACHO-NORMAL-NEXT: mov x19, x0
3641
; MACHO-NORMAL-NEXT: bl _external
42+
; MACHO-NORMAL-NEXT: Lloh0:
43+
; MACHO-NORMAL-NEXT: adrp x8, _memset@GOTPAGE
3744
; MACHO-NORMAL-NEXT: mov x0, x19
3845
; MACHO-NORMAL-NEXT: mov w1, #1 ; =0x1
46+
; MACHO-NORMAL-NEXT: Lloh1:
47+
; MACHO-NORMAL-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
3948
; MACHO-NORMAL-NEXT: mov w2, #1000 ; =0x3e8
40-
; MACHO-NORMAL-NEXT: bl _memset
49+
; MACHO-NORMAL-NEXT: blr x8
4150
; MACHO-NORMAL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
4251
; MACHO-NORMAL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
4352
; MACHO-NORMAL-NEXT: ret
53+
; MACHO-NORMAL-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
4454
;
45-
; ELF-FI-LABEL: test_laziness:
46-
; ELF-FI: // %bb.0:
47-
; ELF-FI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
48-
; ELF-FI-NEXT: mov x19, x0
49-
; ELF-FI-NEXT: bl external
50-
; ELF-FI-NEXT: mov w8, #1 // =0x1
51-
; ELF-FI-NEXT: mov x0, x19
52-
; ELF-FI-NEXT: mov x2, #1000 // =0x3e8
53-
; ELF-FI-NEXT: uxtb w1, w8
54-
; ELF-FI-NEXT: bl memset
55-
; ELF-FI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
56-
; ELF-FI-NEXT: ret
57-
;
58-
; ELF-GI-LABEL: test_laziness:
59-
; ELF-GI: // %bb.0:
60-
; ELF-GI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
61-
; ELF-GI-NEXT: mov x19, x0
62-
; ELF-GI-NEXT: bl external
63-
; ELF-GI-NEXT: mov x0, x19
64-
; ELF-GI-NEXT: mov w1, #1 // =0x1
65-
; ELF-GI-NEXT: mov w2, #1000 // =0x3e8
66-
; ELF-GI-NEXT: bl memset
67-
; ELF-GI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
68-
; ELF-GI-NEXT: ret
69-
;
70-
; ELF-SDAG-LABEL: test_laziness:
71-
; ELF-SDAG: // %bb.0:
72-
; ELF-SDAG-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
73-
; ELF-SDAG-NEXT: mov x19, x0
74-
; ELF-SDAG-NEXT: bl external
75-
; ELF-SDAG-NEXT: mov x0, x19
76-
; ELF-SDAG-NEXT: mov w1, #1 // =0x1
77-
; ELF-SDAG-NEXT: mov w2, #1000 // =0x3e8
78-
; ELF-SDAG-NEXT: bl memset
79-
; ELF-SDAG-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
80-
; ELF-SDAG-NEXT: ret
55+
; ELF-LABEL: test_laziness:
56+
; ELF: // %bb.0:
57+
; ELF-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
58+
; ELF-NEXT: adrp x8, :got:external
59+
; ELF-NEXT: mov x19, x0
60+
; ELF-NEXT: ldr x8, [x8, :got_lo12:external]
61+
; ELF-NEXT: blr x8
62+
; ELF-NEXT: adrp x8, :got:memset
63+
; ELF-NEXT: mov x0, x19
64+
; ELF-NEXT: mov w1, #1 // =0x1
65+
; ELF-NEXT: ldr x8, [x8, :got_lo12:memset]
66+
; ELF-NEXT: mov w2, #1000 // =0x3e8
67+
; ELF-NEXT: blr x8
68+
; ELF-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
69+
; ELF-NEXT: ret
8170
call void @external()
8271
call void @llvm.memset.p0.i64(ptr align 1 %a, i8 1, i64 1000, i1 false)
8372
ret void
@@ -86,20 +75,22 @@ define void @test_laziness(ptr %a) nounwind {
8675
define void @test_laziness_tail() nounwind {
8776
; MACHO-LABEL: test_laziness_tail:
8877
; MACHO: ; %bb.0:
89-
; MACHO-NEXT: Lloh2:
78+
; MACHO-NEXT: Lloh4:
9079
; MACHO-NEXT: adrp x0, _external@GOTPAGE
91-
; MACHO-NEXT: Lloh3:
80+
; MACHO-NEXT: Lloh5:
9281
; MACHO-NEXT: ldr x0, [x0, _external@GOTPAGEOFF]
9382
; MACHO-NEXT: br x0
94-
; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
83+
; MACHO-NEXT: .loh AdrpLdrGot Lloh4, Lloh5
9584
;
9685
; MACHO-NORMAL-LABEL: test_laziness_tail:
9786
; MACHO-NORMAL: ; %bb.0:
9887
; MACHO-NORMAL-NEXT: b _external
9988
;
10089
; ELF-LABEL: test_laziness_tail:
10190
; ELF: // %bb.0:
102-
; ELF-NEXT: b external
91+
; ELF-NEXT: adrp x0, :got:external
92+
; ELF-NEXT: ldr x0, [x0, :got_lo12:external]
93+
; ELF-NEXT: br x0
10394
tail call void @external()
10495
ret void
10596
}
@@ -108,3 +99,7 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
10899

109100
!llvm.module.flags = !{!0}
110101
!0 = !{i32 7, !"RtLibUseGOT", i32 1}
102+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
103+
; ELF-FI: {{.*}}
104+
; ELF-GI: {{.*}}
105+
; ELF-SDAG: {{.*}}

0 commit comments

Comments
 (0)