Skip to content

Commit 39263ac

Browse files
committed
[RISCV] Add lowering of global TLS addresses
This patch adds lowering of global TLS addresses for the InitialExec, GeneralDynamic, LocalExec and LocalDynamic TLS models. LocalExec support required a 4-operand add instruction, whose fourth operand expresses a relocation on the symbol. The necessary fixup is emitted when the instruction is emitted. Differential Revision: https://reviews.llvm.org/D55305 llvm-svn: 363771
1 parent 73a28f0 commit 39263ac

File tree

7 files changed

+324
-0
lines changed

7 files changed

+324
-0
lines changed

llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ class RISCVExpandPseudo : public MachineFunctionPass {
6464
bool expandLoadAddress(MachineBasicBlock &MBB,
6565
MachineBasicBlock::iterator MBBI,
6666
MachineBasicBlock::iterator &NextMBBI);
67+
bool expandLoadTLSIEAddress(MachineBasicBlock &MBB,
68+
MachineBasicBlock::iterator MBBI,
69+
MachineBasicBlock::iterator &NextMBBI);
70+
bool expandLoadTLSGDAddress(MachineBasicBlock &MBB,
71+
MachineBasicBlock::iterator MBBI,
72+
MachineBasicBlock::iterator &NextMBBI);
6773
};
6874

6975
char RISCVExpandPseudo::ID = 0;
@@ -131,6 +137,10 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
131137
return expandLoadLocalAddress(MBB, MBBI, NextMBBI);
132138
case RISCV::PseudoLA:
133139
return expandLoadAddress(MBB, MBBI, NextMBBI);
140+
case RISCV::PseudoLA_TLS_IE:
141+
return expandLoadTLSIEAddress(MBB, MBBI, NextMBBI);
142+
case RISCV::PseudoLA_TLS_GD:
143+
return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI);
134144
}
135145

136146
return false;
@@ -677,6 +687,24 @@ bool RISCVExpandPseudo::expandLoadAddress(
677687
return expandAuipcInstPair(MBB, MBBI, NextMBBI, FlagsHi, SecondOpcode);
678688
}
679689

690+
// Expansion hook for PseudoLA_TLS_IE. Delegates to expandAuipcInstPair with
// the MO_TLS_GOT_HI flag on the high part and an XLEN-sized load as the second
// instruction (LD when the subtarget is 64-bit, LW otherwise).
bool RISCVExpandPseudo::expandLoadTLSIEAddress(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  const RISCVSubtarget &Subtarget =
      MBB.getParent()->getSubtarget<RISCVSubtarget>();

  // Pick the load that matches the register width of the subtarget.
  unsigned LoadOpcode = RISCV::LW;
  if (Subtarget.is64Bit())
    LoadOpcode = RISCV::LD;

  return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_TLS_GOT_HI,
                             LoadOpcode);
}
700+
701+
// Expansion hook for PseudoLA_TLS_GD. Delegates to expandAuipcInstPair with
// the MO_TLS_GD_HI flag on the high part and ADDI as the second instruction.
bool RISCVExpandPseudo::expandLoadTLSGDAddress(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  const unsigned HiFlags = RISCVII::MO_TLS_GD_HI;
  const unsigned LoOpcode = RISCV::ADDI;
  return expandAuipcInstPair(MBB, MBBI, NextMBBI, HiFlags, LoOpcode);
}
707+
680708
} // end of anonymous namespace
681709

682710
INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
178178
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
179179
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
180180

181+
setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
182+
181183
if (Subtarget.hasStdExtA()) {
182184
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
183185
setMinCmpXchgSizeInBits(32);
@@ -358,6 +360,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
358360
return lowerBlockAddress(Op, DAG);
359361
case ISD::ConstantPool:
360362
return lowerConstantPool(Op, DAG);
363+
case ISD::GlobalTLSAddress:
364+
return lowerGlobalTLSAddress(Op, DAG);
361365
case ISD::SELECT:
362366
return lowerSELECT(Op, DAG);
363367
case ISD::VASTART:
@@ -480,6 +484,116 @@ SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
480484
return getAddr(N, DAG);
481485
}
482486

487+
/// Build the address of the thread-local global referenced by \p N for the
/// static TLS models.
///
/// When \p UseGOT is true the value produced by PseudoLA_TLS_IE is added to the
/// thread pointer (X4). When it is false the address is formed relative to the
/// thread pointer with a LUI / PseudoAddTPRel / ADDI sequence. The node's own
/// offset is not folded in here: every target global address is created with
/// offset 0.
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  const GlobalValue *GV = N->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT XLenVT = Subtarget.getXLenVT();

  if (!UseGOT) {
    // Thread-pointer-relative access, with the appropriate adjustment for the
    // thread pointer offset. The resulting pattern is
    // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
    SDValue SymHi =
        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_TPREL_HI);
    SDValue SymAdd =
        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_TPREL_ADD);
    SDValue SymLo =
        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_TPREL_LO);

    SDValue Hi(DAG.getMachineNode(RISCV::LUI, DL, PtrVT, SymHi), 0);
    SDValue ThreadPtr = DAG.getRegister(RISCV::X4, XLenVT);
    SDValue HiPlusTP(DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, PtrVT, Hi,
                                        ThreadPtr, SymAdd),
                     0);
    return SDValue(
        DAG.getMachineNode(RISCV::ADDI, DL, PtrVT, HiPlusTP, SymLo), 0);
  }

  // GOT-based access: reach the GOT entry of this TLS symbol PC-relatively and
  // load from it. The pattern (PseudoLA_TLS_IE sym) expands to
  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Sym = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
  SDValue GOTValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, PtrVT, Sym),
                   0);

  // The loaded value still needs the thread pointer added to it.
  SDValue ThreadPtr = DAG.getRegister(RISCV::X4, XLenVT);
  return DAG.getNode(ISD::ADD, DL, PtrVT, GOTValue, ThreadPtr);
}
527+
528+
/// Build the address of the thread-local global referenced by \p N for the
/// dynamic TLS models by emitting a call to __tls_get_addr.
///
/// The single call argument is the value produced by PseudoLA_TLS_GD for the
/// symbol. The returned SDValue is the first element of the LowerCallTo
/// result.
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Integer type of pointer width, used for both the argument and the result.
  IntegerType *IntPtrTy =
      Type::getIntNTy(*DAG.getContext(), PtrVT.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // PC-relative access to the global dynamic GOT address. The pattern
  // (PseudoLA_TLS_GD sym) expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Sym = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
  SDValue GOTAddr(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, PtrVT, Sym),
                  0);

  // The call takes exactly one argument.
  ArgListEntry Arg;
  Arg.Node = GOTAddr;
  Arg.Ty = IntPtrTy;
  ArgListTy CallArgs;
  CallArgs.push_back(Arg);

  // Describe the call to __tls_get_addr.
  SDValue Callee = DAG.getExternalSymbol("__tls_get_addr", PtrVT);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL);
  CLI.setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, IntPtrTy, Callee, std::move(CallArgs));

  return LowerCallTo(CLI).first;
}
559+
560+
/// Custom lowering for ISD::GlobalTLSAddress.
///
/// The access sequence is chosen from the TLS model that the target machine
/// selects for the referenced global:
///  - LocalExec            -> getStaticTLSAddr(UseGOT = false)
///  - InitialExec          -> getStaticTLSAddr(UseGOT = true)
///  - Local/GeneralDynamic -> getDynamicTLSAddr (a __tls_get_addr call)
///
/// A non-zero offset on the node is applied afterwards with a separate ADD.
///
/// \param Op  The GlobalTLSAddress node; it is cast to GlobalAddressSDNode.
/// \param DAG The selection DAG that receives the new nodes.
/// \returns The lowered address, including the node's offset.
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  // Always take the model from the target machine instead of forcing LocalExec
  // whenever the code is not position independent. LocalExec is only valid for
  // a thread-local that is defined in the executable itself; forcing it for a
  // variable that lives in a shared library makes the access depend on TLS
  // copy relocations. NOTE(review): TargetMachine::getTLSModel is expected to
  // keep returning LocalExec for DSO-local symbols in non-PIC code and
  // InitialExec for the rest - confirm against its implementation.
  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    // LocalDynamic has no dedicated sequence here and shares the
    // GeneralDynamic lowering.
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}
596+
483597
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
484598
SDValue CondV = Op.getOperand(0);
485599
SDValue TrueV = Op.getOperand(1);

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,15 @@ class RISCVTargetLowering : public TargetLowering {
159159
template <class NodeTy>
160160
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
161161

162+
SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
163+
bool UseGOT) const;
164+
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
165+
162166
bool shouldConsiderGEPOffsetSplit() const override { return true; }
163167
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
164168
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
165169
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
170+
SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
166171
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
167172
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
168173
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,8 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
440440
case RISCV::PseudoTAIL:
441441
case RISCV::PseudoLLA:
442442
case RISCV::PseudoLA:
443+
case RISCV::PseudoLA_TLS_IE:
444+
case RISCV::PseudoLA_TLS_GD:
443445
return 8;
444446
case TargetOpcode::INLINEASM:
445447
case TargetOpcode::INLINEASM_BR: {

llvm/lib/Target/RISCV/RISCVMCInstLower.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,21 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
5757
case RISCVII::MO_GOT_HI:
5858
Kind = RISCVMCExpr::VK_RISCV_GOT_HI;
5959
break;
60+
case RISCVII::MO_TPREL_LO:
61+
Kind = RISCVMCExpr::VK_RISCV_TPREL_LO;
62+
break;
63+
case RISCVII::MO_TPREL_HI:
64+
Kind = RISCVMCExpr::VK_RISCV_TPREL_HI;
65+
break;
66+
case RISCVII::MO_TPREL_ADD:
67+
Kind = RISCVMCExpr::VK_RISCV_TPREL_ADD;
68+
break;
69+
case RISCVII::MO_TLS_GOT_HI:
70+
Kind = RISCVMCExpr::VK_RISCV_TLS_GOT_HI;
71+
break;
72+
case RISCVII::MO_TLS_GD_HI:
73+
Kind = RISCVMCExpr::VK_RISCV_TLS_GD_HI;
74+
break;
6075
}
6176

6277
const MCExpr *ME =

llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ enum {
5555
MO_PCREL_LO,
5656
MO_PCREL_HI,
5757
MO_GOT_HI,
58+
MO_TPREL_LO,
59+
MO_TPREL_HI,
60+
MO_TPREL_ADD,
61+
MO_TLS_GOT_HI,
62+
MO_TLS_GD_HI,
5863
};
5964
} // namespace RISCVII
6065

llvm/test/CodeGen/RISCV/tls-models.ll

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -relocation-model=pic < %s \
3+
; RUN: | FileCheck -check-prefix=RV32-PIC %s
4+
; RUN: llc -mtriple=riscv64 -relocation-model=pic < %s \
5+
; RUN: | FileCheck -check-prefix=RV64-PIC %s
6+
; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefix=NOPIC %s
7+
; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=NOPIC %s
8+
9+
; Check that TLS symbols are lowered correctly based on the specified
10+
; model.
11+
12+
@unspecified = thread_local global i32 42
13+
@ld = thread_local(localdynamic) global i32 42
14+
@ie = thread_local(initialexec) global i32 42
15+
@le = thread_local(localexec) global i32 42
16+
17+
18+
; No model specified
19+
20+
define i32* @f1() nounwind {
21+
; RV32-PIC-LABEL: f1:
22+
; RV32-PIC: # %bb.0: # %entry
23+
; RV32-PIC-NEXT: addi sp, sp, -16
24+
; RV32-PIC-NEXT: sw ra, 12(sp)
25+
; RV32-PIC-NEXT: .LBB0_1: # %entry
26+
; RV32-PIC-NEXT: # Label of block must be emitted
27+
; RV32-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(unspecified)
28+
; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB0_1)
29+
; RV32-PIC-NEXT: call __tls_get_addr@plt
30+
; RV32-PIC-NEXT: lw ra, 12(sp)
31+
; RV32-PIC-NEXT: addi sp, sp, 16
32+
; RV32-PIC-NEXT: ret
33+
;
34+
; RV64-PIC-LABEL: f1:
35+
; RV64-PIC: # %bb.0: # %entry
36+
; RV64-PIC-NEXT: addi sp, sp, -16
37+
; RV64-PIC-NEXT: sd ra, 8(sp)
38+
; RV64-PIC-NEXT: .LBB0_1: # %entry
39+
; RV64-PIC-NEXT: # Label of block must be emitted
40+
; RV64-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(unspecified)
41+
; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB0_1)
42+
; RV64-PIC-NEXT: call __tls_get_addr@plt
43+
; RV64-PIC-NEXT: ld ra, 8(sp)
44+
; RV64-PIC-NEXT: addi sp, sp, 16
45+
; RV64-PIC-NEXT: ret
46+
;
47+
; NOPIC-LABEL: f1:
48+
; NOPIC: # %bb.0: # %entry
49+
; NOPIC-NEXT: lui a0, %tprel_hi(unspecified)
50+
; NOPIC-NEXT: add a0, a0, tp, %tprel_add(unspecified)
51+
; NOPIC-NEXT: addi a0, a0, %tprel_lo(unspecified)
52+
; NOPIC-NEXT: ret
53+
entry:
54+
ret i32* @unspecified
55+
}
56+
57+
58+
; localdynamic specified
59+
60+
define i32* @f2() nounwind {
61+
; RV32-PIC-LABEL: f2:
62+
; RV32-PIC: # %bb.0: # %entry
63+
; RV32-PIC-NEXT: addi sp, sp, -16
64+
; RV32-PIC-NEXT: sw ra, 12(sp)
65+
; RV32-PIC-NEXT: .LBB1_1: # %entry
66+
; RV32-PIC-NEXT: # Label of block must be emitted
67+
; RV32-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld)
68+
; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB1_1)
69+
; RV32-PIC-NEXT: call __tls_get_addr@plt
70+
; RV32-PIC-NEXT: lw ra, 12(sp)
71+
; RV32-PIC-NEXT: addi sp, sp, 16
72+
; RV32-PIC-NEXT: ret
73+
;
74+
; RV64-PIC-LABEL: f2:
75+
; RV64-PIC: # %bb.0: # %entry
76+
; RV64-PIC-NEXT: addi sp, sp, -16
77+
; RV64-PIC-NEXT: sd ra, 8(sp)
78+
; RV64-PIC-NEXT: .LBB1_1: # %entry
79+
; RV64-PIC-NEXT: # Label of block must be emitted
80+
; RV64-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld)
81+
; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.LBB1_1)
82+
; RV64-PIC-NEXT: call __tls_get_addr@plt
83+
; RV64-PIC-NEXT: ld ra, 8(sp)
84+
; RV64-PIC-NEXT: addi sp, sp, 16
85+
; RV64-PIC-NEXT: ret
86+
;
87+
; NOPIC-LABEL: f2:
88+
; NOPIC: # %bb.0: # %entry
89+
; NOPIC-NEXT: lui a0, %tprel_hi(ld)
90+
; NOPIC-NEXT: add a0, a0, tp, %tprel_add(ld)
91+
; NOPIC-NEXT: addi a0, a0, %tprel_lo(ld)
92+
; NOPIC-NEXT: ret
93+
entry:
94+
ret i32* @ld
95+
}
96+
97+
98+
; initialexec specified
99+
100+
define i32* @f3() nounwind {
101+
; RV32-PIC-LABEL: f3:
102+
; RV32-PIC: # %bb.0: # %entry
103+
; RV32-PIC-NEXT: .LBB2_1: # %entry
104+
; RV32-PIC-NEXT: # Label of block must be emitted
105+
; RV32-PIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie)
106+
; RV32-PIC-NEXT: lw a0, %pcrel_lo(.LBB2_1)(a0)
107+
; RV32-PIC-NEXT: add a0, a0, tp
108+
; RV32-PIC-NEXT: ret
109+
;
110+
; RV64-PIC-LABEL: f3:
111+
; RV64-PIC: # %bb.0: # %entry
112+
; RV64-PIC-NEXT: .LBB2_1: # %entry
113+
; RV64-PIC-NEXT: # Label of block must be emitted
114+
; RV64-PIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie)
115+
; RV64-PIC-NEXT: ld a0, %pcrel_lo(.LBB2_1)(a0)
116+
; RV64-PIC-NEXT: add a0, a0, tp
117+
; RV64-PIC-NEXT: ret
118+
;
119+
; NOPIC-LABEL: f3:
120+
; NOPIC: # %bb.0: # %entry
121+
; NOPIC-NEXT: lui a0, %tprel_hi(ie)
122+
; NOPIC-NEXT: add a0, a0, tp, %tprel_add(ie)
123+
; NOPIC-NEXT: addi a0, a0, %tprel_lo(ie)
124+
; NOPIC-NEXT: ret
125+
entry:
126+
ret i32* @ie
127+
}
128+
129+
130+
; localexec specified
131+
132+
define i32* @f4() nounwind {
133+
; RV32-PIC-LABEL: f4:
134+
; RV32-PIC: # %bb.0: # %entry
135+
; RV32-PIC-NEXT: lui a0, %tprel_hi(le)
136+
; RV32-PIC-NEXT: add a0, a0, tp, %tprel_add(le)
137+
; RV32-PIC-NEXT: addi a0, a0, %tprel_lo(le)
138+
; RV32-PIC-NEXT: ret
139+
;
140+
; RV64-PIC-LABEL: f4:
141+
; RV64-PIC: # %bb.0: # %entry
142+
; RV64-PIC-NEXT: lui a0, %tprel_hi(le)
143+
; RV64-PIC-NEXT: add a0, a0, tp, %tprel_add(le)
144+
; RV64-PIC-NEXT: addi a0, a0, %tprel_lo(le)
145+
; RV64-PIC-NEXT: ret
146+
;
147+
; NOPIC-LABEL: f4:
148+
; NOPIC: # %bb.0: # %entry
149+
; NOPIC-NEXT: lui a0, %tprel_hi(le)
150+
; NOPIC-NEXT: add a0, a0, tp, %tprel_add(le)
151+
; NOPIC-NEXT: addi a0, a0, %tprel_lo(le)
152+
; NOPIC-NEXT: ret
153+
entry:
154+
ret i32* @le
155+
}

0 commit comments

Comments
 (0)