Skip to content

Commit cddb0db

Browse files
committed
[LLD][PowerPC] Implement GOT to PC-Rel relaxation
This patch implements the handling for the R_PPC64_PCREL_OPT relocation as well as the GOT relaxation for the associated R_PPC64_GOT_PCREL34 relocation.

On Power10 targets with PC-relative addressing, the linker can relax GOT-indirect accesses to PC-relative ones under some conditions. Since the sequence consists of a prefixed load followed by a non-prefixed access (load or store), the linker needs to replace the first instruction (as the replacement instruction will be prefixed). The compiler communicates to the linker that this optimization is safe by placing the two aforementioned relocations on the GOT load (of the address). The linker then does two things:
- Convert the load from the GOT into a PC-relative add that computes the address relative to the PC.
- Find the instruction referred to by the second relocation (R_PPC64_PCREL_OPT) and replace the first instruction with the PC-relative version of it.

It is important to synchronize the mapping from legacy memory instructions to their PC-relative form. Hence, this patch adds a file to be included by both the compiler and the linker so they're always in agreement.

Differential revision: https://reviews.llvm.org/D84360
1 parent 25d759c commit cddb0db

File tree

10 files changed

+744
-1
lines changed

10 files changed

+744
-1
lines changed

lld/ELF/Arch/PPC64.cpp

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,90 @@ enum DFormOpcd {
6262
ADDI = 14
6363
};
6464

65+
// Identifying bit patterns of the legacy (non-prefixed) loads and stores that
// can take part in the R_PPC64_PCREL_OPT relaxation.
//
// Each value is the primary opcode (mask 0xfc000000) plus, where the primary
// opcode is shared between several instructions, the low-order bits that
// getPPCLegacyInsn() keeps when it classifies an instruction word
// (masks 0xfc000003, 0xfc000007 and 0xfc00000f).
//
// The enumerator names are used as the first argument of PCREL_OPT in
// PPCInsns.def, so the two lists have to stay in sync.
enum class PPCLegacyInsn : uint32_t {
66+
NOINSN = 0,
67+
// Loads.
68+
LBZ = 0x88000000,
69+
LHZ = 0xa0000000,
70+
LWZ = 0x80000000,
71+
LHA = 0xa8000000,
72+
LWA = 0xe8000002,
73+
LD = 0xe8000000,
74+
LFS = 0xC0000000,
75+
LXSSP = 0xe4000003,
76+
LFD = 0xc8000000,
77+
LXSD = 0xe4000002,
78+
LXV = 0xf4000001,
79+
LXVP = 0x18000000,
80+
81+
// Stores.
82+
STB = 0x98000000,
83+
STH = 0xb0000000,
84+
STW = 0x90000000,
85+
STD = 0xf8000000,
86+
STFS = 0xd0000000,
87+
STXSSP = 0xf4000003,
88+
STFD = 0xd8000000,
89+
STXSD = 0xf4000002,
90+
STXV = 0xf4000005,
91+
STXVP = 0x18000001
92+
};
93+
// 64-bit templates for the prefixed counterparts of the instructions in
// PPCLegacyInsn. getPCRelativeForm(uint32_t) ORs selected bits of the legacy
// instruction word (chosen by LegacyToPrefixMask) into one of these values.
//
// PREFIX_MLS and PREFIX_8LS occupy the upper 32 bits. Entries that are just
// PREFIX_MLS have an all-zero lower word; PPCInsns.def pairs those with
// OPC_AND_RST so that the primary opcode is taken from the legacy
// instruction. Entries built from PREFIX_8LS carry their own opcode bits in
// the lower word.
//
// The enumerator names are used as the second argument of PCREL_OPT in
// PPCInsns.def.
enum class PPCPrefixedInsn : uint64_t {
94+
NOINSN = 0,
95+
PREFIX_MLS = 0x0610000000000000,
96+
PREFIX_8LS = 0x0410000000000000,
97+
98+
// Loads.
99+
PLBZ = PREFIX_MLS,
100+
PLHZ = PREFIX_MLS,
101+
PLWZ = PREFIX_MLS,
102+
PLHA = PREFIX_MLS,
103+
PLWA = PREFIX_8LS | 0xa4000000,
104+
PLD = PREFIX_8LS | 0xe4000000,
105+
PLFS = PREFIX_MLS,
106+
PLXSSP = PREFIX_8LS | 0xac000000,
107+
PLFD = PREFIX_MLS,
108+
PLXSD = PREFIX_8LS | 0xa8000000,
109+
PLXV = PREFIX_8LS | 0xc8000000,
110+
PLXVP = PREFIX_8LS | 0xe8000000,
111+
112+
// Stores.
113+
PSTB = PREFIX_MLS,
114+
PSTH = PREFIX_MLS,
115+
PSTW = PREFIX_MLS,
116+
PSTD = PREFIX_8LS | 0xf4000000,
117+
PSTFS = PREFIX_MLS,
118+
PSTXSSP = PREFIX_8LS | 0xbc000000,
119+
PSTFD = PREFIX_MLS,
120+
PSTXSD = PREFIX_8LS | 0xb8000000,
121+
PSTXV = PREFIX_8LS | 0xd8000000,
122+
PSTXVP = PREFIX_8LS | 0xf8000000
123+
};
124+
// Returns true when `encoding` is exactly the value of one of the
// PPCLegacyInsn enumerators listed in PPCInsns.def, other than NOINSN.
static bool checkPPCLegacyInsn(uint32_t encoding) {
  const auto candidate = static_cast<PPCLegacyInsn>(encoding);
  // NOINSN is listed in PPCInsns.def too, so it has to be rejected first.
  if (candidate == PPCLegacyInsn::NOINSN)
    return false;

  switch (candidate) {
#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
  case PPCLegacyInsn::Legacy:                                                  \
    return true
#include "PPCInsns.def"
#undef PCREL_OPT
  }
  // Not one of the known enumerators.
  return false;
}
135+
136+
// Masks to apply to legacy instructions when converting them to prefixed,
137+
// pc-relative versions. For the most part, the primary opcode is shared
138+
// between the legacy instruction and the suffix of its prefixed version.
139+
// However, there are some instances where that isn't the case (DS-Form and
140+
// DQ-form instructions).
// The mask is ANDed with the 32-bit legacy instruction word in
// getPCRelativeForm(uint32_t), so only the low 32 bits select instruction
// bits. The enumerator names are used as the third argument of PCREL_OPT in
// PPCInsns.def.
141+
enum class LegacyToPrefixMask : uint64_t {
142+
NOMASK = 0x0,
143+
OPC_AND_RST = 0xffe00000, // Primary opc (0-5) and R[ST] (6-10).
144+
ONLY_RST = 0x3e00000, // R[ST] (6-10).
// The top bit below never matches a bit of the 32-bit instruction word; it
// keeps this enumerator's value distinct from ONLY_RST, which the
// `== ST_STX28_TO5` test in getPCRelativeForm(uint32_t) relies on.
145+
ST_STX28_TO5 =
146+
0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5.
147+
};
148+
65149
uint64_t elf::getPPC64TocBase() {
66150
// The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
67151
// TOC starts where the first of these sections starts. We always create a
@@ -333,6 +417,7 @@ static bool isDQFormInstruction(uint32_t encoding) {
333417
switch (getPrimaryOpCode(encoding)) {
334418
default:
335419
return false;
420+
case 6: // Power10 paired loads/stores (lxvp, stxvp).
336421
case 56:
337422
// The only instruction with a primary opcode of 56 is `lq`.
338423
return true;
@@ -344,6 +429,78 @@ static bool isDQFormInstruction(uint32_t encoding) {
344429
}
345430
}
346431

432+
// Returns true for the legacy instructions this file treats as DS-form:
// LWA, LD, LXSD, LXSSP, STD, STXSD and STXSSP. Everything else (including
// NOINSN) yields false.
static bool isDSFormInstruction(PPCLegacyInsn insn) {
  const bool isDSLoad = insn == PPCLegacyInsn::LWA ||
                        insn == PPCLegacyInsn::LD ||
                        insn == PPCLegacyInsn::LXSD ||
                        insn == PPCLegacyInsn::LXSSP;
  const bool isDSStore = insn == PPCLegacyInsn::STD ||
                         insn == PPCLegacyInsn::STXSD ||
                         insn == PPCLegacyInsn::STXSSP;
  return isDSLoad || isDSStore;
}
446+
447+
// Classifies a 32-bit instruction word as one of the PPCLegacyInsn
// enumerators, or PPCLegacyInsn::NOINSN when it is none of them.
static PPCLegacyInsn getPPCLegacyInsn(uint32_t encoding) {
  const uint32_t primaryOpc = encoding & 0xfc000000;
  uint32_t key = primaryOpc;

  // Some primary opcodes are shared between several instructions; for those
  // the low-order bits have to be kept as well to tell them apart.
  const bool sharedOpc = primaryOpc == 0xe4000000 ||
                         primaryOpc == 0xe8000000 ||
                         primaryOpc == 0xf4000000 || primaryOpc == 0xf8000000;
  if (sharedOpc && !isDQFormInstruction(encoding)) {
    key = encoding & 0xfc000003;
  } else if (primaryOpc == 0xf4000000) {
    key = encoding & 0xfc000007;
  } else if (primaryOpc == 0x18000000) {
    key = encoding & 0xfc00000f;
  }

  // Anything that is not a known enumerator value maps to NOINSN.
  return checkPPCLegacyInsn(key) ? static_cast<PPCLegacyInsn>(key)
                                 : PPCLegacyInsn::NOINSN;
}
467+
468+
// Looks up the prefixed counterpart of a legacy instruction using the table
// in PPCInsns.def. Returns PPCPrefixedInsn::NOINSN when `insn` has no entry.
static PPCPrefixedInsn getPCRelativeForm(PPCLegacyInsn insn) {
  PPCPrefixedInsn prefixed = PPCPrefixedInsn::NOINSN;
  switch (insn) {
#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
  case PPCLegacyInsn::Legacy:                                                  \
    prefixed = PPCPrefixedInsn::PCRel;                                         \
    break
#include "PPCInsns.def"
#undef PCREL_OPT
  }
  return prefixed;
}
478+
479+
// Looks up which bits of a legacy instruction are carried over into its
// prefixed form, using the table in PPCInsns.def. Returns
// LegacyToPrefixMask::NOMASK when `insn` has no entry.
static LegacyToPrefixMask getInsnMask(PPCLegacyInsn insn) {
  LegacyToPrefixMask mask = LegacyToPrefixMask::NOMASK;
  switch (insn) {
#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
  case PPCLegacyInsn::Legacy:                                                  \
    mask = LegacyToPrefixMask::InsnMask;                                       \
    break
#include "PPCInsns.def"
#undef PCREL_OPT
  }
  return mask;
}
489+
static uint64_t getPCRelativeForm(uint32_t encoding) {
490+
PPCLegacyInsn origInsn = getPPCLegacyInsn(encoding);
491+
PPCPrefixedInsn pcrelInsn = getPCRelativeForm(origInsn);
492+
if (pcrelInsn == PPCPrefixedInsn::NOINSN)
493+
return UINT64_C(-1);
494+
LegacyToPrefixMask origInsnMask = getInsnMask(origInsn);
495+
uint64_t pcrelEncoding =
496+
(uint64_t)pcrelInsn | (encoding & (uint64_t)origInsnMask);
497+
498+
// If the mask requires moving bit 28 to bit 5, do that now.
499+
if (origInsnMask == LegacyToPrefixMask::ST_STX28_TO5)
500+
pcrelEncoding |= (encoding & 0x8) << 23;
501+
return pcrelEncoding;
502+
}
503+
347504
static bool isInstructionUpdateForm(uint32_t encoding) {
348505
switch (getPrimaryOpCode(encoding)) {
349506
default:
@@ -368,6 +525,25 @@ static bool isInstructionUpdateForm(uint32_t encoding) {
368525
}
369526
}
370527

528+
// Compute the total displacement between the prefixed instruction that gets
529+
// to the start of the data and the load/store instruction that has the offset
530+
// into the data structure.
531+
// For example:
532+
// paddi 3, 0, 1000, 1
533+
// lwz 3, 20(3)
534+
// Should add up to 1020 for total displacement.
535+
static int64_t getTotalDisp(uint64_t prefixedInsn, uint32_t accessInsn) {
536+
int64_t disp34 = llvm::SignExtend64(
537+
((prefixedInsn & 0x3ffff00000000) >> 16) | (prefixedInsn & 0xffff), 34);
538+
int32_t disp16 = llvm::SignExtend32(accessInsn & 0xffff, 16);
539+
// For DS and DQ form instructions, we need to mask out the XO bits.
540+
if (isDQFormInstruction(accessInsn))
541+
disp16 &= ~0xf;
542+
else if (isDSFormInstruction(getPPCLegacyInsn(accessInsn)))
543+
disp16 &= ~0x3;
544+
return disp34 + disp16;
545+
}
546+
371547
// There are a number of places when we either want to read or write an
372548
// instruction when handling a half16 relocation type. On big-endian the buffer
373549
// pointer is pointing into the middle of the word we want to extract, and on
@@ -475,6 +651,49 @@ void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const {
475651
relocateNoSym(loc, R_PPC64_TOC16_LO, val);
476652
break;
477653
}
654+
case R_PPC64_GOT_PCREL34: {
655+
// Clear the first 8 bits of the prefix and the first 6 bits of the
656+
// instruction (the primary opcode).
657+
uint64_t insn = readPrefixedInstruction(loc);
658+
if ((insn & 0xfc000000) != 0xe4000000)
659+
error("expected a 'pld' for got-indirect to pc-relative relaxing");
660+
insn &= ~0xff000000fc000000;
661+
662+
// Replace the cleared bits with the values for PADDI (0x600000038000000);
663+
insn |= 0x600000038000000;
664+
writePrefixedInstruction(loc, insn);
665+
relocate(loc, rel, val);
666+
break;
667+
}
668+
case R_PPC64_PCREL_OPT: {
669+
// We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can
670+
// be relaxed. The eligibility for the relaxation needs to be determined
671+
// on that relocation since this one does not relocate a symbol.
672+
uint64_t insn = readPrefixedInstruction(loc);
673+
uint32_t accessInsn = read32(loc + rel.addend);
674+
uint64_t pcRelInsn = getPCRelativeForm(accessInsn);
675+
676+
// This error is not necessary for correctness but is emitted for now
677+
// to ensure we don't miss these opportunities in real code. It can be
678+
// removed at a later date.
679+
if (pcRelInsn == UINT64_C(-1)) {
680+
errorOrWarn(
681+
"unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" +
682+
Twine::utohexstr(accessInsn));
683+
break;
684+
}
685+
686+
int64_t totalDisp = getTotalDisp(insn, accessInsn);
687+
if (!isInt<34>(totalDisp))
688+
break; // Displacement doesn't fit.
689+
// Convert the PADDI to the prefixed version of accessInsn and convert
690+
// accessInsn to a nop.
691+
writePrefixedInstruction(loc, pcRelInsn |
692+
((totalDisp & 0x3ffff0000) << 16) |
693+
(totalDisp & 0xffff));
694+
write32(loc + rel.addend, 0x60000000); // nop accessInsn.
695+
break;
696+
}
478697
default:
479698
llvm_unreachable("unexpected relocation type");
480699
}
@@ -668,6 +887,7 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s,
668887
case R_PPC64_TOC16_LO:
669888
return R_GOTREL;
670889
case R_PPC64_GOT_PCREL34:
890+
case R_PPC64_PCREL_OPT:
671891
return R_GOT_PC;
672892
case R_PPC64_TOC16_HA:
673893
case R_PPC64_TOC16_LO_DS:
@@ -1024,6 +1244,9 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
10241244
(val & si1Mask));
10251245
break;
10261246
}
1247+
// If we encounter a PCREL_OPT relocation that we won't optimize.
1248+
case R_PPC64_PCREL_OPT:
1249+
break;
10271250
default:
10281251
llvm_unreachable("unknown relocation");
10291252
}
@@ -1080,6 +1303,14 @@ bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
10801303

10811304
RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data,
10821305
RelExpr expr) const {
1306+
if ((type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) &&
1307+
config->pcRelOptimize) {
1308+
// It only makes sense to optimize pld since paddi means that the address
1309+
// of the object in the GOT is required rather than the object itself.
1310+
assert(data && "Expecting an instruction encoding here");
1311+
if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000)
1312+
return R_PPC64_RELAX_GOT_PC;
1313+
}
10831314
if (expr == R_RELAX_TLS_GD_TO_IE)
10841315
return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
10851316
if (expr == R_RELAX_TLS_LD_TO_LE)

lld/ELF/Arch/PPCInsns.def

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#ifndef PCREL_OPT
2+
#error "Need to define function-style macro PCREL_OPT"
3+
#endif
4+
PCREL_OPT(NOINSN, NOINSN, NOMASK);
5+
PCREL_OPT(LBZ, PLBZ, OPC_AND_RST);
6+
PCREL_OPT(LHZ, PLHZ, OPC_AND_RST);
7+
PCREL_OPT(LWZ, PLWZ, OPC_AND_RST);
8+
PCREL_OPT(LHA, PLHA, OPC_AND_RST);
9+
PCREL_OPT(LWA, PLWA, ONLY_RST);
10+
PCREL_OPT(LD, PLD , ONLY_RST);
11+
PCREL_OPT(LFS, PLFS, OPC_AND_RST);
12+
PCREL_OPT(LXSSP, PLXSSP, ONLY_RST);
13+
PCREL_OPT(LFD, PLFD, OPC_AND_RST);
14+
PCREL_OPT(LXSD, PLXSD, ONLY_RST);
15+
PCREL_OPT(LXV, PLXV, ST_STX28_TO5);
16+
PCREL_OPT(LXVP, PLXVP, OPC_AND_RST);
17+
18+
PCREL_OPT(STB, PSTB, OPC_AND_RST);
19+
PCREL_OPT(STH, PSTH, OPC_AND_RST);
20+
PCREL_OPT(STW, PSTW, OPC_AND_RST);
21+
PCREL_OPT(STD, PSTD, ONLY_RST);
22+
PCREL_OPT(STFS, PSTFS, OPC_AND_RST);
23+
PCREL_OPT(STXSSP, PSTXSSP, ONLY_RST);
24+
PCREL_OPT(STFD, PSTFD, OPC_AND_RST);
25+
PCREL_OPT(STXSD, PSTXSD, ONLY_RST);
26+
PCREL_OPT(STXV, PSTXV, ST_STX28_TO5);
27+
PCREL_OPT(STXVP, PSTXVP, OPC_AND_RST);

lld/ELF/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ struct Configuration {
206206
bool thinLTOIndexOnly;
207207
bool timeTraceEnabled;
208208
bool tocOptimize;
209+
bool pcRelOptimize;
209210
bool undefinedVersion;
210211
bool unique;
211212
bool useAndroidRelrTags = false;

lld/ELF/Driver.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,9 @@ static void checkOptions() {
309309
if (config->tocOptimize && config->emachine != EM_PPC64)
310310
error("--toc-optimize is only supported on the PowerPC64 target");
311311

312+
if (config->pcRelOptimize && config->emachine != EM_PPC64)
313+
error("--pcrel--optimize is only supported on the PowerPC64 target");
314+
312315
if (config->pie && config->shared)
313316
error("-shared and -pie may not be used together");
314317

@@ -1288,6 +1291,8 @@ static void setConfigs(opt::InputArgList &args) {
12881291

12891292
config->tocOptimize =
12901293
args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, m == EM_PPC64);
1294+
config->pcRelOptimize =
1295+
args.hasFlag(OPT_pcrel_optimize, OPT_no_pcrel_optimize, m == EM_PPC64);
12911296
}
12921297

12931298
// Returns a value of "-format" option.

lld/ELF/InputSection.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
807807
case R_PPC64_TOCBASE:
808808
return getPPC64TocBase() + a;
809809
case R_RELAX_GOT_PC:
810+
case R_PPC64_RELAX_GOT_PC:
810811
return sym.getVA(a) - p;
811812
case R_RELAX_TLS_GD_TO_LE:
812813
case R_RELAX_TLS_IE_TO_LE:
@@ -1004,6 +1005,7 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) {
10041005
void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
10051006
assert(flags & SHF_ALLOC);
10061007
const unsigned bits = config->wordsize * 8;
1008+
uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
10071009

10081010
for (const Relocation &rel : relocations) {
10091011
if (rel.expr == R_NONE)
@@ -1025,6 +1027,20 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
10251027
case R_RELAX_GOT_PC_NOPIC:
10261028
target->relaxGot(bufLoc, rel, targetVA);
10271029
break;
1030+
case R_PPC64_RELAX_GOT_PC: {
1031+
// The R_PPC64_PCREL_OPT relocation must appear immediately after
1032+
// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
1033+
// We can only relax R_PPC64_PCREL_OPT if we have also relaxed
1034+
// the associated R_PPC64_GOT_PCREL34 since only the latter has an
1035+
// associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34
1036+
// and only relax the other if the saved offset matches.
1037+
if (type == R_PPC64_GOT_PCREL34)
1038+
lastPPCRelaxedRelocOff = offset;
1039+
if (type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff)
1040+
break;
1041+
target->relaxGot(bufLoc, rel, targetVA);
1042+
break;
1043+
}
10281044
case R_PPC64_RELAX_TOC:
10291045
// rel.sym refers to the STT_SECTION symbol associated to the .toc input
10301046
// section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC

lld/ELF/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,10 @@ defm toc_optimize : B<"toc-optimize",
404404
"(PowerPC64) Enable TOC related optimizations (default)",
405405
"(PowerPC64) Disable TOC related optimizations">;
406406

407+
defm pcrel_optimize : B<"pcrel-optimize",
408+
"(PowerPC64) Enable PC-relative optimizations (default)",
409+
"(PowerPC64) Disable PC-relative optimizations">;
410+
407411
def trace: F<"trace">, HelpText<"Print the names of the input files">;
408412

409413
defm trace_symbol: Eq<"trace-symbol", "Trace references to symbols">;

lld/ELF/Relocations.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ static bool needsGot(RelExpr expr) {
376376
static bool isRelExpr(RelExpr expr) {
377377
return oneof<R_PC, R_GOTREL, R_GOTPLTREL, R_MIPS_GOTREL, R_PPC64_CALL,
378378
R_PPC64_RELAX_TOC, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC,
379-
R_RISCV_PC_INDIRECT>(expr);
379+
R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC>(expr);
380380
}
381381

382382
// Returns true if a given relocation can be computed at link-time.

0 commit comments

Comments
 (0)