-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLD][RISCV] Add relaxation for absolute int12 Hi20Lo12 #86124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
If we have an absolute address whose high bits are known to be a sign extend of the low 12 bits, we can avoid emitting the LUI entirely. This is implemented in an analogous manner to the gp relative relocations - defining an internal usage relocation type. Since 12 bits (really 11 since the high bit must be zero in user code) is less than one page, all of these offsets fit in the null page. As such, the only application of these is likely to be undefined weak symbols except for embedded use cases. I'm mostly posting this for completeness sake.
@llvm/pr-subscribers-lld @llvm/pr-subscribers-lld-elf Author: Philip Reames (preames) ChangesIf we have an absolute address whose high bits are known to be a sign extend of the low 12 bits, we can avoid emitting the LUI entirely. This is implemented in an analogous manner to the gp relative relocations - defining an internal usage relocation type. Since 12 bits (really 11 since the high bit must be zero in user code) is less than one page, all of these offsets fit in the null page. As such, the only application of these is likely to be undefined weak symbols except for embedded use cases. I'm mostly posting this for completeness sake. Full diff: https://github.com/llvm/llvm-project/pull/86124.diff 2 Files Affected:
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 20de1b9b7bde96..d59a2e2d3d2c03 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -50,10 +50,12 @@ class RISCV final : public TargetInfo {
} // end anonymous namespace
-// These are internal relocation numbers for GP relaxation. They aren't part
+// These are internal relocation numbers for GP/X0 relaxation. They aren't part
// of the psABI spec.
#define INTERNAL_R_RISCV_GPREL_I 256
#define INTERNAL_R_RISCV_GPREL_S 257
+#define INTERNAL_R_RISCV_X0REL_I 258
+#define INTERNAL_R_RISCV_X0REL_S 259
const uint64_t dtpOffset = 0x800;
@@ -70,6 +72,7 @@ enum Op {
};
enum Reg {
+ X_X0 = 0,
X_RA = 1,
X_GP = 3,
X_TP = 4,
@@ -464,6 +467,20 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
return;
}
+ case INTERNAL_R_RISCV_X0REL_I:
+ case INTERNAL_R_RISCV_X0REL_S: {
+ assert(isInt<12>(val));
+ uint64_t hi = (val + 0x800) >> 12;
+ uint64_t lo = val - (hi << 12);
+ uint32_t insn = (read32le(loc) & ~(31 << 15)) | (X_X0 << 15);
+ if (rel.type == INTERNAL_R_RISCV_X0REL_I)
+ insn = setLO12_I(insn, lo);
+ else
+ insn = setLO12_S(insn, lo);
+ write32le(loc, insn);
+ return;
+ }
+
case INTERNAL_R_RISCV_GPREL_I:
case INTERNAL_R_RISCV_GPREL_S: {
Defined *gp = ElfSym::riscvGlobalPointer;
@@ -789,6 +806,25 @@ static void relaxTlsLe(const InputSection &sec, size_t i, uint64_t loc,
static void relaxHi20Lo12(const InputSection &sec, size_t i, uint64_t loc,
Relocation &r, uint32_t &remove) {
+
+ // Fold into use of x0+offset
+ if (isInt<12>(r.sym->getVA(r.addend))) {
+ switch (r.type) {
+ case R_RISCV_HI20:
+ // Remove lui rd, %hi20(x).
+ sec.relaxAux->relocTypes[i] = R_RISCV_RELAX;
+ remove = 4;
+ break;
+ case R_RISCV_LO12_I:
+ sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_X0REL_I;
+ break;
+ case R_RISCV_LO12_S:
+ sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_X0REL_S;
+ break;
+ }
+ return;
+ }
+
const Defined *gp = ElfSym::riscvGlobalPointer;
if (!gp)
return;
@@ -989,6 +1025,8 @@ void RISCV::finalizeRelax(int passes) const {
switch (newType) {
case INTERNAL_R_RISCV_GPREL_I:
case INTERNAL_R_RISCV_GPREL_S:
+ case INTERNAL_R_RISCV_X0REL_I:
+ case INTERNAL_R_RISCV_X0REL_S:
break;
case R_RISCV_RELAX:
// Used by relaxTlsLe to indicate the relocation is ignored.
diff --git a/lld/test/ELF/riscv-relax-hi20-lo12.s b/lld/test/ELF/riscv-relax-hi20-lo12.s
index 6bb29b12348f80..aa10e957cc60ed 100644
--- a/lld/test/ELF/riscv-relax-hi20-lo12.s
+++ b/lld/test/ELF/riscv-relax-hi20-lo12.s
@@ -4,12 +4,12 @@
# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax a.s -o rv32.o
# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax a.s -o rv64.o
-# RUN: ld.lld --relax-gp --undefined=__global_pointer$ rv32.o lds -o rv32
-# RUN: ld.lld --relax-gp --undefined=__global_pointer$ rv64.o lds -o rv64
+# RUN: ld.lld --relax-gp --undefined=__global_pointer$ --defsym baz=420 rv32.o lds -o rv32
+# RUN: ld.lld --relax-gp --undefined=__global_pointer$ --defsym baz=420 rv64.o lds -o rv64
# RUN: llvm-objdump -td -M no-aliases --no-show-raw-insn rv32 | FileCheck %s
# RUN: llvm-objdump -td -M no-aliases --no-show-raw-insn rv64 | FileCheck %s
-# CHECK: 00000028 l .text {{0*}}0 a
+# CHECK: 00000040 l .text {{0*}}0 a
# CHECK-NOT: lui
# CHECK: addi a0, gp, -0x800
@@ -23,6 +23,14 @@
# CHECK-NEXT: addi a0, a0, 0x0
# CHECK-NEXT: lw a0, 0x0(a0)
# CHECK-NEXT: sw a0, 0x0(a0)
+# CHECK-NOT: lui
+# CHECK: addi a0, zero, 0x0
+# CHECK-NEXT: lw a0, 0x0(zero)
+# CHECK-NEXT: sw a0, 0x0(zero)
+# CHECK-NOT: lui
+# CHECK: addi a0, zero, 0x1a4
+# CHECK-NEXT: lw a0, 0x1a4(zero)
+# CHECK-NEXT: sw a0, 0x1a4(zero)
# CHECK-EMPTY:
# CHECK-NEXT: <a>:
# CHECK-NEXT: addi a0, a0, 0x1
@@ -42,6 +50,14 @@ _start:
addi a0, a0, %lo(norelax)
lw a0, %lo(norelax)(a0)
sw a0, %lo(norelax)(a0)
+ lui a0, %hi(undefined_weak)
+ addi a0, a0, %lo(undefined_weak)
+ lw a0, %lo(undefined_weak)(a0)
+ sw a0, %lo(undefined_weak)(a0)
+ lui a0, %hi(baz)
+ addi a0, a0, %lo(baz)
+ lw a0, %lo(baz)(a0)
+ sw a0, %lo(baz)(a0)
a:
addi a0, a0, 1
@@ -53,6 +69,7 @@ bar:
.byte 0
norelax:
.word 0
+.weak undefined_weak
#--- lds
SECTIONS {
|
assert(isInt<12>(val)); | ||
uint64_t hi = (val + 0x800) >> 12; | ||
uint64_t lo = val - (hi << 12); | ||
uint32_t insn = (read32le(loc) & ~(31 << 15)) | (X_X0 << 15); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To call out one possible concern with this patch. If there is an IType or SType instruction whose encoding reserved rs1=x0 (or otherwise gave it distinct behavior) this transform would not be legal. I did a search through the ISA manual and didn't find anything, but that doesn't mean such a special case definitely doesn't exist. In principal such a special case is also possible for GP, but there's a lot more precedent for X0 being special in the encoding (though usually as the value for RD).
We're adding 1 to the high bit of the low12 chunk, letting that carry through the high bits, and then keeping only the high bits. In this case, the high bits are always zero. Spelling this out since I found it confusing at first... Given an int12, two possible cases: val hi20 low12 b31-b12 b11 b10..b0 111..11 1 (dont-care) 0 trunc(val) 000..00 0 (dont-care) 0 trunc(val)
@MaskRay This change has been sitting out for nearly a year without response. I know it needs rebased by now, but don't want to waste time if this isn't going to get reviewed. Should I rebase, or just close it? |
this needs to be rebased but otherwise LGTM |
Updated, can you confirm the LGTM? |
@topperc Last comment was meant for you. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LGTM. Apologies that I didn't notice this earlier... Thanks for testing the undefined-weak symbol scenario as well. |
If we have an absolute address whose high bits are known to be a sign extend of the low 12 bits, we can avoid emitting the LUI entirely. This is implemented in an analogous manner to the gp relative relocations - defining an internal usage relocation type.
Since 12 bits (really 11 since the high bit must be zero in user code) is less than one page, all of these offsets fit in the null page. As such, the only application of these is likely to be undefined weak symbols except for embedded use cases. I'm mostly posting this for completeness sake.