-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ELF,RISCV] Fix oscillation due to call relaxation #142899
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ELF,RISCV] Fix oscillation due to call relaxation #142899
Conversation
Created using spr 1.3.5-bogner
@llvm/pr-subscribers-lld-elf Author: Fangrui Song (MaskRay) ChangesThe new test (derived from riscv32 openssl/test/cmp_msg_test.c) revealed
The issue is not related to alignment. In 2019, GNU ld addressed a This patch stabilizes the process by preventing Fix #113838 Full diff: https://github.com/llvm/llvm-project/pull/142899.diff 2 Files Affected:
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index abe8876668a45..72d83159ad8ac 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -740,19 +740,23 @@ static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
(r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
const int64_t displace = dest - loc;
- if (rvc && isInt<12>(displace) && rd == 0) {
+ // When the caller specifies the old value of `remove`, disallow its
+ // increment.
+ if (remove >= 6 && rvc && isInt<12>(displace) && rd == 0) {
sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
sec.relaxAux->writes.push_back(0xa001); // c.j
remove = 6;
- } else if (rvc && isInt<12>(displace) && rd == X_RA &&
+ } else if (remove >= 6 && rvc && isInt<12>(displace) && rd == X_RA &&
!ctx.arg.is64) { // RV32C only
sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
sec.relaxAux->writes.push_back(0x2001); // c.jal
remove = 6;
- } else if (isInt<21>(displace)) {
+ } else if (remove >= 4 && isInt<21>(displace)) {
sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
remove = 4;
+ } else {
+ remove = 0;
}
}
@@ -828,7 +832,7 @@ static void relaxHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
}
}
-static bool relax(Ctx &ctx, InputSection &sec) {
+static bool relax(Ctx &ctx, int pass, InputSection &sec) {
const uint64_t secAddr = sec.getVA();
const MutableArrayRef<Relocation> relocs = sec.relocs();
auto &aux = *sec.relaxAux;
@@ -862,8 +866,13 @@ static bool relax(Ctx &ctx, InputSection &sec) {
}
case R_RISCV_CALL:
case R_RISCV_CALL_PLT:
- if (relaxable(relocs, i))
+ // Prevent oscillation between states by disallowing the increment of
+ // `remove` after a few passes. The previous `remove` value is
+ // `cur-delta`.
+ if (relaxable(relocs, i)) {
+ remove = pass < 4 ? 6 : cur - delta;
relaxCall(ctx, sec, i, loc, r, remove);
+ }
break;
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_ADD:
@@ -945,7 +954,7 @@ bool RISCV::relaxOnce(int pass) const {
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage))
- changed |= relax(ctx, *sec);
+ changed |= relax(ctx, pass, *sec);
}
return changed;
}
diff --git a/lld/test/ELF/riscv-relax-call-stress.s b/lld/test/ELF/riscv-relax-call-stress.s
new file mode 100644
index 0000000000000..27b557eddc672
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-call-stress.s
@@ -0,0 +1,50 @@
+# REQUIRES: riscv
+## The regression test led to oscillation between two states ("address assignment did not converge" error).
+## First jump (~2^11 bytes away): alternated between 4 and 8 bytes.
+## Second jump (~2^20 bytes away): alternated between 2 and 8 bytes.
+
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c,+relax a.s -o a.o
+# RUN: ld.lld -T lds a.o -o out
+# RUN: llvm-objdump -td --no-show-raw-insn -M no-aliases out | FileCheck %s
+
+# CHECK-LABEL: <_start>:
+# CHECK-NEXT: jal zero, {{.*}} <low>
+# CHECK-EMPTY:
+# CHECK-NEXT: <jump_high>:
+# CHECK-NEXT: 1004: auipc t0, 0x100
+# CHECK-NEXT: jalr zero, -0x2(t0) <high>
+# CHECK-NEXT: ...
+# CHECK: <low>:
+# CHECK: 1802: c.jr ra
+
+# CHECK: <high>:
+# CHECK-NEXT: 101002: jal ra, 0x1004 <jump_high>
+# CHECK-NEXT: auipc ra, 0xfff00
+# CHECK-NEXT: jalr ra, -0x2(ra) <jump_high>
+
+#--- a.s
+## At the beginning of state1, low-_start = 0x800-2, reachable by a c.j.
+## This increases high-jump_high from 0x100000-2 to 0x100000, unreachable by a jal.
+## In the next iteration, low-_start increases to 0x800, unreachable by a c.j.
+.global _start
+_start:
+ jump low, t0 # state0: 4 bytes; state1: 2 bytes
+jump_high:
+ jump high, t0 # state0: 4 bytes; state1: 8 bytes
+
+ .space 0x800-10
+low:
+ ret
+
+.section .high,"ax",@progbits
+ nop
+high:
+ call jump_high
+ call jump_high
+
+#--- lds
+SECTIONS {
+ .text 0x1000 : { *(.text) }
+ .high 0x101000 : { *(.high) }
+}
|
@llvm/pr-subscribers-lld Author: Fangrui Song (MaskRay) ChangesThe new test (derived from riscv32 openssl/test/cmp_msg_test.c) revealed
The issue is not related to alignment. In 2019, GNU ld addressed a This patch stabilizes the process by preventing Fix #113838 Full diff: https://github.com/llvm/llvm-project/pull/142899.diff 2 Files Affected:
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index abe8876668a45..72d83159ad8ac 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -740,19 +740,23 @@ static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
(r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
const int64_t displace = dest - loc;
- if (rvc && isInt<12>(displace) && rd == 0) {
+ // When the caller specifies the old value of `remove`, disallow its
+ // increment.
+ if (remove >= 6 && rvc && isInt<12>(displace) && rd == 0) {
sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
sec.relaxAux->writes.push_back(0xa001); // c.j
remove = 6;
- } else if (rvc && isInt<12>(displace) && rd == X_RA &&
+ } else if (remove >= 6 && rvc && isInt<12>(displace) && rd == X_RA &&
!ctx.arg.is64) { // RV32C only
sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
sec.relaxAux->writes.push_back(0x2001); // c.jal
remove = 6;
- } else if (isInt<21>(displace)) {
+ } else if (remove >= 4 && isInt<21>(displace)) {
sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
remove = 4;
+ } else {
+ remove = 0;
}
}
@@ -828,7 +832,7 @@ static void relaxHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
}
}
-static bool relax(Ctx &ctx, InputSection &sec) {
+static bool relax(Ctx &ctx, int pass, InputSection &sec) {
const uint64_t secAddr = sec.getVA();
const MutableArrayRef<Relocation> relocs = sec.relocs();
auto &aux = *sec.relaxAux;
@@ -862,8 +866,13 @@ static bool relax(Ctx &ctx, InputSection &sec) {
}
case R_RISCV_CALL:
case R_RISCV_CALL_PLT:
- if (relaxable(relocs, i))
+ // Prevent oscillation between states by disallowing the increment of
+ // `remove` after a few passes. The previous `remove` value is
+ // `cur-delta`.
+ if (relaxable(relocs, i)) {
+ remove = pass < 4 ? 6 : cur - delta;
relaxCall(ctx, sec, i, loc, r, remove);
+ }
break;
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_ADD:
@@ -945,7 +954,7 @@ bool RISCV::relaxOnce(int pass) const {
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage))
- changed |= relax(ctx, *sec);
+ changed |= relax(ctx, pass, *sec);
}
return changed;
}
diff --git a/lld/test/ELF/riscv-relax-call-stress.s b/lld/test/ELF/riscv-relax-call-stress.s
new file mode 100644
index 0000000000000..27b557eddc672
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-call-stress.s
@@ -0,0 +1,50 @@
+# REQUIRES: riscv
+## The regression test led to oscillation between two states ("address assignment did not converge" error).
+## First jump (~2^11 bytes away): alternated between 4 and 8 bytes.
+## Second jump (~2^20 bytes away): alternated between 2 and 8 bytes.
+
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c,+relax a.s -o a.o
+# RUN: ld.lld -T lds a.o -o out
+# RUN: llvm-objdump -td --no-show-raw-insn -M no-aliases out | FileCheck %s
+
+# CHECK-LABEL: <_start>:
+# CHECK-NEXT: jal zero, {{.*}} <low>
+# CHECK-EMPTY:
+# CHECK-NEXT: <jump_high>:
+# CHECK-NEXT: 1004: auipc t0, 0x100
+# CHECK-NEXT: jalr zero, -0x2(t0) <high>
+# CHECK-NEXT: ...
+# CHECK: <low>:
+# CHECK: 1802: c.jr ra
+
+# CHECK: <high>:
+# CHECK-NEXT: 101002: jal ra, 0x1004 <jump_high>
+# CHECK-NEXT: auipc ra, 0xfff00
+# CHECK-NEXT: jalr ra, -0x2(ra) <jump_high>
+
+#--- a.s
+## At the beginning of state1, low-_start = 0x800-2, reachable by a c.j.
+## This increases high-jump_high from 0x100000-2 to 0x100000, unreachable by a jal.
+## In the next iteration, low-_start increases to 0x800, unreachable by a c.j.
+.global _start
+_start:
+ jump low, t0 # state0: 4 bytes; state1: 2 bytes
+jump_high:
+ jump high, t0 # state0: 4 bytes; state1: 8 bytes
+
+ .space 0x800-10
+low:
+ ret
+
+.section .high,"ax",@progbits
+ nop
+high:
+ call jump_high
+ call jump_high
+
+#--- lds
+SECTIONS {
+ .text 0x1000 : { *(.text) }
+ .high 0x101000 : { *(.high) }
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I checked with a few different builds in Android that had the relaxation issue and this change fixed them. Thanks for fixing it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
The new test (derived from riscv32 openssl/test/cmp_msg_test.c) revealed oscillation in two R_RISCV_CALL_PLT jumps: - First jump (~2^11 bytes away): alternated between 4 and 8 bytes. - Second jump (~2^20 bytes away): alternated between 2 and 8 bytes. The issue is not related to alignment. In 2019, GNU ld addressed a similar problem by reducing the relaxation allowance for cross-section relaxation (https://sourceware.org/bugzilla/show_bug.cgi?id=25181). This approach would result in a suboptimal layout for the tight range tested by riscv-relax-call.s. This patch stabilizes the process by preventing `remove` increment after a few passes, similar to integrated assembler's fragment relaxation. (For the Android bit reproduce, `pass < 2` leads to non-optimal layout while `pass < 3` and `pass < 4` output is identical.) Fix llvm/llvm-project#113838 Possibly fix llvm/llvm-project#123248 (inputs are bitcode, subject to ever-changing code generation, not reproducible) Pull Request: llvm/llvm-project#142899
The new test (derived from riscv32 openssl/test/cmp_msg_test.c) revealed oscillation in two R_RISCV_CALL_PLT jumps: - First jump (~2^11 bytes away): alternated between 4 and 8 bytes. - Second jump (~2^20 bytes away): alternated between 2 and 8 bytes. The issue is not related to alignment. In 2019, GNU ld addressed a similar problem by reducing the relaxation allowance for cross-section relaxation (https://sourceware.org/bugzilla/show_bug.cgi?id=25181). This approach would result in a suboptimal layout for the tight range tested by riscv-relax-call.s. This patch stabilizes the process by preventing `remove` increment after a few passes, similar to integrated assembler's fragment relaxation. (For the Android bit reproduce, `pass < 2` leads to non-optimal layout while `pass < 3` and `pass < 4` output is identical.) Fix llvm#113838 Possibly fix llvm#123248 (inputs are bitcode, subject to ever-changing code generation, not reproducible) Pull Request: llvm#142899
The new test (derived from riscv32 openssl/test/cmp_msg_test.c) revealed oscillation in two R_RISCV_CALL_PLT jumps: - First jump (~2^11 bytes away): alternated between 4 and 8 bytes. - Second jump (~2^20 bytes away): alternated between 2 and 8 bytes. The issue is not related to alignment. In 2019, GNU ld addressed a similar problem by reducing the relaxation allowance for cross-section relaxation (https://sourceware.org/bugzilla/show_bug.cgi?id=25181). This approach would result in a suboptimal layout for the tight range tested by riscv-relax-call.s. This patch stabilizes the process by preventing `remove` increment after a few passes, similar to integrated assembler's fragment relaxation. (For the Android bit reproduce, `pass < 2` leads to non-optimal layout while `pass < 3` and `pass < 4` output is identical.) Fix llvm#113838 Possibly fix llvm#123248 (inputs are bitcode, subject to ever-changing code generation, not reproducible) Pull Request: llvm#142899
Hi Fangrui, the |
The new test (derived from riscv32 openssl/test/cmp_msg_test.c) revealed
oscillation in two R_RISCV_CALL_PLT jumps:
The issue is not related to alignment. In 2019, GNU ld addressed a
similar problem by reducing the relaxation allowance for cross-section
relaxation (https://sourceware.org/bugzilla/show_bug.cgi?id=25181).
This approach would result in a suboptimal layout for the tight range
tested by riscv-relax-call.s.
This patch stabilizes the process by preventing
remove
increment aftera few passes, similar to integrated assembler's fragment relaxation.
(For the Android bit reproduce,
pass < 2
leads to non-optimal layoutwhile
pass < 3
andpass < 4
output is identical.)Fix #113838
Possibly fix #123248 (inputs
are bitcode, subject to ever-changing code generation, not reproducible)