[X86] Don't convert local function foo in the same section to foo(%rip) when the offset is near INT32_MIN #98438

MaskRay · 2024-07-11T06:39:44Z

define internal void @foo() {
  ret void
}
define i64 @main() {
  ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2147483626)
}

When foo is a local symbol, foo and main are in the same section,
and offset is near INT32_MIN, referencing foo+offset in main with
RIP-relative addressing needs leaq .text+offset1(%rip), %rax where
offset1 < offset, and offset1 might underflow.
(https://discourse.llvm.org/t/arithmetic-referencing-dso-local-function-causes-compilation-error-on-linux-x64/80033):

Don't use RIP-relative addressing if the negative offset is near
INT32_MIN. Arbitrarily reuse the magic number in isOffsetSuitableForCodeModel to
guard against the edge case when address(current_instruction)-foo < 4GiB-16MiB.
If the difference is larger than 4GiB-16MiB, ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2**32+256MiB) would still cause the assembly
issue, such cases are unrealistic.

Created using spr 1.3.5-bogner

llvmbot · 2024-07-11T06:40:18Z

@llvm/pr-subscribers-backend-x86

Author: Fangrui Song (MaskRay)

Changes

When Sym is a local function symbol, leaq Sym+Offset(%rip), %rax in
non-large code models generates a relocation that references the section
symbol. An offset that is too closer to INT32_MIN could cause an
underflow addend (https://discourse.llvm.org/t/arithmetic-referencing-dso-local-function-causes-compilation-error-on-linux-x64/80033):

leaq Sym-0x80000000(%rip), %rax
=&gt;
leaq .text-0x80000001(%rip), %rax  # if Sym=.text+1

Don't fold such offsets. For simplicity, we don't check whether Sym is a
function symbol.

Full diff: https://github.com/llvm/llvm-project/pull/98438.diff

2 Files Affected:

(modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+7)
(modified) llvm/test/CodeGen/X86/fold-add.ll (+96-2)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index c91bd576dc9f6..e06de1c4276a3 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1795,6 +1795,13 @@ bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
         !X86::isOffsetSuitableForCodeModel(Val, M,
                                            AM.hasSymbolicDisplacement()))
       return true;
+    // When Sym is a local symbol in ELF, leaq Sym+Offset(%rip) in non-large
+    // code models generates a relocation referencing the section symbol. Don't
+    // fold offsets that are too closer to INT32_MIN, as the addend could
+    // underflow.
+    if (Val < INT32_MIN + 256 * 1024 * 1024 && M != CodeModel::Large &&
+        AM.hasSymbolicDisplacement())
+      return true;
     // In addition to the checks required for a register base, check that
     // we do not try to use an unsafe Disp with a frame index.
     if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
diff --git a/llvm/test/CodeGen/X86/fold-add.ll b/llvm/test/CodeGen/X86/fold-add.ll
index 8c28d66597fb3..f7659c9cf0653 100644
--- a/llvm/test/CodeGen/X86/fold-add.ll
+++ b/llvm/test/CodeGen/X86/fold-add.ll
@@ -118,17 +118,46 @@ entry:
   ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -1)
 }
 
+define dso_local i64 @neg_0x70000000() #0 {
+; STATIC-LABEL: neg_0x70000000:
+; STATIC:       # %bb.0: # %entry
+; STATIC-NEXT:    leaq foo-1879048192(%rip), %rax
+; STATIC-NEXT:    retq
+;
+; PIC-LABEL: neg_0x70000000:
+; PIC:       # %bb.0: # %entry
+; PIC-NEXT:    leaq foo-1879048192(%rip), %rax
+; PIC-NEXT:    retq
+;
+; MSTATIC-LABEL: neg_0x70000000:
+; MSTATIC:       # %bb.0: # %entry
+; MSTATIC-NEXT:    movabsq $foo, %rax
+; MSTATIC-NEXT:    addq $-1879048192, %rax # imm = 0x90000000
+; MSTATIC-NEXT:    retq
+;
+; MPIC-LABEL: neg_0x70000000:
+; MPIC:       # %bb.0: # %entry
+; MPIC-NEXT:    leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
+; MPIC-NEXT:    movabsq $foo@GOTOFF, %rcx
+; MPIC-NEXT:    leaq -1879048192(%rax,%rcx), %rax
+; MPIC-NEXT:    retq
+entry:
+  ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -1879048192)
+}
+
 ;; Test we don't emit movl foo-2147483648, %eax. ELF R_X86_64_32 does not allow
 ;; a negative value.
 define dso_local i64 @neg_0x80000000() #0 {
 ; STATIC-LABEL: neg_0x80000000:
 ; STATIC:       # %bb.0: # %entry
-; STATIC-NEXT:    leaq foo-2147483648(%rip), %rax
+; STATIC-NEXT:    movq $-2147483648, %rax # imm = 0x80000000
+; STATIC-NEXT:    leaq foo(%rax), %rax
 ; STATIC-NEXT:    retq
 ;
 ; PIC-LABEL: neg_0x80000000:
 ; PIC:       # %bb.0: # %entry
-; PIC-NEXT:    leaq foo-2147483648(%rip), %rax
+; PIC-NEXT:    leaq foo(%rip), %rax
+; PIC-NEXT:    addq $-2147483648, %rax # imm = 0x80000000
 ; PIC-NEXT:    retq
 ;
 ; MSTATIC-LABEL: neg_0x80000000:
@@ -180,4 +209,69 @@ entry:
   ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2147483649)
 }
 
+define internal void @bar() #0 {
+; STATIC-LABEL: bar:
+; STATIC:       # %bb.0:
+; STATIC-NEXT:    retq
+;
+; PIC-LABEL: bar:
+; PIC:       # %bb.0:
+; PIC-NEXT:    retq
+;
+; MSTATIC-LABEL: bar:
+; MSTATIC:       # %bb.0:
+; MSTATIC-NEXT:    retq
+;
+; MPIC-LABEL: bar:
+; MPIC:       # %bb.0:
+; MPIC-NEXT:    retq
+  ret void
+}
+
+define dso_local i64 @fun_neg_0x6fffffff() #0 {
+; STATIC-LABEL: fun_neg_0x6fffffff:
+; STATIC:       # %bb.0:
+; STATIC-NEXT:    leaq bar-1879048191(%rip), %rax
+; STATIC-NEXT:    retq
+;
+; PIC-LABEL: fun_neg_0x6fffffff:
+; PIC:       # %bb.0:
+; PIC-NEXT:    leaq bar-1879048191(%rip), %rax
+; PIC-NEXT:    retq
+;
+; MSTATIC-LABEL: fun_neg_0x6fffffff:
+; MSTATIC:       # %bb.0:
+; MSTATIC-NEXT:    leaq bar-1879048191(%rip), %rax
+; MSTATIC-NEXT:    retq
+;
+; MPIC-LABEL: fun_neg_0x6fffffff:
+; MPIC:       # %bb.0:
+; MPIC-NEXT:    leaq bar-1879048191(%rip), %rax
+; MPIC-NEXT:    retq
+  ret i64 add (i64 ptrtoint (ptr @bar to i64), i64 -1879048191)
+}
+
+define dso_local i64 @fun_neg_0x70000000() #0 {
+; STATIC-LABEL: fun_neg_0x70000000:
+; STATIC:       # %bb.0:
+; STATIC-NEXT:    leaq bar-1879048192(%rip), %rax
+; STATIC-NEXT:    retq
+;
+; PIC-LABEL: fun_neg_0x70000000:
+; PIC:       # %bb.0:
+; PIC-NEXT:    leaq bar-1879048192(%rip), %rax
+; PIC-NEXT:    retq
+;
+; MSTATIC-LABEL: fun_neg_0x70000000:
+; MSTATIC:       # %bb.0:
+; MSTATIC-NEXT:    leaq bar-1879048192(%rip), %rax
+; MSTATIC-NEXT:    retq
+;
+; MPIC-LABEL: fun_neg_0x70000000:
+; MPIC:       # %bb.0:
+; MPIC-NEXT:    leaq bar-1879048192(%rip), %rax
+; MPIC-NEXT:    retq
+  ret i64 add (i64 ptrtoint (ptr @bar to i64), i64 -1879048192)
+}
+
 attributes #0 = { nounwind }

RKSimon · 2024-07-11T12:20:42Z

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

+    // code models generates a relocation referencing the section symbol. Don't
+    // fold offsets that are too closer to INT32_MIN, as the addend could
+    // underflow.
+    if (Val < INT32_MIN + 256 * 1024 * 1024 && M != CodeModel::Large &&


What led you to the 256 * 1024 * 1024 magic number?

jyknight · 2024-07-11T15:49:45Z

I don't think we should try to fix this. There's no way to know at compile time how far the referenced symbol will be from RIP, and thus how big an offset would be too big.

Created using spr 1.3.5-bogner

MaskRay · 2024-07-11T20:42:33Z

I don't think we should try to fix this. There's no way to know at compile time how far the referenced symbol will be from RIP, and thus how big an offset would be too big.

The previous analysis was incorrect. Sorry, wrote the patch very late last night.

The latest update keeps the special case for very limited cases.

@RKSimon What led you to the 256 * 1024 * 1024 magic number?

The description has been updated to state that this is arbitrary (like the 1610241024 in isOffsetSuitableForCodeModel).
When foo and main are in the same section, we support main-foo up to 256MiB.

If main-foo is larger than 256MiB, and we have ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2**32+256MiB), we would still run into the assembly issue, but hopefully such cases are very unrealistic.

LTO builds default to -ffunction-sections, and a section in a relocatable file can unlikely be larger than 256MiB.

jyknight · 2024-07-11T21:24:09Z

But that's only one of the kinds of errors you can get from such code.

We'll also emit a PC32 relocation -- with the user-specified addend included -- for symbols that are NOT in the same section. It won't be an assembler error, then, but will turn into a linker error. A binary can easily be larger than 256MB, in which case this breaks again.

I really don't think magic numbers are the right answer here. We either need to formalize this sort of code as invalid, or else modify codegen to not use a 32-bit-limited fixup/relocation for code like this.

MaskRay · 2024-07-12T01:52:39Z

But that's only one of the kinds of errors you can get from such code.

The other kinds of errors are prevented by other code, e.g. isOffsetSuitableForCodeModel has return Offset < 16 * 1024 * 1024.

We'll also emit a PC32 relocation -- with the user-specified addend included -- for symbols that are NOT in the same section. It won't be an assembler error, then, but will turn into a linker error. A binary can easily be larger than 256MB, in which case this breaks again.

define internal void @foo() {
  ret void
}
define i64 @main() {
  ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -1879048192)
}
define i64 @bar() {
  ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 1879048192)
}

When foo and main are in different sections, MC will emit a relocation.
There won't be a linker error, even if --emit-relocs is used.
(When ld --emit-relocs converts .text.foo - 0x70000004 to .text + addend, the addend will be larger than -0x70000004.)

% readelf -Wr r.o
Relocation section '.rela.text.main' at offset 0xd0 contains 1 entry:
    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
0000000000000003  0000000200000002 R_X86_64_PC32          0000000000000000 .text.foo - 70000004

Even when foo and bar are in the same section, we will not fold the large displacement (0x70000000) into leaq
due to return Offset < 16 * 1024 * 1024 in isOffsetSuitableForCodeModel.

        movl    $1879048192, %eax               # imm = 0x70000000
        leaq    foo(%rax), %rax
        retq

I really don't think magic numbers are the right answer here. We either need to formalize this sort of code as invalid, or else modify codegen to not use a 32-bit-limited fixup/relocation for code like this.

The magic number is about the input section section size, instead of the output section size.
If the leaq optimization of a function symbol with a negative offset like -0x70000000 isn't that useful, I could also drop it completely.

MaskRay · 2024-07-29T23:13:49Z

The other kinds of errors are prevented by other code, e.g. isOffsetSuitableForCodeModel has return Offset < 16 * 1024 * 1024.

@jyknight Does the magic offset used in a similar place make you accept this approach more? :)

I am happy to change that INT_MIN+256MiB to 16MiB to have consistent magic numbers.

MaskRay · 2024-08-06T18:59:37Z

Friendly ping :)

MaskRay · 2024-09-17T17:13:45Z

The other kinds of errors are prevented by other code, e.g. isOffsetSuitableForCodeModel has return Offset < 16 * 1024 * 1024.

@jyknight Does the magic offset used in a similar place make you accept this approach more? :)

I am happy to change that INT_MIN+256MiB to 16MiB to have consistent magic numbers.

Friendly ping :)

RKSimon · 2024-09-17T21:11:45Z

Let's at least use the same offset as used in isOffsetSuitableForCodeModel - ideally add comments to both locations saying that they are matching estimates (you can pull out the magic number into one location if you want but its not necessary).

Created using spr 1.3.5-bogner

MaskRay · 2024-09-17T21:37:52Z

Let's at least use the same offset as used in isOffsetSuitableForCodeModel - ideally add comments to both locations saying that they are matching estimates (you can pull out the magic number into one location if you want but its not necessary).

Done. I think only if (isa_and_nonnull<Function>(AM.GV) && AM.Disp < -16 * 1024 * 1024) needs a comment where the magic number comes from. Its choice is more flexible and we just arbitrarily pick an extremely conservative threshold.

The 16MiB magic number in isOffsetSuitableForCodeModel could be arbitrarily increased without changing the code modified by this patch.

MaskRay · 2024-09-25T04:02:37Z

Ping

RKSimon

LGTM as a sticking plaster, but it'd be much better if we can come up with better solutions for both compile and link time cases.

@foo

…p) when the offset is near INT32_MIN ``` define internal void @foo() { ret void } define i64 @main() { ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2147483626) } ``` When `foo` is a local symbol, `foo` and `main` are in the same section, and `offset` is near INT32_MIN, referencing `foo+offset` in `main` with RIP-relative addressing needs `leaq .text+offset1(%rip), %rax` where `offset1 < offset`, and `offset1` might underflow. (https://discourse.llvm.org/t/arithmetic-referencing-dso-local-function-causes-compilation-error-on-linux-x64/80033): Don't use RIP-relative addressing if the negative offset is near INT32_MIN. Arbitrarily reuse the magic number in isOffsetSuitableForCodeModel to guard against the edge case when `address(current_instruction)-foo < 4GiB-16MiB`. If the difference is larger than 4GiB-16MiB, `ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2**32+256MiB)` would still cause the assembly issue, such cases are unrealistic. Pull Request: llvm/llvm-project#98438

@foo

…p) when the offset is near INT32_MIN ``` define internal void @foo() { ret void } define i64 @main() { ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2147483626) } ``` When `foo` is a local symbol, `foo` and `main` are in the same section, and `offset` is near INT32_MIN, referencing `foo+offset` in `main` with RIP-relative addressing needs `leaq .text+offset1(%rip), %rax` where `offset1 < offset`, and `offset1` might underflow. (https://discourse.llvm.org/t/arithmetic-referencing-dso-local-function-causes-compilation-error-on-linux-x64/80033): Don't use RIP-relative addressing if the negative offset is near INT32_MIN. Arbitrarily reuse the magic number in isOffsetSuitableForCodeModel to guard against the edge case when `address(current_instruction)-foo < 4GiB-16MiB`. If the difference is larger than 4GiB-16MiB, `ret i64 add (i64 ptrtoint (ptr @foo to i64), i64 -2**32+256MiB)` would still cause the assembly issue, such cases are unrealistic. Pull Request: llvm#98438

[𝘀𝗽𝗿] initial version

698da3b

Created using spr 1.3.5-bogner

llvmbot added the backend:X86 label Jul 11, 2024

MaskRay requested review from RKSimon, phoebewang and topperc July 11, 2024 06:40

RKSimon reviewed Jul 11, 2024

View reviewed changes

fix impl and fix description

7d44674

Created using spr 1.3.5-bogner

MaskRay changed the title ~~[X86] Don't fold offsets that are too closer to INT32_MIN in non-large code models~~ [X86] Don't convert foo to foo(%rip) when the offset is near INT32_MIN Jul 11, 2024

.

545ad01

Created using spr 1.3.5-bogner

MaskRay changed the title ~~[X86] Don't convert foo to foo(%rip) when the offset is near INT32_MIN~~ [X86] Don't convert local function foo in the same section to foo(%rip) when the offset is near INT32_MIN Jul 11, 2024

be much more conservative

835b141

Created using spr 1.3.5-bogner

RKSimon approved these changes Sep 30, 2024

View reviewed changes

MaskRay merged commit 570871e into main Oct 1, 2024
5 of 8 checks passed

MaskRay deleted the users/MaskRay/spr/x86-dont-fold-offsets-that-are-too-closer-to-int32_min-in-non-large-code-models branch October 1, 2024 05:46

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[X86] Don't convert local function foo in the same section to foo(%rip) when the offset is near INT32_MIN #98438

[X86] Don't convert local function foo in the same section to foo(%rip) when the offset is near INT32_MIN #98438

Uh oh!

MaskRay commented Jul 11, 2024 •

edited

Loading

Uh oh!

llvmbot commented Jul 11, 2024

Uh oh!

RKSimon Jul 11, 2024

Uh oh!

jyknight commented Jul 11, 2024

Uh oh!

MaskRay commented Jul 11, 2024 •

edited

Loading

Uh oh!

jyknight commented Jul 11, 2024

Uh oh!

MaskRay commented Jul 12, 2024 •

edited

Loading

Uh oh!

MaskRay commented Jul 29, 2024 •

edited

Loading

Uh oh!

MaskRay commented Aug 6, 2024

Uh oh!

MaskRay commented Sep 17, 2024

Uh oh!

RKSimon commented Sep 17, 2024

Uh oh!

MaskRay commented Sep 17, 2024

Uh oh!

MaskRay commented Sep 25, 2024

Uh oh!

RKSimon left a comment

Uh oh!

Uh oh!

Uh oh!

[X86] Don't convert local function foo in the same section to foo(%rip) when the offset is near INT32_MIN #98438

[X86] Don't convert local function foo in the same section to foo(%rip) when the offset is near INT32_MIN #98438

Uh oh!

Conversation

MaskRay commented Jul 11, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 11, 2024

Uh oh!

RKSimon Jul 11, 2024

Choose a reason for hiding this comment

Uh oh!

jyknight commented Jul 11, 2024

Uh oh!

MaskRay commented Jul 11, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jyknight commented Jul 11, 2024

Uh oh!

MaskRay commented Jul 12, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

MaskRay commented Jul 29, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

MaskRay commented Aug 6, 2024

Uh oh!

MaskRay commented Sep 17, 2024

Uh oh!

RKSimon commented Sep 17, 2024

Uh oh!

MaskRay commented Sep 17, 2024

Uh oh!

MaskRay commented Sep 25, 2024

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

MaskRay commented Jul 11, 2024 •

edited

Loading

MaskRay commented Jul 11, 2024 •

edited

Loading

MaskRay commented Jul 12, 2024 •

edited

Loading

MaskRay commented Jul 29, 2024 •

edited

Loading