-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Allow accessing large globals in small code model #74785
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This removes some assumptions that the small code model will only reference "near" globals. There are still some missing optimizations and wrong code sequences, but I'd like to address those separately. This will require auditing any checks of the code model in the X86 backend.
@llvm/pr-subscribers-backend-x86 Author: Arthur Eubanks (aeubanks) Changes: This removes some assumptions that the small code model will only reference "near" globals. There are still some missing optimizations and wrong code sequences, but I'd like to address those separately. This will require auditing any checks of the code model in the X86 backend. Full diff: https://github.com/llvm/llvm-project/pull/74785.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a69ce40982ad82..d6ae4971d238d8 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8717,11 +8717,6 @@ bool X86InstrInfo::isSafeToMoveRegClassDefs(
/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
///
unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
- assert((!Subtarget.is64Bit() ||
- MF->getTarget().getCodeModel() == CodeModel::Medium ||
- MF->getTarget().getCodeModel() == CodeModel::Large) &&
- "X86-64 PIC uses RIP relative addressing");
-
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
Register GlobalBaseReg = X86FI->getGlobalBaseReg();
if (GlobalBaseReg != 0)
@@ -10083,12 +10078,6 @@ struct CGBR : public MachineFunctionPass {
static_cast<const X86TargetMachine *>(&MF.getTarget());
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- // Don't do anything in the 64-bit small and kernel code models. They use
- // RIP-relative addressing for everything.
- if (STI.is64Bit() && (TM->getCodeModel() == CodeModel::Small ||
- TM->getCodeModel() == CodeModel::Kernel))
- return false;
-
// Only emit a global base reg in PIC mode.
if (!TM->isPositionIndependent())
return false;
@@ -10114,16 +10103,7 @@ struct CGBR : public MachineFunctionPass {
PC = GlobalBaseReg;
if (STI.is64Bit()) {
- if (TM->getCodeModel() == CodeModel::Medium) {
- // In the medium code model, use a RIP-relative LEA to materialize the
- // GOT.
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
- .addReg(X86::RIP)
- .addImm(0)
- .addReg(0)
- .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
- .addReg(0);
- } else if (TM->getCodeModel() == CodeModel::Large) {
+ if (TM->getCodeModel() == CodeModel::Large) {
// In the large code model, we are aiming for this code, though the
// register allocation may vary:
// leaq .LN$pb(%rip), %rax
@@ -10146,7 +10126,14 @@ struct CGBR : public MachineFunctionPass {
.addReg(PBReg, RegState::Kill)
.addReg(GOTReg, RegState::Kill);
} else {
- llvm_unreachable("unexpected code model");
+ // In other code models, use a RIP-relative LEA to materialize the
+ // GOT.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
+ .addReg(0);
}
} else {
// Operand of MovePCtoStack is completely ignored by asm printer. It's
diff --git a/llvm/test/CodeGen/X86/code-model-elf.ll b/llvm/test/CodeGen/X86/code-model-elf.ll
index 483ffd87ac696f..457c6bde354f2e 100644
--- a/llvm/test/CodeGen/X86/code-model-elf.ll
+++ b/llvm/test/CodeGen/X86/code-model-elf.ll
@@ -41,7 +41,8 @@ target triple = "x86_64--linux"
@extern_data = external global [10 x i32], align 16
@thread_data = external thread_local global i32, align 4
@unknown_size_data = dso_local global [0 x i32] zeroinitializer, align 16
-
+@forced_small_data = dso_local global [10 x i32] zeroinitializer, code_model "small", align 16
+@forced_large_data = dso_local global [10 x i32] zeroinitializer, code_model "large", align 16
define dso_local ptr @lea_static_data() #0 {
; SMALL-STATIC-LABEL: lea_static_data:
@@ -223,6 +224,199 @@ define dso_local ptr @lea_unknown_size_data() #0 {
ret ptr @unknown_size_data
}
+; TODO: make nopic small and medium instruction sequence the same
+define dso_local ptr @lea_forced_small_data() #0 {
+; SMALL-STATIC-LABEL: lea_forced_small_data:
+; SMALL-STATIC: # %bb.0:
+; SMALL-STATIC-NEXT: movl $forced_small_data, %eax
+; SMALL-STATIC-NEXT: retq
+;
+; MEDIUM-STATIC-LABEL: lea_forced_small_data:
+; MEDIUM-STATIC: # %bb.0:
+; MEDIUM-STATIC-NEXT: movabsq $forced_small_data, %rax
+; MEDIUM-STATIC-NEXT: retq
+;
+; LARGE-STATIC-LABEL: lea_forced_small_data:
+; LARGE-STATIC: # %bb.0:
+; LARGE-STATIC-NEXT: movabsq $forced_small_data, %rax
+; LARGE-STATIC-NEXT: retq
+;
+; SMALL-PIC-LABEL: lea_forced_small_data:
+; SMALL-PIC: # %bb.0:
+; SMALL-PIC-NEXT: leaq forced_small_data(%rip), %rax
+; SMALL-PIC-NEXT: retq
+;
+; MEDIUM-SMALL-DATA-PIC-LABEL: lea_forced_small_data:
+; MEDIUM-SMALL-DATA-PIC: # %bb.0:
+; MEDIUM-SMALL-DATA-PIC-NEXT: leaq forced_small_data(%rip), %rax
+; MEDIUM-SMALL-DATA-PIC-NEXT: retq
+;
+; MEDIUM-PIC-LABEL: lea_forced_small_data:
+; MEDIUM-PIC: # %bb.0:
+; MEDIUM-PIC-NEXT: leaq forced_small_data(%rip), %rax
+; MEDIUM-PIC-NEXT: retq
+;
+; LARGE-PIC-LABEL: lea_forced_small_data:
+; LARGE-PIC: # %bb.0:
+; LARGE-PIC-NEXT: .L4$pb:
+; LARGE-PIC-NEXT: leaq .L4$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L4$pb, %rcx
+; LARGE-PIC-NEXT: addq %rax, %rcx
+; LARGE-PIC-NEXT: movabsq $forced_small_data@GOTOFF, %rax
+; LARGE-PIC-NEXT: addq %rcx, %rax
+; LARGE-PIC-NEXT: retq
+ ret ptr @forced_small_data
+}
+
+; TODO: make small and medium instruction sequence the same
+define dso_local i32 @load_forced_small_data() #0 {
+; SMALL-STATIC-LABEL: load_forced_small_data:
+; SMALL-STATIC: # %bb.0:
+; SMALL-STATIC-NEXT: movl forced_small_data+8(%rip), %eax
+; SMALL-STATIC-NEXT: retq
+;
+; MEDIUM-STATIC-LABEL: load_forced_small_data:
+; MEDIUM-STATIC: # %bb.0:
+; MEDIUM-STATIC-NEXT: movabsq $forced_small_data, %rax
+; MEDIUM-STATIC-NEXT: movl 8(%rax), %eax
+; MEDIUM-STATIC-NEXT: retq
+;
+; LARGE-STATIC-LABEL: load_forced_small_data:
+; LARGE-STATIC: # %bb.0:
+; LARGE-STATIC-NEXT: movabsq $forced_small_data, %rax
+; LARGE-STATIC-NEXT: movl 8(%rax), %eax
+; LARGE-STATIC-NEXT: retq
+;
+; SMALL-PIC-LABEL: load_forced_small_data:
+; SMALL-PIC: # %bb.0:
+; SMALL-PIC-NEXT: movl forced_small_data+8(%rip), %eax
+; SMALL-PIC-NEXT: retq
+;
+; MEDIUM-SMALL-DATA-PIC-LABEL: load_forced_small_data:
+; MEDIUM-SMALL-DATA-PIC: # %bb.0:
+; MEDIUM-SMALL-DATA-PIC-NEXT: leaq forced_small_data(%rip), %rax
+; MEDIUM-SMALL-DATA-PIC-NEXT: movl 8(%rax), %eax
+; MEDIUM-SMALL-DATA-PIC-NEXT: retq
+;
+; MEDIUM-PIC-LABEL: load_forced_small_data:
+; MEDIUM-PIC: # %bb.0:
+; MEDIUM-PIC-NEXT: leaq forced_small_data(%rip), %rax
+; MEDIUM-PIC-NEXT: movl 8(%rax), %eax
+; MEDIUM-PIC-NEXT: retq
+;
+; LARGE-PIC-LABEL: load_forced_small_data:
+; LARGE-PIC: # %bb.0:
+; LARGE-PIC-NEXT: .L5$pb:
+; LARGE-PIC-NEXT: leaq .L5$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L5$pb, %rcx
+; LARGE-PIC-NEXT: addq %rax, %rcx
+; LARGE-PIC-NEXT: movabsq $forced_small_data@GOTOFF, %rax
+; LARGE-PIC-NEXT: movl 8(%rcx,%rax), %eax
+; LARGE-PIC-NEXT: retq
+ %rv = load i32, ptr getelementptr inbounds (i32, ptr @forced_small_data, i64 2)
+ ret i32 %rv
+}
+
+; TODO: fix small code model instruction sequences to use 64-bit constants
+define dso_local ptr @lea_forced_large_data() #0 {
+; SMALL-STATIC-LABEL: lea_forced_large_data:
+; SMALL-STATIC: # %bb.0:
+; SMALL-STATIC-NEXT: movl $forced_large_data, %eax
+; SMALL-STATIC-NEXT: retq
+;
+; MEDIUM-STATIC-LABEL: lea_forced_large_data:
+; MEDIUM-STATIC: # %bb.0:
+; MEDIUM-STATIC-NEXT: movabsq $forced_large_data, %rax
+; MEDIUM-STATIC-NEXT: retq
+;
+; LARGE-STATIC-LABEL: lea_forced_large_data:
+; LARGE-STATIC: # %bb.0:
+; LARGE-STATIC-NEXT: movabsq $forced_large_data, %rax
+; LARGE-STATIC-NEXT: retq
+;
+; SMALL-PIC-LABEL: lea_forced_large_data:
+; SMALL-PIC: # %bb.0:
+; SMALL-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
+; SMALL-PIC-NEXT: leaq forced_large_data@GOTOFF(%rax), %rax
+; SMALL-PIC-NEXT: retq
+;
+; MEDIUM-SMALL-DATA-PIC-LABEL: lea_forced_large_data:
+; MEDIUM-SMALL-DATA-PIC: # %bb.0:
+; MEDIUM-SMALL-DATA-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rcx
+; MEDIUM-SMALL-DATA-PIC-NEXT: movabsq $forced_large_data@GOTOFF, %rax
+; MEDIUM-SMALL-DATA-PIC-NEXT: addq %rcx, %rax
+; MEDIUM-SMALL-DATA-PIC-NEXT: retq
+;
+; MEDIUM-PIC-LABEL: lea_forced_large_data:
+; MEDIUM-PIC: # %bb.0:
+; MEDIUM-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rcx
+; MEDIUM-PIC-NEXT: movabsq $forced_large_data@GOTOFF, %rax
+; MEDIUM-PIC-NEXT: addq %rcx, %rax
+; MEDIUM-PIC-NEXT: retq
+;
+; LARGE-PIC-LABEL: lea_forced_large_data:
+; LARGE-PIC: # %bb.0:
+; LARGE-PIC-NEXT: .L6$pb:
+; LARGE-PIC-NEXT: leaq .L6$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L6$pb, %rcx
+; LARGE-PIC-NEXT: addq %rax, %rcx
+; LARGE-PIC-NEXT: movabsq $forced_large_data@GOTOFF, %rax
+; LARGE-PIC-NEXT: addq %rcx, %rax
+; LARGE-PIC-NEXT: retq
+ ret ptr @forced_large_data
+}
+
+define dso_local i32 @load_forced_large_data() #0 {
+; SMALL-STATIC-LABEL: load_forced_large_data:
+; SMALL-STATIC: # %bb.0:
+; SMALL-STATIC-NEXT: movl forced_large_data+8(%rip), %eax
+; SMALL-STATIC-NEXT: retq
+;
+; MEDIUM-STATIC-LABEL: load_forced_large_data:
+; MEDIUM-STATIC: # %bb.0:
+; MEDIUM-STATIC-NEXT: movabsq $forced_large_data, %rax
+; MEDIUM-STATIC-NEXT: movl 8(%rax), %eax
+; MEDIUM-STATIC-NEXT: retq
+;
+; LARGE-STATIC-LABEL: load_forced_large_data:
+; LARGE-STATIC: # %bb.0:
+; LARGE-STATIC-NEXT: movabsq $forced_large_data, %rax
+; LARGE-STATIC-NEXT: movl 8(%rax), %eax
+; LARGE-STATIC-NEXT: retq
+;
+; SMALL-PIC-LABEL: load_forced_large_data:
+; SMALL-PIC: # %bb.0:
+; SMALL-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
+; SMALL-PIC-NEXT: movl forced_large_data@GOTOFF+8(%rax), %eax
+; SMALL-PIC-NEXT: retq
+;
+; MEDIUM-SMALL-DATA-PIC-LABEL: load_forced_large_data:
+; MEDIUM-SMALL-DATA-PIC: # %bb.0:
+; MEDIUM-SMALL-DATA-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
+; MEDIUM-SMALL-DATA-PIC-NEXT: movabsq $forced_large_data@GOTOFF, %rcx
+; MEDIUM-SMALL-DATA-PIC-NEXT: movl 8(%rax,%rcx), %eax
+; MEDIUM-SMALL-DATA-PIC-NEXT: retq
+;
+; MEDIUM-PIC-LABEL: load_forced_large_data:
+; MEDIUM-PIC: # %bb.0:
+; MEDIUM-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
+; MEDIUM-PIC-NEXT: movabsq $forced_large_data@GOTOFF, %rcx
+; MEDIUM-PIC-NEXT: movl 8(%rax,%rcx), %eax
+; MEDIUM-PIC-NEXT: retq
+;
+; LARGE-PIC-LABEL: load_forced_large_data:
+; LARGE-PIC: # %bb.0:
+; LARGE-PIC-NEXT: .L7$pb:
+; LARGE-PIC-NEXT: leaq .L7$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L7$pb, %rcx
+; LARGE-PIC-NEXT: addq %rax, %rcx
+; LARGE-PIC-NEXT: movabsq $forced_large_data@GOTOFF, %rax
+; LARGE-PIC-NEXT: movl 8(%rcx,%rax), %eax
+; LARGE-PIC-NEXT: retq
+ %rv = load i32, ptr getelementptr inbounds (i32, ptr @forced_large_data, i64 2)
+ ret i32 %rv
+}
+
define dso_local i32 @load_global_data() #0 {
; SMALL-STATIC-LABEL: load_global_data:
; SMALL-STATIC: # %bb.0:
@@ -261,9 +455,9 @@ define dso_local i32 @load_global_data() #0 {
;
; LARGE-PIC-LABEL: load_global_data:
; LARGE-PIC: # %bb.0:
-; LARGE-PIC-NEXT: .L4$pb:
-; LARGE-PIC-NEXT: leaq .L4$pb(%rip), %rax
-; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L4$pb, %rcx
+; LARGE-PIC-NEXT: .L8$pb:
+; LARGE-PIC-NEXT: leaq .L8$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L8$pb, %rcx
; LARGE-PIC-NEXT: addq %rax, %rcx
; LARGE-PIC-NEXT: movabsq $global_data@GOTOFF, %rax
; LARGE-PIC-NEXT: movl 8(%rcx,%rax), %eax
@@ -311,9 +505,9 @@ define dso_local i32 @load_extern_data() #0 {
;
; LARGE-PIC-LABEL: load_extern_data:
; LARGE-PIC: # %bb.0:
-; LARGE-PIC-NEXT: .L5$pb:
-; LARGE-PIC-NEXT: leaq .L5$pb(%rip), %rax
-; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L5$pb, %rcx
+; LARGE-PIC-NEXT: .L9$pb:
+; LARGE-PIC-NEXT: leaq .L9$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L9$pb, %rcx
; LARGE-PIC-NEXT: addq %rax, %rcx
; LARGE-PIC-NEXT: movabsq $extern_data@GOT, %rax
; LARGE-PIC-NEXT: movq (%rcx,%rax), %rax
@@ -362,9 +556,9 @@ define dso_local i32 @load_unknown_size_data() #0 {
;
; LARGE-PIC-LABEL: load_unknown_size_data:
; LARGE-PIC: # %bb.0:
-; LARGE-PIC-NEXT: .L6$pb:
-; LARGE-PIC-NEXT: leaq .L6$pb(%rip), %rax
-; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L6$pb, %rcx
+; LARGE-PIC-NEXT: .L10$pb:
+; LARGE-PIC-NEXT: leaq .L10$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L10$pb, %rcx
; LARGE-PIC-NEXT: addq %rax, %rcx
; LARGE-PIC-NEXT: movabsq $unknown_size_data@GOTOFF, %rax
; LARGE-PIC-NEXT: movl 8(%rcx,%rax), %eax
@@ -422,9 +616,9 @@ define dso_local ptr @lea_static_fn() #0 {
;
; LARGE-PIC-LABEL: lea_static_fn:
; LARGE-PIC: # %bb.0:
-; LARGE-PIC-NEXT: .L9$pb:
-; LARGE-PIC-NEXT: leaq .L9$pb(%rip), %rax
-; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L9$pb, %rcx
+; LARGE-PIC-NEXT: .L13$pb:
+; LARGE-PIC-NEXT: leaq .L13$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L13$pb, %rcx
; LARGE-PIC-NEXT: addq %rax, %rcx
; LARGE-PIC-NEXT: movabsq $static_fn@GOTOFF, %rax
; LARGE-PIC-NEXT: addq %rcx, %rax
@@ -465,9 +659,9 @@ define dso_local ptr @lea_global_fn() #0 {
;
; LARGE-PIC-LABEL: lea_global_fn:
; LARGE-PIC: # %bb.0:
-; LARGE-PIC-NEXT: .L10$pb:
-; LARGE-PIC-NEXT: leaq .L10$pb(%rip), %rax
-; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L10$pb, %rcx
+; LARGE-PIC-NEXT: .L14$pb:
+; LARGE-PIC-NEXT: leaq .L14$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L14$pb, %rcx
; LARGE-PIC-NEXT: addq %rax, %rcx
; LARGE-PIC-NEXT: movabsq $global_fn@GOTOFF, %rax
; LARGE-PIC-NEXT: addq %rcx, %rax
@@ -508,9 +702,9 @@ define dso_local ptr @lea_extern_fn() #0 {
;
; LARGE-PIC-LABEL: lea_extern_fn:
; LARGE-PIC: # %bb.0:
-; LARGE-PIC-NEXT: .L11$pb:
-; LARGE-PIC-NEXT: leaq .L11$pb(%rip), %rax
-; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L11$pb, %rcx
+; LARGE-PIC-NEXT: .L15$pb:
+; LARGE-PIC-NEXT: leaq .L15$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L15$pb, %rcx
; LARGE-PIC-NEXT: addq %rax, %rcx
; LARGE-PIC-NEXT: movabsq $extern_fn@GOT, %rax
; LARGE-PIC-NEXT: movq (%rcx,%rax), %rax
@@ -586,9 +780,9 @@ define dso_local float @load_constant_pool(float %x) #0 {
;
; LARGE-PIC-LABEL: load_constant_pool:
; LARGE-PIC: # %bb.0:
-; LARGE-PIC-NEXT: .L13$pb:
-; LARGE-PIC-NEXT: leaq .L13$pb(%rip), %rax
-; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L13$pb, %rcx
+; LARGE-PIC-NEXT: .L17$pb:
+; LARGE-PIC-NEXT: leaq .L17$pb(%rip), %rax
+; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.L17$pb, %rcx
; LARGE-PIC-NEXT: addq %rax, %rcx
; LARGE-PIC-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}@GOTOFF, %rax
; LARGE-PIC-NEXT: addss (%rcx,%rax), %xmm0
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good!
I see the TODOs in the test case and confirmed I see the same issues in the generated code.
This removes some assumptions that the small code model will only
reference "near" globals.
There are still some missing optimizations and wrong code sequences, but
I'd like to address those separately. This will require auditing any
checks of the code model in the X86 backend.