Skip to content

Commit 54bacaf

Browse files
committed
[X86] Always use rip-relative addressing on 64-bit when rematerializing all zeros/ones registers using a folded load.
Previously we only used RIP-relative addressing when PIC was enabled, but we know we're in the small or kernel code model here, so we should always be able to use RIP-relative addressing, which gives a smaller encoding. Here's a godbolt link that demonstrates the current codegen: https://godbolt.org/z/j3158o (note that in the non-PIC version the load from .LCPI0_0 doesn't use RIP-relative addressing, but if you change the constant in the source from 0.0 to 1.0 it will become RIP-relative). Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D97208
1 parent ef76a33 commit 54bacaf

File tree

3 files changed

+14
-13
lines changed

3 files changed

+14
-13
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6085,15 +6085,16 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
60856085

60866086
// x86-32 PIC requires a PIC base register for constant pools.
60876087
unsigned PICBase = 0;
6088-
if (MF.getTarget().isPositionIndependent()) {
6089-
if (Subtarget.is64Bit())
6090-
PICBase = X86::RIP;
6091-
else
6092-
// FIXME: PICBase = getGlobalBaseReg(&MF);
6093-
// This doesn't work for several reasons.
6094-
// 1. GlobalBaseReg may have been spilled.
6095-
// 2. It may not be live at MI.
6096-
return nullptr;
6088+
// Since we're using Small or Kernel code model, we can always use
6089+
// RIP-relative addressing for a smaller encoding.
6090+
if (Subtarget.is64Bit()) {
6091+
PICBase = X86::RIP;
6092+
} else if (MF.getTarget().isPositionIndependent()) {
6093+
// FIXME: PICBase = getGlobalBaseReg(&MF);
6094+
// This doesn't work for several reasons.
6095+
// 1. GlobalBaseReg may have been spilled.
6096+
// 2. It may not be live at MI.
6097+
return nullptr;
60976098
}
60986099

60996100
// Create a constant-pool entry.

llvm/test/CodeGen/X86/avx-cmp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_rip
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
33

44
define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind {
@@ -49,7 +49,7 @@ define void @render(double %a0) nounwind {
4949
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
5050
; CHECK-NEXT: vmovsd (%rsp), %xmm0 # 8-byte Reload
5151
; CHECK-NEXT: # xmm0 = mem[0],zero
52-
; CHECK-NEXT: vucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
52+
; CHECK-NEXT: vucomisd {{\.LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5353
; CHECK-NEXT: jne .LBB2_5
5454
; CHECK-NEXT: jnp .LBB2_2
5555
; CHECK-NEXT: .LBB2_5: # %if.then

llvm/test/CodeGen/X86/mmx-fold-zero.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_rip
22
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
33
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64
44

@@ -70,7 +70,7 @@ define double @mmx_zero(double, double, double, double) nounwind {
7070
; X64-NEXT: paddw %mm2, %mm0
7171
; X64-NEXT: paddw %mm6, %mm0
7272
; X64-NEXT: pmuludq %mm3, %mm0
73-
; X64-NEXT: paddw {{\.LCPI[0-9]+_[0-9]+}}, %mm0
73+
; X64-NEXT: paddw {{\.LCPI[0-9]+_[0-9]+}}(%rip), %mm0
7474
; X64-NEXT: paddw %mm1, %mm0
7575
; X64-NEXT: pmuludq %mm7, %mm0
7676
; X64-NEXT: pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload

0 commit comments

Comments
 (0)