Skip to content

Commit a77d3ea

Browse files
authored
[X86][GlobalISel] Add instruction selection support for x87 ld/st (#97016)
Add x87 G_LOAD/G_STORE selection support to existing C++ lowering.
1 parent 765e2f9 commit a77d3ea

File tree

3 files changed

+248
-14
lines changed

3 files changed

+248
-14
lines changed

llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -195,6 +195,15 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
195195
return &X86::VR512RegClass;
196196
}
197197

198+
if (RB.getID() == X86::PSRRegBankID) {
199+
if (Ty.getSizeInBits() == 80)
200+
return &X86::RFP80RegClass;
201+
if (Ty.getSizeInBits() == 64)
202+
return &X86::RFP64RegClass;
203+
if (Ty.getSizeInBits() == 32)
204+
return &X86::RFP32RegClass;
205+
}
206+
198207
llvm_unreachable("Unknown RegBank!");
199208
}
200209

@@ -462,6 +471,8 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
462471
: (HasAVX512 ? X86::VMOVSSZmr :
463472
HasAVX ? X86::VMOVSSmr :
464473
X86::MOVSSmr);
474+
if (X86::PSRRegBankID == RB.getID())
475+
return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
465476
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
466477
if (X86::GPRRegBankID == RB.getID())
467478
return Isload ? X86::MOV64rm : X86::MOV64mr;
@@ -472,6 +483,10 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
472483
: (HasAVX512 ? X86::VMOVSDZmr :
473484
HasAVX ? X86::VMOVSDmr :
474485
X86::MOVSDmr);
486+
if (X86::PSRRegBankID == RB.getID())
487+
return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
488+
} else if (Ty == LLT::scalar(80)) {
489+
return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
475490
} else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
476491
if (Alignment >= Align(16))
477492
return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
@@ -611,7 +626,9 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
611626
I.removeOperand(0);
612627
addFullAddress(MIB, AM).addUse(DefReg);
613628
}
614-
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
629+
bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
630+
I.addImplicitDefUseOperands(MF);
631+
return Constrained;
615632
}
616633

617634
static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
@@ -1503,14 +1520,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
15031520
const Register DstReg = I.getOperand(0).getReg();
15041521
const LLT DstTy = MRI.getType(DstReg);
15051522
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1506-
Align Alignment = Align(DstTy.getSizeInBytes());
1523+
// Create the load from the constant pool.
1524+
const ConstantFP *CFP = I.getOperand(1).getFPImm();
1525+
const auto &DL = MF.getDataLayout();
1526+
Align Alignment = DL.getPrefTypeAlign(CFP->getType());
15071527
const DebugLoc &DbgLoc = I.getDebugLoc();
15081528

15091529
unsigned Opc =
15101530
getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);
15111531

1512-
// Create the load from the constant pool.
1513-
const ConstantFP *CFP = I.getOperand(1).getFPImm();
15141532
unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
15151533
MachineInstr *LoadInst = nullptr;
15161534
unsigned char OpFlag = STI.classifyLocalReference(nullptr);
@@ -1525,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
15251543

15261544
MachineMemOperand *MMO = MF.getMachineMemOperand(
15271545
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
1528-
LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment);
1546+
LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);
15291547

15301548
LoadInst =
15311549
addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),

llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,6 @@
77
; When we cannot produce a test case anymore, that means we can remove
88
; the fallback path.
99

10-
; Check that we fallback on x86_fp80 G_STORE selection failures.
11-
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %1:psr(s80), %0:gpr(p0) :: (store (s80) into %ir.ptr, align 16) (in function: test_x86_fp80_dump)
12-
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump
13-
; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump:
14-
define void @test_x86_fp80_dump(ptr %ptr){
15-
store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16
16-
ret void
17-
}
18-
1910
; Check that we fallback on byVal argument
2011
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void @ScaleObjectOverwrite_3(ptr %index, ptr byval(%struct.PointListStruct) %index)' (in function: ScaleObjectOverwrite_2)
2112
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ScaleObjectOverwrite_2

llvm/test/CodeGen/X86/isel-x87.ll

Lines changed: 225 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,225 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86
3+
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86
4+
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86,FAST_X86
5+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64
6+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64
7+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64,FAST_X64
8+
9+
define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
10+
; GISEL_X86-LABEL: f0:
11+
; GISEL_X86: # %bb.0:
12+
; GISEL_X86-NEXT: pushl %ebp
13+
; GISEL_X86-NEXT: movl %esp, %ebp
14+
; GISEL_X86-NEXT: andl $-16, %esp
15+
; GISEL_X86-NEXT: subl $48, %esp
16+
; GISEL_X86-NEXT: fldt 8(%ebp)
17+
; GISEL_X86-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}
18+
; GISEL_X86-NEXT: fxch %st(1)
19+
; GISEL_X86-NEXT: fstpt {{[0-9]+}}(%esp)
20+
; GISEL_X86-NEXT: fstpt (%esp)
21+
; GISEL_X86-NEXT: fldt {{[0-9]+}}(%esp)
22+
; GISEL_X86-NEXT: fldt (%esp)
23+
; GISEL_X86-NEXT: faddp %st, %st(1)
24+
; GISEL_X86-NEXT: movl %ebp, %esp
25+
; GISEL_X86-NEXT: popl %ebp
26+
; GISEL_X86-NEXT: retl
27+
;
28+
; SDAG_X86-LABEL: f0:
29+
; SDAG_X86: # %bb.0:
30+
; SDAG_X86-NEXT: pushl %ebp
31+
; SDAG_X86-NEXT: movl %esp, %ebp
32+
; SDAG_X86-NEXT: andl $-16, %esp
33+
; SDAG_X86-NEXT: subl $48, %esp
34+
; SDAG_X86-NEXT: fldt 8(%ebp)
35+
; SDAG_X86-NEXT: fld %st(0)
36+
; SDAG_X86-NEXT: fstpt {{[0-9]+}}(%esp)
37+
; SDAG_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
38+
; SDAG_X86-NEXT: fld %st(0)
39+
; SDAG_X86-NEXT: fstpt (%esp)
40+
; SDAG_X86-NEXT: faddp %st, %st(1)
41+
; SDAG_X86-NEXT: movl %ebp, %esp
42+
; SDAG_X86-NEXT: popl %ebp
43+
; SDAG_X86-NEXT: retl
44+
;
45+
; GISEL_X64-LABEL: f0:
46+
; GISEL_X64: # %bb.0:
47+
; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp)
48+
; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
49+
; GISEL_X64-NEXT: fxch %st(1)
50+
; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
51+
; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
52+
; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
53+
; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
54+
; GISEL_X64-NEXT: faddp %st, %st(1)
55+
; GISEL_X64-NEXT: retq
56+
;
57+
; SDAG_X64-LABEL: f0:
58+
; SDAG_X64: # %bb.0:
59+
; SDAG_X64-NEXT: fldt {{[0-9]+}}(%rsp)
60+
; SDAG_X64-NEXT: fld %st(0)
61+
; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
62+
; SDAG_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
63+
; SDAG_X64-NEXT: fld %st(0)
64+
; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
65+
; SDAG_X64-NEXT: faddp %st, %st(1)
66+
; SDAG_X64-NEXT: retq
67+
%a.addr = alloca x86_fp80, align 16
68+
%x = alloca x86_fp80, align 16
69+
store x86_fp80 %a, ptr %a.addr, align 16
70+
store x86_fp80 0xK400A8000000000000000, ptr %x, align 16
71+
%load1 = load x86_fp80, ptr %a.addr, align 16
72+
%load2 = load x86_fp80, ptr %x, align 16
73+
%add = fadd x86_fp80 %load1, %load2
74+
ret x86_fp80 %add
75+
}
76+
77+
78+
define void @f1(ptr %a, ptr %b) nounwind {
79+
; GISEL_X86-LABEL: f1:
80+
; GISEL_X86: # %bb.0:
81+
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
82+
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
83+
; GISEL_X86-NEXT: fldt (%eax)
84+
; GISEL_X86-NEXT: fldt (%ecx)
85+
; GISEL_X86-NEXT: fsubrp %st, %st(1)
86+
; GISEL_X86-NEXT: fstpt (%eax)
87+
; GISEL_X86-NEXT: retl
88+
;
89+
; SDAG_X86-LABEL: f1:
90+
; SDAG_X86: # %bb.0:
91+
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
92+
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
93+
; SDAG_X86-NEXT: fldt (%ecx)
94+
; SDAG_X86-NEXT: fldt (%eax)
95+
; SDAG_X86-NEXT: fsubrp %st, %st(1)
96+
; SDAG_X86-NEXT: fstpt (%ecx)
97+
; SDAG_X86-NEXT: retl
98+
;
99+
; CHECK-64-LABEL: f1:
100+
; CHECK-64: # %bb.0:
101+
; CHECK-64-NEXT: fldt (%rdi)
102+
; CHECK-64-NEXT: fldt (%rsi)
103+
; CHECK-64-NEXT: fsubrp %st, %st(1)
104+
; CHECK-64-NEXT: fstpt (%rdi)
105+
; CHECK-64-NEXT: retq
106+
%load1 = load x86_fp80, ptr %a, align 4
107+
%load2 = load x86_fp80, ptr %b, align 4
108+
%sub = fsub x86_fp80 %load1, %load2
109+
store x86_fp80 %sub, ptr %a, align 4
110+
ret void
111+
}
112+
113+
define void @f2(ptr %a, ptr %b) nounwind {
114+
; GISEL_X86-LABEL: f2:
115+
; GISEL_X86: # %bb.0:
116+
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
117+
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
118+
; GISEL_X86-NEXT: fldt (%eax)
119+
; GISEL_X86-NEXT: fldt (%ecx)
120+
; GISEL_X86-NEXT: fmulp %st, %st(1)
121+
; GISEL_X86-NEXT: fstpt (%eax)
122+
; GISEL_X86-NEXT: retl
123+
;
124+
; SDAG_X86-LABEL: f2:
125+
; SDAG_X86: # %bb.0:
126+
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
127+
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
128+
; SDAG_X86-NEXT: fldt (%ecx)
129+
; SDAG_X86-NEXT: fldt (%eax)
130+
; SDAG_X86-NEXT: fmulp %st, %st(1)
131+
; SDAG_X86-NEXT: fstpt (%ecx)
132+
; SDAG_X86-NEXT: retl
133+
;
134+
; CHECK-64-LABEL: f2:
135+
; CHECK-64: # %bb.0:
136+
; CHECK-64-NEXT: fldt (%rdi)
137+
; CHECK-64-NEXT: fldt (%rsi)
138+
; CHECK-64-NEXT: fmulp %st, %st(1)
139+
; CHECK-64-NEXT: fstpt (%rdi)
140+
; CHECK-64-NEXT: retq
141+
%load1 = load x86_fp80, ptr %a, align 16
142+
%load2 = load x86_fp80, ptr %b, align 16
143+
%mul = fmul x86_fp80 %load1, %load2
144+
store x86_fp80 %mul, ptr %a, align 16
145+
ret void
146+
}
147+
148+
define void @f3(ptr %a, ptr %b) nounwind {
149+
; GISEL_X86-LABEL: f3:
150+
; GISEL_X86: # %bb.0:
151+
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
152+
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
153+
; GISEL_X86-NEXT: fldt (%eax)
154+
; GISEL_X86-NEXT: fldt (%ecx)
155+
; GISEL_X86-NEXT: fdivrp %st, %st(1)
156+
; GISEL_X86-NEXT: fstpt (%eax)
157+
; GISEL_X86-NEXT: retl
158+
;
159+
; SDAG_X86-LABEL: f3:
160+
; SDAG_X86: # %bb.0:
161+
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
162+
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
163+
; SDAG_X86-NEXT: fldt (%ecx)
164+
; SDAG_X86-NEXT: fldt (%eax)
165+
; SDAG_X86-NEXT: fdivrp %st, %st(1)
166+
; SDAG_X86-NEXT: fstpt (%ecx)
167+
; SDAG_X86-NEXT: retl
168+
;
169+
; CHECK-64-LABEL: f3:
170+
; CHECK-64: # %bb.0:
171+
; CHECK-64-NEXT: fldt (%rdi)
172+
; CHECK-64-NEXT: fldt (%rsi)
173+
; CHECK-64-NEXT: fdivrp %st, %st(1)
174+
; CHECK-64-NEXT: fstpt (%rdi)
175+
; CHECK-64-NEXT: retq
176+
%load1 = load x86_fp80, ptr %a, align 4
177+
%load2 = load x86_fp80, ptr %b, align 4
178+
%div = fdiv x86_fp80 %load1, %load2
179+
store x86_fp80 %div, ptr %a, align 4
180+
ret void
181+
}
182+
183+
define void @f6(ptr %a, ptr %b) nounwind {
184+
; GISEL_X86-LABEL: f6:
185+
; GISEL_X86: # %bb.0:
186+
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
187+
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
188+
; GISEL_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
189+
; GISEL_X86-NEXT: flds (%eax)
190+
; GISEL_X86-NEXT: faddp %st, %st(1)
191+
; GISEL_X86-NEXT: fstps (%ecx)
192+
; GISEL_X86-NEXT: retl
193+
;
194+
; SDAG_X86-LABEL: f6:
195+
; SDAG_X86: # %bb.0:
196+
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
197+
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
198+
; SDAG_X86-NEXT: flds (%ecx)
199+
; SDAG_X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}
200+
; SDAG_X86-NEXT: fstps (%eax)
201+
; SDAG_X86-NEXT: retl
202+
;
203+
; GISEL_X64-LABEL: f6:
204+
; GISEL_X64: # %bb.0:
205+
; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
206+
; GISEL_X64-NEXT: flds (%rdi)
207+
; GISEL_X64-NEXT: faddp %st, %st(1)
208+
; GISEL_X64-NEXT: fstps (%rsi)
209+
; GISEL_X64-NEXT: retq
210+
;
211+
; SDAG_X64-LABEL: f6:
212+
; SDAG_X64: # %bb.0:
213+
; SDAG_X64-NEXT: flds (%rdi)
214+
; SDAG_X64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
215+
; SDAG_X64-NEXT: fstps (%rsi)
216+
; SDAG_X64-NEXT: retq
217+
%load1 = load float, ptr %a
218+
%add = fadd float %load1, 20.0
219+
store float %add, ptr %b
220+
ret void
221+
}
222+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
223+
; CHECK-32: {{.*}}
224+
; FAST_X64: {{.*}}
225+
; FAST_X86: {{.*}}

0 commit comments

Comments (0)