Skip to content

Commit d43e6e1

Browse files
committed
R600: Expand is_fpclass
Fixes #135083
1 parent f819f46 commit d43e6e1

File tree

2 files changed

+274
-0
lines changed

2 files changed

+274
-0
lines changed

llvm/lib/Target/AMDGPU/R600ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
100100

101101
setOperationAction(ISD::FSUB, MVT::f32, Expand);
102102

103+
setOperationAction(ISD::IS_FPCLASS,
104+
{MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
105+
MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
106+
Expand);
107+
103108
setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
104109
MVT::f64, Custom);
105110

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=r600-- -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
3+
4+
; Scalar f32 case with class mask 3 (the NaN test, per the note on the call).
; The expected Cayman code masks off the sign bit with 2147483647, does a
; signed integer compare of that value against 2139095040 (printed as INF),
; and keeps the low bit of the compare result as the stored i32.
define amdgpu_kernel void @isnan_f32(ptr addrspace(1) %out, float %x) {
5+
; CM-LABEL: isnan_f32:
6+
; CM: ; %bb.0:
7+
; CM-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[]
8+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
9+
; CM-NEXT: CF_END
10+
; CM-NEXT: PAD
11+
; CM-NEXT: ALU clause starting at 4:
12+
; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
13+
; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
14+
; CM-NEXT: SETGT_INT * T0.W, PV.W, literal.x,
15+
; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
16+
; CM-NEXT: AND_INT * T0.X, PV.W, 1,
17+
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
18+
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
19+
%result = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan
20+
%zext = zext i1 %result to i32
21+
store i32 %zext, ptr addrspace(1) %out, align 4
22+
ret void
23+
}
24+
25+
; Scalar f32 reproducer for issue #135083, using class mask 504.
; The expected Cayman code masks off the sign bit with 2147483647 and then
; checks that 2139095040 (printed as INF) is greater than the result, i.e. the
; operand order of the compare is swapped relative to the NaN test.
define amdgpu_kernel void @issue135083_f32(ptr addrspace(1) %out, float %x) {
26+
; CM-LABEL: issue135083_f32:
27+
; CM: ; %bb.0:
28+
; CM-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[]
29+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
30+
; CM-NEXT: CF_END
31+
; CM-NEXT: PAD
32+
; CM-NEXT: ALU clause starting at 4:
33+
; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
34+
; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
35+
; CM-NEXT: SETGT_INT * T0.W, literal.x, PV.W,
36+
; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
37+
; CM-NEXT: AND_INT * T0.X, PV.W, 1,
38+
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
39+
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
40+
%result = call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8: every finite class (zero, subnormal, normal; either sign)
41+
%zext = zext i1 %result to i32
42+
store i32 %zext, ptr addrspace(1) %out, align 4
43+
ret void
44+
}
45+
46+
; <2 x float> variant of the issue #135083 reproducer (class mask 504).
; The expected code handles each lane separately: the sign bit is masked off
; with 2147483647 and the result is compared against 2139095039, the bit
; pattern printed as 3.402823e+38, before a CNDE_INT picks the lane's 1 or 0.
define amdgpu_kernel void @issue135083_v2f32(ptr addrspace(1) %out, <2 x float> %x) {
47+
; CM-LABEL: issue135083_v2f32:
48+
; CM: ; %bb.0:
49+
; CM-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[]
50+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
51+
; CM-NEXT: CF_END
52+
; CM-NEXT: PAD
53+
; CM-NEXT: ALU clause starting at 4:
54+
; CM-NEXT: AND_INT * T0.W, KC0[3].X, literal.x,
55+
; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
56+
; CM-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
57+
; CM-NEXT: SETGT_INT * T0.W, PV.W, literal.y,
58+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
59+
; CM-NEXT: CNDE_INT T0.Y, PV.W, 1, 0.0,
60+
; CM-NEXT: SETGT_INT * T0.W, PV.Z, literal.x,
61+
; CM-NEXT: 2139095039(3.402823e+38), 0(0.000000e+00)
62+
; CM-NEXT: CNDE_INT * T0.X, PV.W, 1, 0.0,
63+
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
64+
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
65+
%result = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 504) ; 0x1f8: every finite class (zero, subnormal, normal; either sign)
66+
%zext = zext <2 x i1> %result to <2 x i32>
67+
store <2 x i32> %zext, ptr addrspace(1) %out, align 8
68+
ret void
69+
}
70+
71+
; <3 x float> variant of the issue #135083 reproducer (class mask 504).
; Each lane is tested the same way as in the smaller cases (sign bit masked
; off, then compared against 2139095039); the expected output uses two
; stores, the second one addressed at %out plus 8 bytes.
define amdgpu_kernel void @issue135083_v3f32(ptr addrspace(1) %out, <3 x float> %x) {
72+
; CM-LABEL: issue135083_v3f32:
73+
; CM: ; %bb.0:
74+
; CM-NEXT: ALU 17, @4, KC0[CB0:0-32], KC1[]
75+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T3.X
76+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
77+
; CM-NEXT: CF_END
78+
; CM-NEXT: ALU clause starting at 4:
79+
; CM-NEXT: AND_INT * T0.W, KC0[3].W, literal.x,
80+
; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
81+
; CM-NEXT: AND_INT T0.Z, KC0[3].Z, literal.x,
82+
; CM-NEXT: SETGT_INT * T0.W, PV.W, literal.y,
83+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
84+
; CM-NEXT: CNDE_INT T0.X, PV.W, 1, 0.0,
85+
; CM-NEXT: AND_INT T0.Y, KC0[3].Y, literal.x,
86+
; CM-NEXT: SETGT_INT T0.Z, PV.Z, literal.y,
87+
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
88+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
89+
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
90+
; CM-NEXT: LSHR T1.X, PV.W, literal.x,
91+
; CM-NEXT: CNDE_INT T2.Y, PV.Z, 1, 0.0,
92+
; CM-NEXT: SETGT_INT * T0.W, PV.Y, literal.y,
93+
; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
94+
; CM-NEXT: CNDE_INT * T2.X, PV.W, 1, 0.0,
95+
; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
96+
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
97+
%result = call <3 x i1> @llvm.is.fpclass.v3f32(<3 x float> %x, i32 504) ; 0x1f8: every finite class (zero, subnormal, normal; either sign)
98+
%zext = zext <3 x i1> %result to <3 x i32>
99+
store <3 x i32> %zext, ptr addrspace(1) %out, align 16
100+
ret void
101+
}
102+
103+
; <4 x float> variant of the issue #135083 reproducer (class mask 504).
; Each lane has its sign bit masked off with 2147483647 and is then compared
; against 2139095039, the bit pattern printed as 3.402823e+38.
; The intrinsic's overload suffix is spelled v4f32 so that it matches the
; <4 x float> operand of the call.
define amdgpu_kernel void @issue135083_v4f32(ptr addrspace(1) %out, <4 x float> %x) {
104+
; CM-LABEL: issue135083_v4f32:
105+
; CM: ; %bb.0:
106+
; CM-NEXT: ALU 18, @4, KC0[CB0:0-32], KC1[]
107+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
108+
; CM-NEXT: CF_END
109+
; CM-NEXT: PAD
110+
; CM-NEXT: ALU clause starting at 4:
111+
; CM-NEXT: AND_INT * T0.W, KC0[4].X, literal.x,
112+
; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
113+
; CM-NEXT: SETGT_INT T0.Z, PV.W, literal.x,
114+
; CM-NEXT: AND_INT * T0.W, KC0[3].W, literal.y,
115+
; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
116+
; CM-NEXT: AND_INT T0.Y, KC0[3].Z, literal.x,
117+
; CM-NEXT: SETGT_INT T1.Z, PV.W, literal.y,
118+
; CM-NEXT: CNDE_INT * T0.W, PV.Z, 1, 0.0,
119+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
120+
; CM-NEXT: AND_INT T1.Y, KC0[3].Y, literal.x,
121+
; CM-NEXT: CNDE_INT T0.Z, PV.Z, 1, 0.0,
122+
; CM-NEXT: SETGT_INT * T1.W, PV.Y, literal.y,
123+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
124+
; CM-NEXT: CNDE_INT T0.Y, PV.W, 1, 0.0,
125+
; CM-NEXT: SETGT_INT * T1.W, PV.Y, literal.x,
126+
; CM-NEXT: 2139095039(3.402823e+38), 0(0.000000e+00)
127+
; CM-NEXT: CNDE_INT * T0.X, PV.W, 1, 0.0,
128+
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
129+
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
130+
%result = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 504) ; 0x1f8: every finite class (zero, subnormal, normal; either sign)
131+
%zext = zext <4 x i1> %result to <4 x i32>
132+
store <4 x i32> %zext, ptr addrspace(1) %out, align 16
133+
ret void
134+
}
135+
136+
; <8 x float> variant of the issue #135083 reproducer (class mask 504).
; Each lane has its sign bit masked off with 2147483647 and is then compared
; against 2139095039, the bit pattern printed as 3.402823e+38; the expected
; output uses two stores, the second one addressed at %out plus 16 bytes.
; The intrinsic's overload suffix is spelled v8f32 so that it matches the
; <8 x float> operand of the call.
define amdgpu_kernel void @issue135083_v8f32(ptr addrspace(1) %out, <8 x float> %x) {
137+
; CM-LABEL: issue135083_v8f32:
138+
; CM: ; %bb.0:
139+
; CM-NEXT: ALU 34, @4, KC0[CB0:0-32], KC1[]
140+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
141+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
142+
; CM-NEXT: CF_END
143+
; CM-NEXT: ALU clause starting at 4:
144+
; CM-NEXT: AND_INT T0.Z, KC0[6].X, literal.x,
145+
; CM-NEXT: AND_INT * T0.W, KC0[4].W, literal.x,
146+
; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
147+
; CM-NEXT: AND_INT T0.X, KC0[4].Y, literal.x,
148+
; CM-NEXT: SETGT_INT T0.Y, PV.W, literal.y,
149+
; CM-NEXT: SETGT_INT T0.Z, PV.Z, literal.y,
150+
; CM-NEXT: AND_INT * T0.W, KC0[5].W, literal.x,
151+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
152+
; CM-NEXT: AND_INT T1.X, KC0[5].Z, literal.x,
153+
; CM-NEXT: SETGT_INT T1.Y, PV.W, literal.y,
154+
; CM-NEXT: AND_INT T1.Z, KC0[5].X, literal.x,
155+
; CM-NEXT: CNDE_INT * T1.W, PV.Z, 1, 0.0,
156+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
157+
; CM-NEXT: SETGT_INT T2.X, PV.Z, literal.x,
158+
; CM-NEXT: AND_INT T2.Y, KC0[5].Y, literal.y,
159+
; CM-NEXT: CNDE_INT T1.Z, PV.Y, 1, 0.0,
160+
; CM-NEXT: SETGT_INT * T0.W, PV.X, literal.x,
161+
; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
162+
; CM-NEXT: AND_INT T3.X, KC0[4].Z, literal.x,
163+
; CM-NEXT: CNDE_INT T1.Y, PV.W, 1, 0.0,
164+
; CM-NEXT: SETGT_INT T0.Z, PV.Y, literal.y,
165+
; CM-NEXT: CNDE_INT * T0.W, PV.X, 1, 0.0,
166+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
167+
; CM-NEXT: CNDE_INT T1.X, PV.Z, 1, 0.0,
168+
; CM-NEXT: SETGT_INT T2.Y, PV.X, literal.x,
169+
; CM-NEXT: CNDE_INT T0.Z, T0.Y, 1, 0.0,
170+
; CM-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y,
171+
; CM-NEXT: 2139095039(3.402823e+38), 16(2.242078e-44)
172+
; CM-NEXT: LSHR T2.X, PV.W, literal.x,
173+
; CM-NEXT: CNDE_INT T0.Y, PV.Y, 1, 0.0,
174+
; CM-NEXT: SETGT_INT * T2.W, T0.X, literal.y,
175+
; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
176+
; CM-NEXT: CNDE_INT * T0.X, PV.W, 1, 0.0,
177+
; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
178+
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
179+
%result = call <8 x i1> @llvm.is.fpclass.v8f32(<8 x float> %x, i32 504) ; 0x1f8: every finite class (zero, subnormal, normal; either sign)
180+
%zext = zext <8 x i1> %result to <8 x i32>
181+
store <8 x i32> %zext, ptr addrspace(1) %out, align 32
182+
ret void
183+
}
184+
185+
; <16 x float> variant of the issue #135083 reproducer (class mask 504).
; Each lane has its sign bit masked off with 2147483647 and is then compared
; against 2139095039, the bit pattern printed as 3.402823e+38; the expected
; output uses four stores, addressed at %out plus 0, 16, 32 and 48 bytes.
; The intrinsic's overload suffix is spelled v16f32 so that it matches the
; <16 x float> operand of the call.
define amdgpu_kernel void @issue135083_v16f32(ptr addrspace(1) %out, <16 x float> %x) {
186+
; CM-LABEL: issue135083_v16f32:
187+
; CM: ; %bb.0:
188+
; CM-NEXT: ALU 69, @6, KC0[CB0:0-32], KC1[]
189+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T7.X
190+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X
191+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T6.X
192+
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
193+
; CM-NEXT: CF_END
194+
; CM-NEXT: ALU clause starting at 6:
195+
; CM-NEXT: AND_INT T0.Z, KC0[6].Y, literal.x,
196+
; CM-NEXT: AND_INT * T0.W, KC0[6].W, literal.x,
197+
; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
198+
; CM-NEXT: SETGT_INT T0.X, PV.W, literal.x,
199+
; CM-NEXT: AND_INT T0.Y, KC0[6].Z, literal.y,
200+
; CM-NEXT: AND_INT T1.Z, KC0[7].Y, literal.y,
201+
; CM-NEXT: AND_INT * T0.W, KC0[7].X, literal.y,
202+
; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
203+
; CM-NEXT: SETGT_INT T1.X, PV.W, literal.x,
204+
; CM-NEXT: AND_INT T1.Y, KC0[7].Z, literal.y,
205+
; CM-NEXT: AND_INT T2.Z, KC0[8].X, literal.y,
206+
; CM-NEXT: AND_INT * T0.W, KC0[7].W, literal.y,
207+
; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
208+
; CM-NEXT: SETGT_INT T2.X, PV.W, literal.x,
209+
; CM-NEXT: SETGT_INT T2.Y, PV.Z, literal.x,
210+
; CM-NEXT: AND_INT T2.Z, KC0[10].X, literal.y,
211+
; CM-NEXT: AND_INT * T0.W, KC0[8].W, literal.y,
212+
; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
213+
; CM-NEXT: AND_INT T3.X, KC0[8].Y, literal.x,
214+
; CM-NEXT: SETGT_INT T3.Y, PV.W, literal.y,
215+
; CM-NEXT: SETGT_INT T2.Z, PV.Z, literal.y,
216+
; CM-NEXT: AND_INT * T0.W, KC0[9].W, literal.x,
217+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
218+
; CM-NEXT: AND_INT T4.X, KC0[9].Z, literal.x,
219+
; CM-NEXT: SETGT_INT T4.Y, PV.W, literal.y,
220+
; CM-NEXT: AND_INT T3.Z, KC0[9].X, literal.x,
221+
; CM-NEXT: CNDE_INT * T4.W, PV.Z, 1, 0.0,
222+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
223+
; CM-NEXT: SETGT_INT T5.X, PV.Z, literal.x,
224+
; CM-NEXT: AND_INT T5.Y, KC0[9].Y, literal.y,
225+
; CM-NEXT: CNDE_INT T4.Z, PV.Y, 1, 0.0,
226+
; CM-NEXT: SETGT_INT * T0.W, PV.X, literal.x,
227+
; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
228+
; CM-NEXT: AND_INT T6.X, KC0[8].Z, literal.x,
229+
; CM-NEXT: CNDE_INT T4.Y, PV.W, 1, 0.0,
230+
; CM-NEXT: SETGT_INT T2.Z, PV.Y, literal.y,
231+
; CM-NEXT: CNDE_INT * T3.W, PV.X, 1, 0.0,
232+
; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
233+
; CM-NEXT: CNDE_INT T4.X, PV.Z, 1, 0.0,
234+
; CM-NEXT: SETGT_INT T5.Y, PV.X, literal.x,
235+
; CM-NEXT: CNDE_INT T3.Z, T3.Y, 1, 0.0,
236+
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
237+
; CM-NEXT: 2139095039(3.402823e+38), 48(6.726233e-44)
238+
; CM-NEXT: LSHR T5.X, PV.W, literal.x,
239+
; CM-NEXT: CNDE_INT T3.Y, PV.Y, 1, 0.0,
240+
; CM-NEXT: SETGT_INT T2.Z, T3.X, literal.y,
241+
; CM-NEXT: CNDE_INT * T2.W, T2.Y, 1, 0.0,
242+
; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
243+
; CM-NEXT: CNDE_INT T3.X, PV.Z, 1, 0.0,
244+
; CM-NEXT: SETGT_INT T1.Y, T1.Y, literal.x,
245+
; CM-NEXT: CNDE_INT T2.Z, T2.X, 1, 0.0,
246+
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
247+
; CM-NEXT: 2139095039(3.402823e+38), 32(4.484155e-44)
248+
; CM-NEXT: LSHR T6.X, PV.W, literal.x,
249+
; CM-NEXT: CNDE_INT T2.Y, PV.Y, 1, 0.0,
250+
; CM-NEXT: SETGT_INT T1.Z, T1.Z, literal.y,
251+
; CM-NEXT: CNDE_INT * T1.W, T1.X, 1, 0.0,
252+
; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
253+
; CM-NEXT: CNDE_INT T2.X, PV.Z, 1, 0.0,
254+
; CM-NEXT: SETGT_INT T0.Y, T0.Y, literal.x,
255+
; CM-NEXT: CNDE_INT T1.Z, T0.X, 1, 0.0,
256+
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
257+
; CM-NEXT: 2139095039(3.402823e+38), 16(2.242078e-44)
258+
; CM-NEXT: LSHR T0.X, PV.W, literal.x,
259+
; CM-NEXT: CNDE_INT T1.Y, PV.Y, 1, 0.0,
260+
; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y,
261+
; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
262+
; CM-NEXT: CNDE_INT * T1.X, PV.W, 1, 0.0,
263+
; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
264+
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
265+
%result = call <16 x i1> @llvm.is.fpclass.v16f32(<16 x float> %x, i32 504) ; 0x1f8: every finite class (zero, subnormal, normal; either sign)
266+
%zext = zext <16 x i1> %result to <16 x i32>
267+
store <16 x i32> %zext, ptr addrspace(1) %out, align 64
268+
ret void
269+
}

0 commit comments

Comments
 (0)