Skip to content

Commit c8007f9

Browse files
authored
DAG: Fix chain mismanagement in SoftenFloatRes_FP_EXTEND (#74558)
1 parent d24f23e commit c8007f9

File tree

2 files changed

+351
-2
lines changed

2 files changed

+351
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -522,8 +522,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
522522
Op = GetPromotedFloat(Op);
523523
// If the promotion did the FP_EXTEND to the destination type for us,
524524
// there's nothing left to do here.
525-
if (Op.getValueType() == N->getValueType(0))
525+
if (Op.getValueType() == N->getValueType(0)) {
526+
if (IsStrict)
527+
ReplaceValueWith(SDValue(N, 1), Chain);
526528
return BitConvertToInteger(Op);
529+
}
527530
}
528531

529532
// There's only a libcall for f16 -> f32 and shifting is only valid for bf16
@@ -541,8 +544,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
541544
}
542545
}
543546

544-
if (Op.getValueType() == MVT::bf16)
547+
if (Op.getValueType() == MVT::bf16) {
548+
// FIXME: Need ReplaceValueWith on chain in strict case
545549
return SoftenFloatRes_BF16_TO_FP(N);
550+
}
546551

547552
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
548553
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
Lines changed: 344 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,344 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 < %s | FileCheck -check-prefix=NOFP16 %s
3+
4+
declare void @f16_user(half)
5+
declare half @f16_result()
6+
7+
declare void @v2f16_user(<2 x half>)
8+
declare <2 x half> @v2f16_result()
9+
10+
declare void @v4f16_user(<4 x half>)
11+
declare <4 x half> @v4f16_result()
12+
13+
declare void @v8f16_user(<8 x half>)
14+
declare <8 x half> @v8f16_result()
15+
16+
define void @f16_arg(half %arg, ptr %ptr) #0 {
17+
; NOFP16-LABEL: f16_arg:
18+
; NOFP16: // %bb.0:
19+
; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
20+
; NOFP16-NEXT: .cfi_def_cfa_offset 16
21+
; NOFP16-NEXT: .cfi_offset w19, -8
22+
; NOFP16-NEXT: .cfi_offset w30, -16
23+
; NOFP16-NEXT: and w0, w0, #0xffff
24+
; NOFP16-NEXT: mov x19, x1
25+
; NOFP16-NEXT: bl __gnu_h2f_ieee
26+
; NOFP16-NEXT: str w0, [x19]
27+
; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
28+
; NOFP16-NEXT: ret
29+
%fpext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
30+
store float %fpext, ptr %ptr
31+
ret void
32+
}
33+
34+
define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 {
35+
; NOFP16-LABEL: v2f16_arg:
36+
; NOFP16: // %bb.0:
37+
; NOFP16-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
38+
; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
39+
; NOFP16-NEXT: .cfi_def_cfa_offset 32
40+
; NOFP16-NEXT: .cfi_offset w19, -8
41+
; NOFP16-NEXT: .cfi_offset w20, -16
42+
; NOFP16-NEXT: .cfi_offset w21, -24
43+
; NOFP16-NEXT: .cfi_offset w30, -32
44+
; NOFP16-NEXT: and w0, w0, #0xffff
45+
; NOFP16-NEXT: mov x19, x2
46+
; NOFP16-NEXT: mov w20, w1
47+
; NOFP16-NEXT: bl __gnu_h2f_ieee
48+
; NOFP16-NEXT: mov w21, w0
49+
; NOFP16-NEXT: and w0, w20, #0xffff
50+
; NOFP16-NEXT: bl __gnu_h2f_ieee
51+
; NOFP16-NEXT: stp w21, w0, [x19]
52+
; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
53+
; NOFP16-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
54+
; NOFP16-NEXT: ret
55+
%fpext = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
56+
store <2 x float> %fpext, ptr %ptr
57+
ret void
58+
}
59+
60+
define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
61+
; NOFP16-LABEL: v3f16_arg:
62+
; NOFP16: // %bb.0:
63+
; NOFP16-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
64+
; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
65+
; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
66+
; NOFP16-NEXT: .cfi_def_cfa_offset 48
67+
; NOFP16-NEXT: .cfi_offset w19, -8
68+
; NOFP16-NEXT: .cfi_offset w20, -16
69+
; NOFP16-NEXT: .cfi_offset w21, -24
70+
; NOFP16-NEXT: .cfi_offset w22, -32
71+
; NOFP16-NEXT: .cfi_offset w30, -48
72+
; NOFP16-NEXT: mov w21, w0
73+
; NOFP16-NEXT: and w0, w2, #0xffff
74+
; NOFP16-NEXT: mov x19, x3
75+
; NOFP16-NEXT: mov w20, w1
76+
; NOFP16-NEXT: bl __gnu_h2f_ieee
77+
; NOFP16-NEXT: mov w22, w0
78+
; NOFP16-NEXT: and w0, w21, #0xffff
79+
; NOFP16-NEXT: bl __gnu_h2f_ieee
80+
; NOFP16-NEXT: mov w21, w0
81+
; NOFP16-NEXT: and w0, w20, #0xffff
82+
; NOFP16-NEXT: bl __gnu_h2f_ieee
83+
; NOFP16-NEXT: mov w8, w21
84+
; NOFP16-NEXT: // kill: def $w0 killed $w0 def $x0
85+
; NOFP16-NEXT: str w22, [x19, #8]
86+
; NOFP16-NEXT: orr x8, x8, x0, lsl #32
87+
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
88+
; NOFP16-NEXT: str x8, [x19]
89+
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
90+
; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
91+
; NOFP16-NEXT: ret
92+
%fpext = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
93+
store <3 x float> %fpext, ptr %ptr
94+
ret void
95+
}
96+
97+
define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
98+
; NOFP16-LABEL: v4f16_arg:
99+
; NOFP16: // %bb.0:
100+
; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
101+
; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
102+
; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
103+
; NOFP16-NEXT: .cfi_def_cfa_offset 48
104+
; NOFP16-NEXT: .cfi_offset w19, -8
105+
; NOFP16-NEXT: .cfi_offset w20, -16
106+
; NOFP16-NEXT: .cfi_offset w21, -24
107+
; NOFP16-NEXT: .cfi_offset w22, -32
108+
; NOFP16-NEXT: .cfi_offset w23, -40
109+
; NOFP16-NEXT: .cfi_offset w30, -48
110+
; NOFP16-NEXT: and w0, w0, #0xffff
111+
; NOFP16-NEXT: mov x19, x4
112+
; NOFP16-NEXT: mov w20, w3
113+
; NOFP16-NEXT: mov w21, w2
114+
; NOFP16-NEXT: mov w22, w1
115+
; NOFP16-NEXT: bl __gnu_h2f_ieee
116+
; NOFP16-NEXT: mov w23, w0
117+
; NOFP16-NEXT: and w0, w22, #0xffff
118+
; NOFP16-NEXT: bl __gnu_h2f_ieee
119+
; NOFP16-NEXT: mov w22, w0
120+
; NOFP16-NEXT: and w0, w21, #0xffff
121+
; NOFP16-NEXT: bl __gnu_h2f_ieee
122+
; NOFP16-NEXT: mov w21, w0
123+
; NOFP16-NEXT: and w0, w20, #0xffff
124+
; NOFP16-NEXT: bl __gnu_h2f_ieee
125+
; NOFP16-NEXT: stp w21, w0, [x19, #8]
126+
; NOFP16-NEXT: stp w23, w22, [x19]
127+
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
128+
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
129+
; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
130+
; NOFP16-NEXT: ret
131+
%fpext = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %arg, metadata !"fpexcept.strict")
132+
store <4 x float> %fpext, ptr %ptr
133+
ret void
134+
}
135+
136+
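; FIXME: The half-returning strict fptrunc tests below (f16/v2f16/v3f16/v4f16_return) are commented out and not exercised.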
137+
; define half @f16_return(float %arg) #0 {
138+
; %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
139+
; ret half %fptrunc
140+
; }
141+
142+
; define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
143+
; %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
144+
; ret <2 x half> %fptrunc
145+
; }
146+
147+
; define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
148+
; %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
149+
; ret <3 x half> %fptrunc
150+
; }
151+
152+
; define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
153+
; %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
154+
; ret <4 x half> %fptrunc
155+
; }
156+
157+
; FIXME: The outgoing f16/v2f16 argument and return tests below are commented out and not exercised.
158+
; define void @outgoing_f16_arg(ptr %ptr) #0 {
159+
; %val = load half, ptr %ptr
160+
; call void @f16_user(half %val)
161+
; ret void
162+
; }
163+
164+
; define void @outgoing_v2f16_arg(ptr %ptr) #0 {
165+
; %val = load <2 x half>, ptr %ptr
166+
; call void @v2f16_user(<2 x half> %val)
167+
; ret void
168+
; }
169+
170+
; define void @outgoing_f16_return(ptr %ptr) #0 {
171+
; %val = call half @f16_result()
172+
; store half %val, ptr %ptr
173+
; ret void
174+
; }
175+
176+
; define void @outgoing_v2f16_return(ptr %ptr) #0 {
177+
; %val = call <2 x half> @v2f16_result()
178+
; store <2 x half> %val, ptr %ptr
179+
; ret void
180+
; }
181+
182+
define void @outgoing_v4f16_return(ptr %ptr) #0 {
183+
; NOFP16-LABEL: outgoing_v4f16_return:
184+
; NOFP16: // %bb.0:
185+
; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
186+
; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
187+
; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
188+
; NOFP16-NEXT: .cfi_def_cfa_offset 48
189+
; NOFP16-NEXT: .cfi_offset w19, -8
190+
; NOFP16-NEXT: .cfi_offset w20, -16
191+
; NOFP16-NEXT: .cfi_offset w21, -24
192+
; NOFP16-NEXT: .cfi_offset w22, -32
193+
; NOFP16-NEXT: .cfi_offset w23, -40
194+
; NOFP16-NEXT: .cfi_offset w30, -48
195+
; NOFP16-NEXT: mov x19, x0
196+
; NOFP16-NEXT: bl v4f16_result
197+
; NOFP16-NEXT: and w0, w0, #0xffff
198+
; NOFP16-NEXT: mov w20, w1
199+
; NOFP16-NEXT: mov w21, w2
200+
; NOFP16-NEXT: mov w22, w3
201+
; NOFP16-NEXT: bl __gnu_h2f_ieee
202+
; NOFP16-NEXT: mov w23, w0
203+
; NOFP16-NEXT: and w0, w20, #0xffff
204+
; NOFP16-NEXT: bl __gnu_h2f_ieee
205+
; NOFP16-NEXT: mov w20, w0
206+
; NOFP16-NEXT: and w0, w21, #0xffff
207+
; NOFP16-NEXT: bl __gnu_h2f_ieee
208+
; NOFP16-NEXT: mov w21, w0
209+
; NOFP16-NEXT: and w0, w22, #0xffff
210+
; NOFP16-NEXT: bl __gnu_h2f_ieee
211+
; NOFP16-NEXT: bl __gnu_f2h_ieee
212+
; NOFP16-NEXT: strh w0, [x19, #6]
213+
; NOFP16-NEXT: mov w0, w21
214+
; NOFP16-NEXT: bl __gnu_f2h_ieee
215+
; NOFP16-NEXT: strh w0, [x19, #4]
216+
; NOFP16-NEXT: mov w0, w20
217+
; NOFP16-NEXT: bl __gnu_f2h_ieee
218+
; NOFP16-NEXT: strh w0, [x19, #2]
219+
; NOFP16-NEXT: mov w0, w23
220+
; NOFP16-NEXT: bl __gnu_f2h_ieee
221+
; NOFP16-NEXT: strh w0, [x19]
222+
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
223+
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
224+
; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
225+
; NOFP16-NEXT: ret
226+
%val = call <4 x half> @v4f16_result()
227+
store <4 x half> %val, ptr %ptr
228+
ret void
229+
}
230+
231+
define void @outgoing_v8f16_return(ptr %ptr) #0 {
232+
; NOFP16-LABEL: outgoing_v8f16_return:
233+
; NOFP16: // %bb.0:
234+
; NOFP16-NEXT: stp x30, x27, [sp, #-80]! // 16-byte Folded Spill
235+
; NOFP16-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
236+
; NOFP16-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
237+
; NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
238+
; NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
239+
; NOFP16-NEXT: .cfi_def_cfa_offset 80
240+
; NOFP16-NEXT: .cfi_offset w19, -8
241+
; NOFP16-NEXT: .cfi_offset w20, -16
242+
; NOFP16-NEXT: .cfi_offset w21, -24
243+
; NOFP16-NEXT: .cfi_offset w22, -32
244+
; NOFP16-NEXT: .cfi_offset w23, -40
245+
; NOFP16-NEXT: .cfi_offset w24, -48
246+
; NOFP16-NEXT: .cfi_offset w25, -56
247+
; NOFP16-NEXT: .cfi_offset w26, -64
248+
; NOFP16-NEXT: .cfi_offset w27, -72
249+
; NOFP16-NEXT: .cfi_offset w30, -80
250+
; NOFP16-NEXT: mov x19, x0
251+
; NOFP16-NEXT: bl v8f16_result
252+
; NOFP16-NEXT: and w0, w0, #0xffff
253+
; NOFP16-NEXT: mov w21, w1
254+
; NOFP16-NEXT: mov w22, w2
255+
; NOFP16-NEXT: mov w23, w3
256+
; NOFP16-NEXT: mov w24, w4
257+
; NOFP16-NEXT: mov w25, w5
258+
; NOFP16-NEXT: mov w26, w6
259+
; NOFP16-NEXT: mov w27, w7
260+
; NOFP16-NEXT: bl __gnu_h2f_ieee
261+
; NOFP16-NEXT: mov w20, w0
262+
; NOFP16-NEXT: and w0, w21, #0xffff
263+
; NOFP16-NEXT: bl __gnu_h2f_ieee
264+
; NOFP16-NEXT: mov w21, w0
265+
; NOFP16-NEXT: and w0, w22, #0xffff
266+
; NOFP16-NEXT: bl __gnu_h2f_ieee
267+
; NOFP16-NEXT: mov w22, w0
268+
; NOFP16-NEXT: and w0, w23, #0xffff
269+
; NOFP16-NEXT: bl __gnu_h2f_ieee
270+
; NOFP16-NEXT: mov w23, w0
271+
; NOFP16-NEXT: and w0, w24, #0xffff
272+
; NOFP16-NEXT: bl __gnu_h2f_ieee
273+
; NOFP16-NEXT: mov w24, w0
274+
; NOFP16-NEXT: and w0, w25, #0xffff
275+
; NOFP16-NEXT: bl __gnu_h2f_ieee
276+
; NOFP16-NEXT: mov w25, w0
277+
; NOFP16-NEXT: and w0, w26, #0xffff
278+
; NOFP16-NEXT: bl __gnu_h2f_ieee
279+
; NOFP16-NEXT: mov w26, w0
280+
; NOFP16-NEXT: and w0, w27, #0xffff
281+
; NOFP16-NEXT: bl __gnu_h2f_ieee
282+
; NOFP16-NEXT: bl __gnu_f2h_ieee
283+
; NOFP16-NEXT: strh w0, [x19, #14]
284+
; NOFP16-NEXT: mov w0, w26
285+
; NOFP16-NEXT: bl __gnu_f2h_ieee
286+
; NOFP16-NEXT: strh w0, [x19, #12]
287+
; NOFP16-NEXT: mov w0, w25
288+
; NOFP16-NEXT: bl __gnu_f2h_ieee
289+
; NOFP16-NEXT: strh w0, [x19, #10]
290+
; NOFP16-NEXT: mov w0, w24
291+
; NOFP16-NEXT: bl __gnu_f2h_ieee
292+
; NOFP16-NEXT: strh w0, [x19, #8]
293+
; NOFP16-NEXT: mov w0, w23
294+
; NOFP16-NEXT: bl __gnu_f2h_ieee
295+
; NOFP16-NEXT: strh w0, [x19, #6]
296+
; NOFP16-NEXT: mov w0, w22
297+
; NOFP16-NEXT: bl __gnu_f2h_ieee
298+
; NOFP16-NEXT: strh w0, [x19, #4]
299+
; NOFP16-NEXT: mov w0, w21
300+
; NOFP16-NEXT: bl __gnu_f2h_ieee
301+
; NOFP16-NEXT: strh w0, [x19, #2]
302+
; NOFP16-NEXT: mov w0, w20
303+
; NOFP16-NEXT: bl __gnu_f2h_ieee
304+
; NOFP16-NEXT: strh w0, [x19]
305+
; NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
306+
; NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
307+
; NOFP16-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
308+
; NOFP16-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
309+
; NOFP16-NEXT: ldp x30, x27, [sp], #80 // 16-byte Folded Reload
310+
; NOFP16-NEXT: ret
311+
%val = call <8 x half> @v8f16_result()
312+
store <8 x half> %val, ptr %ptr
313+
ret void
314+
}
315+
316+
define half @call_split_type_used_outside_block_v8f16() #0 {
317+
; NOFP16-LABEL: call_split_type_used_outside_block_v8f16:
318+
; NOFP16: // %bb.0: // %bb0
319+
; NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
320+
; NOFP16-NEXT: .cfi_def_cfa_offset 16
321+
; NOFP16-NEXT: .cfi_offset w30, -16
322+
; NOFP16-NEXT: bl v8f16_result
323+
; NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
324+
; NOFP16-NEXT: ret
325+
bb0:
326+
%split.ret.type = call <8 x half> @v8f16_result()
327+
br label %bb1
328+
329+
bb1:
330+
%extract = extractelement <8 x half> %split.ret.type, i32 0
331+
ret half %extract
332+
}
333+
334+
declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
335+
declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
336+
declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #0
337+
declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) #0
338+
339+
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0
340+
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0
341+
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #0
342+
declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) #0
343+
344+
attributes #0 = { strictfp }

0 commit comments

Comments
 (0)