Commit 744c005

[AArch64][CodeGen] Fix crash when fptrunc returns fp16 with +nofp attr (#81724)
When lowering the fptrunc opcode returning fp16 with the +nofp flag enabled, we could trigger a compiler crash because no custom lowering was implemented for this case. This patch handles the case in which we need to promote an fp16 return type for fptrunc when the +nofp attr is enabled.
1 parent 4f7ab78 commit 744c005
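
A minimal reproducer, distilled from the new test added in this commit (the function name is illustrative), shows the IR that previously crashed llc when FP support is disabled:

; RUN: llc -mtriple=aarch64 -mattr=-fp-armv8 -o - %s
define half @repro(float %a) {
entry:
  %t = fptrunc float %a to half
  ret half %t
}

With the fix, the f16 result is promoted and the conversion is lowered to a soft-float libcall (bl __gnu_f2h_ieee) rather than reaching the unimplemented custom-lowering path.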

File tree

3 files changed: +138 -22 lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 5 deletions
@@ -541,10 +541,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
   setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
   setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
-  setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+  if (Subtarget->hasFPARMv8())
+    setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
   setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
   setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
-  setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+  if (Subtarget->hasFPARMv8())
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);

@@ -947,9 +949,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setTruncStoreAction(MVT::f128, MVT::f32, Expand);
   setTruncStoreAction(MVT::f128, MVT::f16, Expand);

-  setOperationAction(ISD::BITCAST, MVT::i16, Custom);
-  setOperationAction(ISD::BITCAST, MVT::f16, Custom);
-  setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+  if (Subtarget->hasFPARMv8()) {
+    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+    setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+    setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+  }

   // Indexed loads and stores are supported.
   for (unsigned im = (unsigned)ISD::PRE_INC;
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=-fp-armv8 -o - %s | FileCheck %s
+
+define half @f2h(float %a) {
+; CHECK-LABEL: f2h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %0 = fptrunc float %a to half
+  ret half %0
+}
+
+define bfloat @f2bfloat(float %a) {
+; CHECK-LABEL: f2bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __truncsfbf2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %0 = fptrunc float %a to bfloat
+  ret bfloat %0
+}
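
For contrast (not part of this diff), when FP support is available the same fptrunc is expected to select a single fcvt instruction instead of a libcall; a sketch assuming the default aarch64 feature set, with an illustrative function name:

; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s
; CHECK: fcvt h0, s0
define half @f2h_hardfp(float %a) {
entry:
  %0 = fptrunc float %a to half
  ret half %0
}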

llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll

Lines changed: 98 additions & 17 deletions
@@ -131,26 +131,107 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
   ret void
 }

-; FIXME:
-; define half @f16_return(float %arg) #0 {
-;   %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-;   ret half %fptrunc
-; }
+define half @f16_return(float %arg) #0 {
+; NOFP16-LABEL: f16_return:
+; NOFP16:       // %bb.0:
+; NOFP16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; NOFP16-NEXT:    .cfi_def_cfa_offset 16
+; NOFP16-NEXT:    .cfi_offset w30, -16
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; NOFP16-NEXT:    ret
+  %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  ret half %fptrunc
+}

-; define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
-;   %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-;   ret <2 x half> %fptrunc
-; }
+define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
+; NOFP16-LABEL: v2f16_return:
+; NOFP16:       // %bb.0:
+; NOFP16-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
+; NOFP16-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT:    .cfi_def_cfa_offset 32
+; NOFP16-NEXT:    .cfi_offset w19, -8
+; NOFP16-NEXT:    .cfi_offset w20, -16
+; NOFP16-NEXT:    .cfi_offset w30, -32
+; NOFP16-NEXT:    mov w19, w0
+; NOFP16-NEXT:    mov w0, w1
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    mov w20, w0
+; NOFP16-NEXT:    mov w0, w19
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    mov w1, w20
+; NOFP16-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
+; NOFP16-NEXT:    ret
+  %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  ret <2 x half> %fptrunc
+}

-; define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
-;   %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-;   ret <3 x half> %fptrunc
-; }
+define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
+; NOFP16-LABEL: v3f16_return:
+; NOFP16:       // %bb.0:
+; NOFP16-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; NOFP16-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT:    .cfi_def_cfa_offset 32
+; NOFP16-NEXT:    .cfi_offset w19, -8
+; NOFP16-NEXT:    .cfi_offset w20, -16
+; NOFP16-NEXT:    .cfi_offset w21, -24
+; NOFP16-NEXT:    .cfi_offset w30, -32
+; NOFP16-NEXT:    mov w20, w0
+; NOFP16-NEXT:    mov w0, w2
+; NOFP16-NEXT:    mov w19, w1
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    mov w21, w0
+; NOFP16-NEXT:    mov w0, w19
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    mov w19, w0
+; NOFP16-NEXT:    mov w0, w20
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    mov w1, w19
+; NOFP16-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT:    mov w2, w21
+; NOFP16-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; NOFP16-NEXT:    ret
+  %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  ret <3 x half> %fptrunc
+}

-; define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
-;   %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-;   ret <4 x half> %fptrunc
-; }
+define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
+; NOFP16-LABEL: v4f16_return:
+; NOFP16:       // %bb.0:
+; NOFP16-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
+; NOFP16-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; NOFP16-NEXT:    .cfi_def_cfa_offset 48
+; NOFP16-NEXT:    .cfi_offset w19, -8
+; NOFP16-NEXT:    .cfi_offset w20, -16
+; NOFP16-NEXT:    .cfi_offset w21, -24
+; NOFP16-NEXT:    .cfi_offset w22, -32
+; NOFP16-NEXT:    .cfi_offset w30, -48
+; NOFP16-NEXT:    mov w21, w0
+; NOFP16-NEXT:    mov w0, w3
+; NOFP16-NEXT:    mov w19, w2
+; NOFP16-NEXT:    mov w20, w1
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    mov w22, w0
+; NOFP16-NEXT:    mov w0, w19
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    mov w19, w0
+; NOFP16-NEXT:    mov w0, w20
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    mov w20, w0
+; NOFP16-NEXT:    mov w0, w21
+; NOFP16-NEXT:    bl __gnu_f2h_ieee
+; NOFP16-NEXT:    mov w1, w20
+; NOFP16-NEXT:    mov w2, w19
+; NOFP16-NEXT:    mov w3, w22
+; NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; NOFP16-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
+; NOFP16-NEXT:    ret
+  %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  ret <4 x half> %fptrunc
+}

 ; FIXME:
 ; define void @outgoing_f16_arg(ptr %ptr) #0 {
