Skip to content

Commit 1206313

Browse files
committed
[CodeGen][AArch64] Fix isel crash for truncating FP stores
When attempting to truncate an FP vector and store the result out to memory, we crashed because we had no pattern for truncating FP stores. In fact, we don't support these types of stores, and the correct fix is to stop marking these truncating stores as legal. Tests have been added here: CodeGen/AArch64/sve-fptrunc-store.ll. Differential Revision: https://reviews.llvm.org/D100025
1 parent 5299843 commit 1206313

File tree

2 files changed

+69
-0
lines changed

2 files changed

+69
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11801180

11811181
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
11821182
MVT::nxv4f32, MVT::nxv2f64}) {
1183+
for (auto InnerVT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16,
1184+
MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64}) {
1185+
// Avoid marking truncating FP stores as legal to prevent the
1186+
// DAGCombiner from creating unsupported truncating stores.
1187+
setTruncStoreAction(VT, InnerVT, Expand);
1188+
}
1189+
11831190
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
11841191
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
11851192
setOperationAction(ISD::MGATHER, VT, Custom);
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
3+
4+
define void @fptrunc2_f64_f32(<vscale x 2 x float> *%dst, <vscale x 2 x double> *%src) {
5+
; CHECK-LABEL: fptrunc2_f64_f32:
6+
; CHECK: // %bb.0: // %entry
7+
; CHECK-NEXT: ptrue p0.d
8+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
9+
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
10+
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
11+
; CHECK-NEXT: ret
12+
entry:
13+
%0 = load <vscale x 2 x double>, <vscale x 2 x double>* %src, align 8
14+
%1 = fptrunc <vscale x 2 x double> %0 to <vscale x 2 x float>
15+
store <vscale x 2 x float> %1, <vscale x 2 x float>* %dst, align 4
16+
ret void
17+
}
18+
19+
define void @fptrunc2_f64_f16(<vscale x 2 x half> *%dst, <vscale x 2 x double> *%src) {
20+
; CHECK-LABEL: fptrunc2_f64_f16:
21+
; CHECK: // %bb.0: // %entry
22+
; CHECK-NEXT: ptrue p0.d
23+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
24+
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
25+
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
26+
; CHECK-NEXT: ret
27+
entry:
28+
%0 = load <vscale x 2 x double>, <vscale x 2 x double>* %src, align 8
29+
%1 = fptrunc <vscale x 2 x double> %0 to <vscale x 2 x half>
30+
store <vscale x 2 x half> %1, <vscale x 2 x half>* %dst, align 2
31+
ret void
32+
}
33+
34+
define void @fptrunc4_f32_f16(<vscale x 4 x half> *%dst, <vscale x 4 x float> *%src) {
35+
; CHECK-LABEL: fptrunc4_f32_f16:
36+
; CHECK: // %bb.0: // %entry
37+
; CHECK-NEXT: ptrue p0.s
38+
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
39+
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
40+
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
41+
; CHECK-NEXT: ret
42+
entry:
43+
%0 = load <vscale x 4 x float>, <vscale x 4 x float>* %src, align 8
44+
%1 = fptrunc <vscale x 4 x float> %0 to <vscale x 4 x half>
45+
store <vscale x 4 x half> %1, <vscale x 4 x half>* %dst, align 2
46+
ret void
47+
}
48+
49+
define void @fptrunc2_f32_f16(<vscale x 2 x half> *%dst, <vscale x 2 x float> *%src) {
50+
; CHECK-LABEL: fptrunc2_f32_f16:
51+
; CHECK: // %bb.0: // %entry
52+
; CHECK-NEXT: ptrue p0.d
53+
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1]
54+
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
55+
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
56+
; CHECK-NEXT: ret
57+
entry:
58+
%0 = load <vscale x 2 x float>, <vscale x 2 x float>* %src, align 8
59+
%1 = fptrunc <vscale x 2 x float> %0 to <vscale x 2 x half>
60+
store <vscale x 2 x half> %1, <vscale x 2 x half>* %dst, align 2
61+
ret void
62+
}

0 commit comments

Comments
 (0)