Skip to content

Commit 11b4dce

Browse files
[SVE] Lower fixed-length floating point loads and stores to integer variants.
There's no advantage to emitting floating point scalable accesses, whereas by lowering them to integer variants we can benefit from several combines that seek to replace explicit extends/truncates with extending/truncating accesses. Differential Revision: https://reviews.llvm.org/D132393
1 parent dc477a8 commit 11b4dce

File tree

4 files changed

+89
-139
lines changed

4 files changed

+89
-139
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21259,7 +21259,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
2125921259

2126021260
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
2126121261

21262-
if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
21262+
if (VT.isFloatingPoint()) {
2126321263
LoadVT = ContainerVT.changeTypeToInteger();
2126421264
MemVT = MemVT.changeTypeToInteger();
2126521265
}
@@ -21277,6 +21277,8 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
2127721277
Result = getSVESafeBitCast(ExtendVT, Result, DAG);
2127821278
Result = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
2127921279
Pg, Result, DAG.getUNDEF(ContainerVT));
21280+
} else if (VT.isFloatingPoint()) {
21281+
Result = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Result);
2128021282
}
2128121283

2128221284
Result = convertFromScalableVector(DAG, VT, Result);
@@ -21367,6 +21369,10 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
2136721369
DAG.getUNDEF(TruncVT));
2136821370
NewValue =
2136921371
getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG);
21372+
} else if (VT.isFloatingPoint()) {
21373+
MemVT = MemVT.changeTypeToInteger();
21374+
NewValue =
21375+
getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG);
2137021376
}
2137121377

2137221378
return DAG.getMaskedStore(Store->getChain(), DL, NewValue,

llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll

Lines changed: 16 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,8 @@ define void @fcvtzu_v16f16_v16i32(<16 x half>* %a, <16 x i32>* %b) #0 {
161161
;
162162
; VBITS_GE_512-LABEL: fcvtzu_v16f16_v16i32:
163163
; VBITS_GE_512: // %bb.0:
164-
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
165-
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
166164
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
167-
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
165+
; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
168166
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.h
169167
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
170168
; VBITS_GE_512-NEXT: ret
@@ -177,10 +175,8 @@ define void @fcvtzu_v16f16_v16i32(<16 x half>* %a, <16 x i32>* %b) #0 {
177175
define void @fcvtzu_v32f16_v32i32(<32 x half>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
178176
; CHECK-LABEL: fcvtzu_v32f16_v32i32:
179177
; CHECK: // %bb.0:
180-
; CHECK-NEXT: ptrue p0.h, vl32
181-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
182178
; CHECK-NEXT: ptrue p0.s, vl32
183-
; CHECK-NEXT: uunpklo z0.s, z0.h
179+
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
184180
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
185181
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
186182
; CHECK-NEXT: ret
@@ -193,10 +189,8 @@ define void @fcvtzu_v32f16_v32i32(<32 x half>* %a, <32 x i32>* %b) vscale_range(
193189
define void @fcvtzu_v64f16_v64i32(<64 x half>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
194190
; CHECK-LABEL: fcvtzu_v64f16_v64i32:
195191
; CHECK: // %bb.0:
196-
; CHECK-NEXT: ptrue p0.h, vl64
197-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
198192
; CHECK-NEXT: ptrue p0.s, vl64
199-
; CHECK-NEXT: uunpklo z0.s, z0.h
193+
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
200194
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
201195
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
202196
; CHECK-NEXT: ret
@@ -287,11 +281,8 @@ define void @fcvtzu_v8f16_v8i64(<8 x half>* %a, <8 x i64>* %b) #0 {
287281
define void @fcvtzu_v16f16_v16i64(<16 x half>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
288282
; CHECK-LABEL: fcvtzu_v16f16_v16i64:
289283
; CHECK: // %bb.0:
290-
; CHECK-NEXT: ptrue p0.h, vl16
291-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
292284
; CHECK-NEXT: ptrue p0.d, vl16
293-
; CHECK-NEXT: uunpklo z0.s, z0.h
294-
; CHECK-NEXT: uunpklo z0.d, z0.s
285+
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
295286
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
296287
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
297288
; CHECK-NEXT: ret
@@ -304,11 +295,8 @@ define void @fcvtzu_v16f16_v16i64(<16 x half>* %a, <16 x i64>* %b) vscale_range(
304295
define void @fcvtzu_v32f16_v32i64(<32 x half>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
305296
; CHECK-LABEL: fcvtzu_v32f16_v32i64:
306297
; CHECK: // %bb.0:
307-
; CHECK-NEXT: ptrue p0.h, vl32
308-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
309298
; CHECK-NEXT: ptrue p0.d, vl32
310-
; CHECK-NEXT: uunpklo z0.s, z0.h
311-
; CHECK-NEXT: uunpklo z0.d, z0.s
299+
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
312300
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
313301
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
314302
; CHECK-NEXT: ret
@@ -579,10 +567,8 @@ define void @fcvtzu_v8f32_v8i64(<8 x float>* %a, <8 x i64>* %b) #0 {
579567
;
580568
; VBITS_GE_512-LABEL: fcvtzu_v8f32_v8i64:
581569
; VBITS_GE_512: // %bb.0:
582-
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
583-
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
584570
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
585-
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
571+
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0]
586572
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.s
587573
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
588574
; VBITS_GE_512-NEXT: ret
@@ -595,10 +581,8 @@ define void @fcvtzu_v8f32_v8i64(<8 x float>* %a, <8 x i64>* %b) #0 {
595581
define void @fcvtzu_v16f32_v16i64(<16 x float>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
596582
; CHECK-LABEL: fcvtzu_v16f32_v16i64:
597583
; CHECK: // %bb.0:
598-
; CHECK-NEXT: ptrue p0.s, vl16
599-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
600584
; CHECK-NEXT: ptrue p0.d, vl16
601-
; CHECK-NEXT: uunpklo z0.d, z0.s
585+
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
602586
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
603587
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
604588
; CHECK-NEXT: ret
@@ -611,10 +595,8 @@ define void @fcvtzu_v16f32_v16i64(<16 x float>* %a, <16 x i64>* %b) vscale_range
611595
define void @fcvtzu_v32f32_v32i64(<32 x float>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
612596
; CHECK-LABEL: fcvtzu_v32f32_v32i64:
613597
; CHECK: // %bb.0:
614-
; CHECK-NEXT: ptrue p0.s, vl32
615-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
616598
; CHECK-NEXT: ptrue p0.d, vl32
617-
; CHECK-NEXT: uunpklo z0.d, z0.s
599+
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
618600
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
619601
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
620602
; CHECK-NEXT: ret
@@ -1087,10 +1069,8 @@ define void @fcvtzs_v16f16_v16i32(<16 x half>* %a, <16 x i32>* %b) #0 {
10871069
;
10881070
; VBITS_GE_512-LABEL: fcvtzs_v16f16_v16i32:
10891071
; VBITS_GE_512: // %bb.0:
1090-
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
1091-
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
10921072
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
1093-
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
1073+
; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
10941074
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.h
10951075
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
10961076
; VBITS_GE_512-NEXT: ret
@@ -1103,10 +1083,8 @@ define void @fcvtzs_v16f16_v16i32(<16 x half>* %a, <16 x i32>* %b) #0 {
11031083
define void @fcvtzs_v32f16_v32i32(<32 x half>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
11041084
; CHECK-LABEL: fcvtzs_v32f16_v32i32:
11051085
; CHECK: // %bb.0:
1106-
; CHECK-NEXT: ptrue p0.h, vl32
1107-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
11081086
; CHECK-NEXT: ptrue p0.s, vl32
1109-
; CHECK-NEXT: uunpklo z0.s, z0.h
1087+
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
11101088
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
11111089
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
11121090
; CHECK-NEXT: ret
@@ -1119,10 +1097,8 @@ define void @fcvtzs_v32f16_v32i32(<32 x half>* %a, <32 x i32>* %b) vscale_range(
11191097
define void @fcvtzs_v64f16_v64i32(<64 x half>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
11201098
; CHECK-LABEL: fcvtzs_v64f16_v64i32:
11211099
; CHECK: // %bb.0:
1122-
; CHECK-NEXT: ptrue p0.h, vl64
1123-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
11241100
; CHECK-NEXT: ptrue p0.s, vl64
1125-
; CHECK-NEXT: uunpklo z0.s, z0.h
1101+
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
11261102
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
11271103
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
11281104
; CHECK-NEXT: ret
@@ -1213,11 +1189,8 @@ define void @fcvtzs_v8f16_v8i64(<8 x half>* %a, <8 x i64>* %b) #0 {
12131189
define void @fcvtzs_v16f16_v16i64(<16 x half>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
12141190
; CHECK-LABEL: fcvtzs_v16f16_v16i64:
12151191
; CHECK: // %bb.0:
1216-
; CHECK-NEXT: ptrue p0.h, vl16
1217-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
12181192
; CHECK-NEXT: ptrue p0.d, vl16
1219-
; CHECK-NEXT: uunpklo z0.s, z0.h
1220-
; CHECK-NEXT: uunpklo z0.d, z0.s
1193+
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
12211194
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
12221195
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
12231196
; CHECK-NEXT: ret
@@ -1230,11 +1203,8 @@ define void @fcvtzs_v16f16_v16i64(<16 x half>* %a, <16 x i64>* %b) vscale_range(
12301203
define void @fcvtzs_v32f16_v32i64(<32 x half>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
12311204
; CHECK-LABEL: fcvtzs_v32f16_v32i64:
12321205
; CHECK: // %bb.0:
1233-
; CHECK-NEXT: ptrue p0.h, vl32
1234-
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
12351206
; CHECK-NEXT: ptrue p0.d, vl32
1236-
; CHECK-NEXT: uunpklo z0.s, z0.h
1237-
; CHECK-NEXT: uunpklo z0.d, z0.s
1207+
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
12381208
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
12391209
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
12401210
; CHECK-NEXT: ret
@@ -1505,10 +1475,8 @@ define void @fcvtzs_v8f32_v8i64(<8 x float>* %a, <8 x i64>* %b) #0 {
15051475
;
15061476
; VBITS_GE_512-LABEL: fcvtzs_v8f32_v8i64:
15071477
; VBITS_GE_512: // %bb.0:
1508-
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
1509-
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
15101478
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1511-
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
1479+
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0]
15121480
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.s
15131481
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
15141482
; VBITS_GE_512-NEXT: ret
@@ -1521,10 +1489,8 @@ define void @fcvtzs_v8f32_v8i64(<8 x float>* %a, <8 x i64>* %b) #0 {
15211489
define void @fcvtzs_v16f32_v16i64(<16 x float>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
15221490
; CHECK-LABEL: fcvtzs_v16f32_v16i64:
15231491
; CHECK: // %bb.0:
1524-
; CHECK-NEXT: ptrue p0.s, vl16
1525-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
15261492
; CHECK-NEXT: ptrue p0.d, vl16
1527-
; CHECK-NEXT: uunpklo z0.d, z0.s
1493+
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
15281494
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
15291495
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
15301496
; CHECK-NEXT: ret
@@ -1537,10 +1503,8 @@ define void @fcvtzs_v16f32_v16i64(<16 x float>* %a, <16 x i64>* %b) vscale_range
15371503
define void @fcvtzs_v32f32_v32i64(<32 x float>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
15381504
; CHECK-LABEL: fcvtzs_v32f32_v32i64:
15391505
; CHECK: // %bb.0:
1540-
; CHECK-NEXT: ptrue p0.s, vl32
1541-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
15421506
; CHECK-NEXT: ptrue p0.d, vl32
1543-
; CHECK-NEXT: uunpklo z0.d, z0.s
1507+
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
15441508
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
15451509
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
15461510
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -696,10 +696,9 @@ define void @ucvtf_v16i64_v16f16(<16 x i64>* %a, <16 x half>* %b) vscale_range(8
696696
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
697697
; CHECK-NEXT: ptrue p0.d
698698
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
699-
; CHECK-NEXT: ptrue p0.h, vl16
699+
; CHECK-NEXT: ptrue p0.s, vl16
700700
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
701-
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
702-
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
701+
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
703702
; CHECK-NEXT: ret
704703
%op1 = load <16 x i64>, <16 x i64>* %a
705704
%res = uitofp <16 x i64> %op1 to <16 x half>
@@ -714,10 +713,9 @@ define void @ucvtf_v32i64_v32f16(<32 x i64>* %a, <32 x half>* %b) vscale_range(1
714713
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
715714
; CHECK-NEXT: ptrue p0.d
716715
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
717-
; CHECK-NEXT: ptrue p0.h, vl32
716+
; CHECK-NEXT: ptrue p0.s, vl32
718717
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
719-
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
720-
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
718+
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
721719
; CHECK-NEXT: ret
722720
%op1 = load <32 x i64>, <32 x i64>* %a
723721
%res = uitofp <32 x i64> %op1 to <32 x half>
@@ -1638,10 +1636,9 @@ define void @scvtf_v16i64_v16f16(<16 x i64>* %a, <16 x half>* %b) vscale_range(8
16381636
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
16391637
; CHECK-NEXT: ptrue p0.d
16401638
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
1641-
; CHECK-NEXT: ptrue p0.h, vl16
1639+
; CHECK-NEXT: ptrue p0.s, vl16
16421640
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
1643-
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
1644-
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
1641+
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
16451642
; CHECK-NEXT: ret
16461643
%op1 = load <16 x i64>, <16 x i64>* %a
16471644
%res = sitofp <16 x i64> %op1 to <16 x half>
@@ -1656,10 +1653,9 @@ define void @scvtf_v32i64_v32f16(<32 x i64>* %a, <32 x half>* %b) vscale_range(1
16561653
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
16571654
; CHECK-NEXT: ptrue p0.d
16581655
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
1659-
; CHECK-NEXT: ptrue p0.h, vl32
1656+
; CHECK-NEXT: ptrue p0.s, vl32
16601657
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
1661-
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
1662-
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
1658+
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
16631659
; CHECK-NEXT: ret
16641660
%op1 = load <32 x i64>, <32 x i64>* %a
16651661
%res = sitofp <32 x i64> %op1 to <32 x half>

0 commit comments

Comments
 (0)