Skip to content

Commit 152d4b8

Browse files
authored
[AArch64] Use indexed dup for 128b segmented splat (#144688)
Matches a splat of 128-bit segments into a wider Z register, expressed as a concat_vectors SDNode, and generates a dup zd.q, zn.q[0] instruction.
1 parent 349f8d6 commit 152d4b8

File tree

2 files changed

+152
-0
lines changed

2 files changed

+152
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29358,6 +29358,16 @@ SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
2935829358
EVT VT = Op.getValueType();
2935929359
EVT SrcVT = SrcOp1.getValueType();
2936029360

29361+
// Match a splat of 128b segments that fit in a single register.
29362+
if (SrcVT.is128BitVector() && all_equal(Op.getNode()->op_values())) {
29363+
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
29364+
SDValue Splat =
29365+
DAG.getNode(AArch64ISD::DUPLANE128, DL, ContainerVT,
29366+
convertToScalableVector(DAG, ContainerVT, SrcOp1),
29367+
DAG.getConstant(0, DL, MVT::i64, /*isTarget=*/true));
29368+
return convertFromScalableVector(DAG, VT, Splat);
29369+
}
29370+
2936129371
if (NumOperands > 2) {
2936229372
SmallVector<SDValue, 4> Ops;
2936329373
EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
3+
4+
;; Patterns that lower to concat_vectors where all incoming operands are the same.
5+
6+
define void @concat_i8q_256(<16 x i8> %data, ptr %addr) #0 {
7+
; CHECK-LABEL: concat_i8q_256:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
10+
; CHECK-NEXT: mov z0.q, q0
11+
; CHECK-NEXT: str z0, [x0]
12+
; CHECK-NEXT: ret
13+
%splat = shufflevector <16 x i8> %data, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
14+
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15+
store <32 x i8> %splat, ptr %addr, align 1
16+
ret void
17+
}
18+
19+
define void @concat_i16q_256(<8 x i16> %data, ptr %addr) #0 {
20+
; CHECK-LABEL: concat_i16q_256:
21+
; CHECK: // %bb.0:
22+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
23+
; CHECK-NEXT: mov z0.q, q0
24+
; CHECK-NEXT: str z0, [x0]
25+
; CHECK-NEXT: ret
26+
%splat = shufflevector <8 x i16> %data, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
27+
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
28+
store <16 x i16> %splat, ptr %addr, align 1
29+
ret void
30+
}
31+
32+
define void @concat_i32q_256(<4 x i32> %data, ptr %addr) #0 {
33+
; CHECK-LABEL: concat_i32q_256:
34+
; CHECK: // %bb.0:
35+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
36+
; CHECK-NEXT: mov z0.q, q0
37+
; CHECK-NEXT: str z0, [x0]
38+
; CHECK-NEXT: ret
39+
%splat = shufflevector <4 x i32> %data, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
40+
i32 0, i32 1, i32 2, i32 3>
41+
store <8 x i32> %splat, ptr %addr, align 1
42+
ret void
43+
}
44+
45+
define void @concat_i64q_256(<2 x i64> %data, ptr %addr) #0 {
46+
; CHECK-LABEL: concat_i64q_256:
47+
; CHECK: // %bb.0:
48+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
49+
; CHECK-NEXT: mov z0.q, q0
50+
; CHECK-NEXT: str z0, [x0]
51+
; CHECK-NEXT: ret
52+
%splat = shufflevector <2 x i64> %data, <2 x i64> poison, <4 x i32> <i32 0, i32 1,
53+
i32 0, i32 1>
54+
store <4 x i64> %splat, ptr %addr, align 1
55+
ret void
56+
}
57+
58+
define void @concat_f16q_256(<8 x half> %data, ptr %addr) #0 {
59+
; CHECK-LABEL: concat_f16q_256:
60+
; CHECK: // %bb.0:
61+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
62+
; CHECK-NEXT: mov z0.q, q0
63+
; CHECK-NEXT: str z0, [x0]
64+
; CHECK-NEXT: ret
65+
%splat = shufflevector <8 x half> %data, <8 x half> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
66+
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
67+
store <16 x half> %splat, ptr %addr, align 1
68+
ret void
69+
}
70+
71+
define void @concat_bf16q_256(<8 x bfloat> %data, ptr %addr) #0 {
72+
; CHECK-LABEL: concat_bf16q_256:
73+
; CHECK: // %bb.0:
74+
; CHECK-NEXT: stp q0, q0, [x0]
75+
; CHECK-NEXT: ret
76+
%splat = shufflevector <8 x bfloat> %data, <8 x bfloat> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
77+
i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
78+
store <16 x bfloat> %splat, ptr %addr, align 1
79+
ret void
80+
}
81+
82+
define void @concat_f32q_256(<4 x float> %data, ptr %addr) #0 {
83+
; CHECK-LABEL: concat_f32q_256:
84+
; CHECK: // %bb.0:
85+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
86+
; CHECK-NEXT: mov z0.q, q0
87+
; CHECK-NEXT: str z0, [x0]
88+
; CHECK-NEXT: ret
89+
%splat = shufflevector <4 x float> %data, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
90+
i32 0, i32 1, i32 2, i32 3>
91+
store <8 x float> %splat, ptr %addr, align 1
92+
ret void
93+
}
94+
95+
define void @concat_f64q_256(<2 x double> %data, ptr %addr) #0 {
96+
; CHECK-LABEL: concat_f64q_256:
97+
; CHECK: // %bb.0:
98+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
99+
; CHECK-NEXT: mov z0.q, q0
100+
; CHECK-NEXT: str z0, [x0]
101+
; CHECK-NEXT: ret
102+
%splat = shufflevector <2 x double> %data, <2 x double> poison, <4 x i32> <i32 0, i32 1,
103+
i32 0, i32 1>
104+
store <4 x double> %splat, ptr %addr, align 1
105+
ret void
106+
}
107+
108+
;; Test a wider vector
109+
110+
define void @concat_i32q_512_with_256_vectors(<4 x i32> %data, ptr %addr) #0 {
111+
; CHECK-LABEL: concat_i32q_512_with_256_vectors:
112+
; CHECK: // %bb.0:
113+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
114+
; CHECK-NEXT: mov z0.q, q0
115+
; CHECK-NEXT: str z0, [x0, #1, mul vl]
116+
; CHECK-NEXT: str z0, [x0]
117+
; CHECK-NEXT: ret
118+
%splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3,
119+
i32 0, i32 1, i32 2, i32 3,
120+
i32 0, i32 1, i32 2, i32 3,
121+
i32 0, i32 1, i32 2, i32 3>
122+
store <16 x i32> %splat, ptr %addr, align 1
123+
ret void
124+
}
125+
126+
define void @concat_i32q_512_with_512_vectors(<4 x i32> %data, ptr %addr) #1 {
127+
; CHECK-LABEL: concat_i32q_512_with_512_vectors:
128+
; CHECK: // %bb.0:
129+
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
130+
; CHECK-NEXT: mov z0.q, q0
131+
; CHECK-NEXT: str z0, [x0]
132+
; CHECK-NEXT: ret
133+
%splat = shufflevector <4 x i32> %data, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3,
134+
i32 0, i32 1, i32 2, i32 3,
135+
i32 0, i32 1, i32 2, i32 3,
136+
i32 0, i32 1, i32 2, i32 3>
137+
store <16 x i32> %splat, ptr %addr, align 1
138+
ret void
139+
}
140+
141+
attributes #0 = { vscale_range(2,2) "target-features"="+sve,+bf16" }
142+
attributes #1 = { vscale_range(4,4) "target-features"="+sve,+bf16" }

0 commit comments

Comments
 (0)