
Commit 4b35624

[AArch64] Add SVE lowering of fixed-length UABD/SABD (#104991)
1 parent b765fdd commit 4b35624

3 files changed: +477 -0 lines changed
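For reference, SABD and UABD compute the element-wise absolute difference of two integer vectors: each lane pair is widened (sign- or zero-extended) so the subtraction cannot wrap, subtracted, and the absolute value is narrowed back to the lane type. A minimal scalar C++ model of the signed byte case the new tests exercise (the function is illustrative and not part of the commit):

#include <cstdint>
#include <cstdlib>

// Scalar model of SABD on i8 lanes: widen, subtract, abs, narrow.
void sabd_bytes(int8_t *a, const int8_t *b, int n) {
  for (int i = 0; i < n; ++i) {
    int wide = int{a[i]} - int{b[i]};           // sext %a, sext %b, sub
    a[i] = static_cast<int8_t>(std::abs(wide)); // llvm.abs, trunc, store
  }
}

The unsigned (UABD) variant is identical except that both operands are zero-extended.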

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 0 deletions

@@ -2055,6 +2055,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
 
   // Lower fixed length vector operations to scalable equivalents.
+  setOperationAction(ISD::ABDS, VT, Default);
+  setOperationAction(ISD::ABDU, VT, Default);
   setOperationAction(ISD::ABS, VT, Default);
   setOperationAction(ISD::ADD, VT, Default);
   setOperationAction(ISD::AND, VT, Default);
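Here Default is the legalization action computed earlier in addTypeForFixedLengthSVE from the PreferNEON/PreferSVE decision, so fixed-length ABDS/ABDU nodes now take the same path as the neighbouring operations: when SVE lowering is chosen, the node reaches the target's custom-lowering hook. A hypothetical sketch of that standard TargetLowering flow (the helper name LowerFixedLengthOpToSVE is illustrative; this is not code from the commit):

// Hypothetical sketch (not from this commit): once an opcode is marked
// Custom for a type, legalization calls the target's LowerOperation hook,
// which dispatches to a helper that re-expresses the fixed-length node as
// a predicated operation on a scalable SVE container type.
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ABDS: // signed absolute difference
  case ISD::ABDU: // unsigned absolute difference
    return LowerFixedLengthOpToSVE(Op, DAG); // illustrative helper name
  // ... other custom-lowered opcodes ...
  default:
    llvm_unreachable("unexpected custom lowering");
  }
}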
Lines changed: 183 additions & 0 deletions

@@ -0,0 +1,183 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 128-bit vectors.
define void @sabd_v16i8_v16i16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: sabd_v16i8_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
  %a.ld = load <16 x i8>, ptr %a
  %b.ld = load <16 x i8>, ptr %b
  %a.sext = sext <16 x i8> %a.ld to <16 x i16>
  %b.sext = sext <16 x i8> %b.ld to <16 x i16>
  %sub = sub <16 x i16> %a.sext, %b.sext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  store <16 x i8> %trunc, ptr %a
  ret void
}

; Don't use SVE for 128-bit vectors.
define void @sabd_v16i8_v16i32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: sabd_v16i8_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
  %a.ld = load <16 x i8>, ptr %a
  %b.ld = load <16 x i8>, ptr %b
  %a.sext = sext <16 x i8> %a.ld to <16 x i32>
  %b.sext = sext <16 x i8> %b.ld to <16 x i32>
  %sub = sub <16 x i32> %a.sext, %b.sext
  %abs = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %sub, i1 true)
  %trunc = trunc <16 x i32> %abs to <16 x i8>
  store <16 x i8> %trunc, ptr %a
  ret void
}

; Don't use SVE for 128-bit vectors.
define void @sabd_v16i8_v16i64(ptr %a, ptr %b) #0 {
; CHECK-LABEL: sabd_v16i8_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
  %a.ld = load <16 x i8>, ptr %a
  %b.ld = load <16 x i8>, ptr %b
  %a.sext = sext <16 x i8> %a.ld to <16 x i64>
  %b.sext = sext <16 x i8> %b.ld to <16 x i64>
  %sub = sub <16 x i64> %a.sext, %b.sext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 true)
  %trunc = trunc <16 x i64> %abs to <16 x i8>
  store <16 x i8> %trunc, ptr %a
  ret void
}

define void @sabd_v32i8_v32i16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: sabd_v32i8_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
  %a.ld = load <32 x i8>, ptr %a
  %b.ld = load <32 x i8>, ptr %b
  %a.sext = sext <32 x i8> %a.ld to <32 x i16>
  %b.sext = sext <32 x i8> %b.ld to <32 x i16>
  %sub = sub <32 x i16> %a.sext, %b.sext
  %abs = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %sub, i1 true)
  %trunc = trunc <32 x i16> %abs to <32 x i8>
  store <32 x i8> %trunc, ptr %a
  ret void
}

define void @uabd_v32i8_v32i16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: uabd_v32i8_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: uabd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
  %a.ld = load <32 x i8>, ptr %a
  %b.ld = load <32 x i8>, ptr %b
  %a.zext = zext <32 x i8> %a.ld to <32 x i16>
  %b.zext = zext <32 x i8> %b.ld to <32 x i16>
  %sub = sub <32 x i16> %a.zext, %b.zext
  %abs = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %sub, i1 true)
  %trunc = trunc <32 x i16> %abs to <32 x i8>
  store <32 x i8> %trunc, ptr %a
  ret void
}

define void @sabd_v32i8_v32i32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: sabd_v32i8_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
  %a.ld = load <32 x i8>, ptr %a
  %b.ld = load <32 x i8>, ptr %b
  %a.sext = sext <32 x i8> %a.ld to <32 x i32>
  %b.sext = sext <32 x i8> %b.ld to <32 x i32>
  %sub = sub <32 x i32> %a.sext, %b.sext
  %abs = call <32 x i32> @llvm.abs.v32i32(<32 x i32> %sub, i1 true)
  %trunc = trunc <32 x i32> %abs to <32 x i8>
  store <32 x i8> %trunc, ptr %a
  ret void
}

define void @sabd_v32i8_v32i64(ptr %a, ptr %b) #0 {
; CHECK-LABEL: sabd_v32i8_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
  %a.ld = load <32 x i8>, ptr %a
  %b.ld = load <32 x i8>, ptr %b
  %a.sext = sext <32 x i8> %a.ld to <32 x i64>
  %b.sext = sext <32 x i8> %b.ld to <32 x i64>
  %sub = sub <32 x i64> %a.sext, %b.sext
  %abs = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %sub, i1 true)
  %trunc = trunc <32 x i64> %abs to <32 x i8>
  store <32 x i8> %trunc, ptr %a
  ret void
}

define void @sabd_v64i8_v64i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: sabd_v64i8_v64i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT: sabd z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: sabd z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: sabd_v64i8_v64i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.b, vl64
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT: sabd z0.b, p0/m, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
  %a.ld = load <64 x i8>, ptr %a
  %b.ld = load <64 x i8>, ptr %b
  %a.sext = sext <64 x i8> %a.ld to <64 x i64>
  %b.sext = sext <64 x i8> %b.ld to <64 x i64>
  %sub = sub <64 x i64> %a.sext, %b.sext
  %abs = call <64 x i64> @llvm.abs.v64i64(<64 x i64> %sub, i1 true)
  %trunc = trunc <64 x i64> %abs to <64 x i8>
  store <64 x i8> %trunc, ptr %a
  ret void
}

attributes #0 = { "target-features"="+neon,+sve" }
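As the checks above show, the generated code varies with the fixed vector width: 128-bit vectors keep the NEON sabd/uabd on v registers, wider fixed vectors are loaded under a vector-length-limited predicate (ptrue p0.b, vl32) and use the predicated SVE sabd/uabd on z registers, and the 64-byte case is split into two 32-byte halves when only 256-bit SVE is guaranteed (VBITS_GE_256) but handled in a single operation at 512 bits or more.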
