Skip to content

Commit 4547f27

Browse files
committed
[AArch64][Interleave]: Add test precommit
Change-Id: I5e2613156a482dcadae3e4cfa1bacdf7f3293fe2
1 parent 3f3e85c commit 4547f27

File tree

1 file changed

+106
-0
lines changed

1 file changed

+106
-0
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

; NOTE(review): %struct.xyzt is not referenced by the functions below;
; presumably it documents the four-i32 record layout of the interleaved
; data these tests operate on (factor-4 interleaving) — confirm with the
; follow-up patch this precommit test is for.
%struct.xyzt = type { i32, i32, i32, i32 }
; Loads a factor-4 interleaved <vscale x 16 x i32> from %a, de-interleaves it
; into four <vscale x 4 x i32> streams via two levels of
; llvm.vector.deinterleave2, applies a distinct op per stream (add/sub/shl/ashr
; against %x), re-interleaves with llvm.vector.interleave2, and stores the
; result to %dst.  The CHECK lines were autogenerated by
; update_llc_test_checks.py — regenerate rather than hand-edit them.
define void @interleave(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %a, <vscale x 4 x i32> %x) {
; CHECK-LABEL: interleave:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld2w { z1.s, z2.s }, p0/z, [x1]
; CHECK-NEXT:    ld2w { z3.s, z4.s }, p0/z, [x1, #2, mul vl]
; CHECK-NEXT:    uzp2 z5.s, z1.s, z3.s
; CHECK-NEXT:    uzp1 z6.s, z1.s, z3.s
; CHECK-NEXT:    uzp2 z7.s, z2.s, z4.s
; CHECK-NEXT:    uzp1 z1.s, z2.s, z4.s
; CHECK-NEXT:    add z2.s, z0.s, z6.s
; CHECK-NEXT:    movprfx z3, z5
; CHECK-NEXT:    lsl z3.s, p0/m, z3.s, z0.s
; CHECK-NEXT:    sub z1.s, z1.s, z0.s
; CHECK-NEXT:    asrr z0.s, p0/m, z0.s, z7.s
; CHECK-NEXT:    zip1 z4.s, z2.s, z3.s
; CHECK-NEXT:    zip2 z2.s, z2.s, z3.s
; CHECK-NEXT:    zip1 z5.s, z1.s, z0.s
; CHECK-NEXT:    zip2 z3.s, z1.s, z0.s
; CHECK-NEXT:    st2w { z4.s, z5.s }, p0, [x0]
; CHECK-NEXT:    st2w { z2.s, z3.s }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  ; One wide load covering all four interleaved streams.
  %wide.vec = load <vscale x 16 x i32>, ptr %a, align 4
  ; First de-interleave level: split even/odd elements.
  %root.strided.vec = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %wide.vec)
  %3 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %root.strided.vec, 0
  %4 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %root.strided.vec, 1
  ; Second de-interleave level: %5/%6 from the even half, %7/%8 from the odd
  ; half, yielding the four original strided streams.
  %root.strided.vec55 = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %3)
  %5 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %root.strided.vec55, 0
  %6 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %root.strided.vec55, 1
  %root.strided.vec56 = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %4)
  %7 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %root.strided.vec56, 0
  %8 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %root.strided.vec56, 1
  ; A different operation per stream so the lanes stay distinguishable in the
  ; generated code: add, sub, shl, ashr — all against %x.
  %9 = add nsw <vscale x 4 x i32> %x, %5
  %10 = sub nsw <vscale x 4 x i32> %7, %x
  %11 = shl <vscale x 4 x i32> %6, %x
  %12 = ashr <vscale x 4 x i32> %8, %x
  ; Re-interleave in two levels (mirror of the de-interleave tree) and store.
  %interleaved.vec = tail call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %9, <vscale x 4 x i32> %11)
  %interleaved.vec61 = tail call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %10, <vscale x 4 x i32> %12)
  %interleaved.vec62 = tail call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> %interleaved.vec, <vscale x 8 x i32> %interleaved.vec61)
  store <vscale x 16 x i32> %interleaved.vec62, ptr %dst, align 4
  ret void
}

; Same structure as @interleave but at twice the width: the operand vectors
; are <vscale x 8 x i32> (two Z registers each), so codegen must split the
; work across two register pairs per stream.  The CHECK lines were
; autogenerated by update_llc_test_checks.py — regenerate rather than
; hand-edit them.
define void @wide_interleave(ptr noalias nocapture noundef writeonly %dst, ptr nocapture noundef readonly %a, <vscale x 8 x i32> %x) {
; CHECK-LABEL: wide_interleave:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld2w { z2.s, z3.s }, p0/z, [x1]
; CHECK-NEXT:    ld2w { z4.s, z5.s }, p0/z, [x1, #2, mul vl]
; CHECK-NEXT:    ld2w { z6.s, z7.s }, p0/z, [x1, #4, mul vl]
; CHECK-NEXT:    ld2w { z24.s, z25.s }, p0/z, [x1, #6, mul vl]
; CHECK-NEXT:    uzp2 z26.s, z2.s, z4.s
; CHECK-NEXT:    uzp1 z27.s, z2.s, z4.s
; CHECK-NEXT:    uzp2 z28.s, z3.s, z5.s
; CHECK-NEXT:    uzp1 z2.s, z3.s, z5.s
; CHECK-NEXT:    add z3.s, z0.s, z27.s
; CHECK-NEXT:    movprfx z4, z26
; CHECK-NEXT:    lsl z4.s, p0/m, z4.s, z0.s
; CHECK-NEXT:    sub z2.s, z2.s, z0.s
; CHECK-NEXT:    asrr z0.s, p0/m, z0.s, z28.s
; CHECK-NEXT:    zip1 z26.s, z3.s, z4.s
; CHECK-NEXT:    zip2 z3.s, z3.s, z4.s
; CHECK-NEXT:    zip1 z27.s, z2.s, z0.s
; CHECK-NEXT:    zip2 z4.s, z2.s, z0.s
; CHECK-NEXT:    uzp2 z0.s, z6.s, z24.s
; CHECK-NEXT:    uzp1 z2.s, z6.s, z24.s
; CHECK-NEXT:    st2w { z26.s, z27.s }, p0, [x0]
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    add z2.s, z1.s, z2.s
; CHECK-NEXT:    st2w { z3.s, z4.s }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    uzp2 z3.s, z7.s, z25.s
; CHECK-NEXT:    uzp1 z4.s, z7.s, z25.s
; CHECK-NEXT:    zip1 z5.s, z2.s, z0.s
; CHECK-NEXT:    sub z4.s, z4.s, z1.s
; CHECK-NEXT:    asrr z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT:    zip2 z2.s, z2.s, z0.s
; CHECK-NEXT:    zip1 z6.s, z4.s, z1.s
; CHECK-NEXT:    zip2 z3.s, z4.s, z1.s
; CHECK-NEXT:    st2w { z5.s, z6.s }, p0, [x0, #4, mul vl]
; CHECK-NEXT:    st2w { z2.s, z3.s }, p0, [x0, #6, mul vl]
; CHECK-NEXT:    ret
  ; One wide load covering all four interleaved streams (double-width form).
  %wide.vec = load <vscale x 32 x i32>, ptr %a, align 4
  ; First de-interleave level: split even/odd elements.
  %root.strided.vec = tail call { <vscale x 16 x i32>, <vscale x 16 x i32> } @llvm.vector.deinterleave2.nxv32i32(<vscale x 32 x i32> %wide.vec)
  %3 = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i32> } %root.strided.vec, 0
  %4 = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i32> } %root.strided.vec, 1
  ; Second de-interleave level: recover the four original strided streams.
  %root.strided.vec55 = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %3)
  %5 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %root.strided.vec55, 0
  %6 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %root.strided.vec55, 1
  %root.strided.vec56 = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %4)
  %7 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %root.strided.vec56, 0
  %8 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %root.strided.vec56, 1
  ; A different operation per stream: add, sub, shl, ashr — all against %x.
  %9 = add nsw <vscale x 8 x i32> %x, %5
  %10 = sub nsw <vscale x 8 x i32> %7, %x
  %11 = shl <vscale x 8 x i32> %6, %x
  %12 = ashr <vscale x 8 x i32> %8, %x
  ; Re-interleave in two levels (mirror of the de-interleave tree) and store.
  %interleaved.vec = tail call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> %9, <vscale x 8 x i32> %11)
  %interleaved.vec61 = tail call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> %10, <vscale x 8 x i32> %12)
  %interleaved.vec62 = tail call <vscale x 32 x i32> @llvm.vector.interleave2.nxv32i32(<vscale x 16 x i32> %interleaved.vec, <vscale x 16 x i32> %interleaved.vec61)
  store <vscale x 32 x i32> %interleaved.vec62, ptr %dst, align 4
  ret void
}

0 commit comments

Comments
 (0)