Skip to content

Commit 07e231b

Browse files
[AArch64] Precommit testcase for optimised test of the LSB of a paired whileCC instruction
Change-Id: I5058e24c631ede0a04399b39e5096f898fa8f792
1 parent 04a75f5 commit 07e231b

File tree

1 file changed

+107
-0
lines changed

1 file changed

+107
-0
lines changed
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s --check-prefix=CHECK-SVE2p1
4+
target triple = "aarch64-linux"
5+
6+
; Branch on lane 0 of the low half of a wide active-lane mask.
;
; A <vscale x 16 x i1> mask is generated from (%i, %n), its low
; <vscale x 8 x i1> half is extracted (index 0), and element 0 of that half
; selects between calling @g0 (block A) and @g1 (block B). The high half is
; never used here, and both RUN configurations are expected to emit the same
; single-predicate whilelo/punpklo sequence followed by a test of bit 0.
define void @f_while(i32 %i, i32 %n) #0 {
; CHECK-LABEL: f_while:
; CHECK:       // %bb.0: // %E
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    whilelo p0.b, w0, w1
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tbz w8, #0, .LBB0_2
; CHECK-NEXT:  // %bb.1: // %A
; CHECK-NEXT:    bl g0
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_2: // %B
; CHECK-NEXT:    bl g1
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-SVE2p1-LABEL: f_while:
; CHECK-SVE2p1:       // %bb.0: // %E
; CHECK-SVE2p1-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SVE2p1-NEXT:    whilelo p0.b, w0, w1
; CHECK-SVE2p1-NEXT:    punpklo p0.h, p0.b
; CHECK-SVE2p1-NEXT:    mov z0.h, p0/z, #1 // =0x1
; CHECK-SVE2p1-NEXT:    fmov w8, s0
; CHECK-SVE2p1-NEXT:    tbz w8, #0, .LBB0_2
; CHECK-SVE2p1-NEXT:  // %bb.1: // %A
; CHECK-SVE2p1-NEXT:    bl g0
; CHECK-SVE2p1-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SVE2p1-NEXT:    ret
; CHECK-SVE2p1-NEXT:  .LBB0_2: // %B
; CHECK-SVE2p1-NEXT:    bl g1
; CHECK-SVE2p1-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SVE2p1-NEXT:    ret
E:
  ; The overload suffix names the operand type: both bounds are i32.
  %wide.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 %i, i32 %n)
  ; Low half of the wide mask; its first lane decides the branch.
  %mask = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %wide.mask, i64 0)
  %elt = extractelement <vscale x 8 x i1> %mask, i64 0
  br i1 %elt, label %A, label %B
A:
  call void @g0()
  ret void
B:
  call void @g1()
  ret void
}
52+
53+
; Branch on lane 0 of the low half of a wide active-lane mask while the high
; half is also live.
;
; A <vscale x 16 x i1> mask is generated from (%i, %n) and split into two
; <vscale x 8 x i1> halves (indices 0 and 8). Element 0 of the low half selects
; between blocks A and B; the high half is passed to @g0 / @g1. Without
; +sve2p1 the checks expect one whilelo followed by punpkhi/punpklo; with
; +sve2p1 they expect the paired `whilelo { p0.h, p1.h }` form, with the high
; half copied into p0 ahead of the call.
define void @f_while_x2(i32 %i, i32 %n) #0 {
; CHECK-LABEL: f_while_x2:
; CHECK:       // %bb.0: // %E
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    whilelo p1.b, w0, w1
; CHECK-NEXT:    punpkhi p0.h, p1.b
; CHECK-NEXT:    punpklo p1.h, p1.b
; CHECK-NEXT:    mov z0.h, p1/z, #1 // =0x1
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tbz w8, #0, .LBB1_2
; CHECK-NEXT:  // %bb.1: // %A
; CHECK-NEXT:    bl g0
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_2: // %B
; CHECK-NEXT:    bl g1
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-SVE2p1-LABEL: f_while_x2:
; CHECK-SVE2p1:       // %bb.0: // %E
; CHECK-SVE2p1-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SVE2p1-NEXT:    mov w8, w1
; CHECK-SVE2p1-NEXT:    mov w9, w0
; CHECK-SVE2p1-NEXT:    whilelo { p0.h, p1.h }, x9, x8
; CHECK-SVE2p1-NEXT:    mov z0.h, p0/z, #1 // =0x1
; CHECK-SVE2p1-NEXT:    mov p0.b, p1.b
; CHECK-SVE2p1-NEXT:    fmov w8, s0
; CHECK-SVE2p1-NEXT:    tbz w8, #0, .LBB1_2
; CHECK-SVE2p1-NEXT:  // %bb.1: // %A
; CHECK-SVE2p1-NEXT:    bl g0
; CHECK-SVE2p1-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SVE2p1-NEXT:    ret
; CHECK-SVE2p1-NEXT:  .LBB1_2: // %B
; CHECK-SVE2p1-NEXT:    bl g1
; CHECK-SVE2p1-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SVE2p1-NEXT:    ret
E:
  ; The overload suffix names the operand type: both bounds are i32.
  %wide.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 %i, i32 %n)
  ; High half: not tested, only forwarded to the callee in either branch.
  %mask.hi = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %wide.mask, i64 8)
  ; Low half: its first lane decides the branch.
  %mask = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %wide.mask, i64 0)
  %elt = extractelement <vscale x 8 x i1> %mask, i64 0
  br i1 %elt, label %A, label %B
A:
  call void @g0(<vscale x 8 x i1> %mask.hi)
  ret void
B:
  call void @g1(<vscale x 8 x i1> %mask.hi)
  ret void
}
103+
104+
; External callees used as the A / B branch targets of the test functions.
; Declared variadic: @f_while calls them with no arguments, while @f_while_x2
; passes a <vscale x 8 x i1> predicate.
declare void @g0(...)
105+
declare void @g1(...)
106+
107+
; Attribute group #0, referenced by both @f_while and @f_while_x2.
attributes #0 = { nounwind vscale_range(1,16) "target-cpu"="neoverse-v1" }

0 commit comments

Comments
 (0)