Skip to content

Commit 135413d

Browse files
committed
[NFC] Make vector types legal in UREM test
As discussed in D50222, this changes the vector types in tests required for that revision to ones legal for X86.

Patch by @hermord (Dmytro Shynkevych)

Differential Revision: https://reviews.llvm.org/D56372

llvm-svn: 353004
1 parent 64bebe9 commit 135413d

File tree

2 files changed

+99
-271
lines changed

2 files changed

+99
-271
lines changed

llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll

Lines changed: 39 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -25,45 +25,28 @@ define <4 x i32> @test_urem_odd_vec_i32(<4 x i32> %X) nounwind readnone {
2525
ret <4 x i32> %ret
2626
}
2727

28-
; Like test_urem_odd_vec_i32, but with 4 x i16 vectors.
29-
define <4 x i16> @test_urem_odd_vec_i16(<4 x i16> %X) nounwind readnone {
28+
; Like test_urem_odd_vec_i32, but with 8 x i16 vectors.
29+
define <8 x i16> @test_urem_odd_vec_i16(<8 x i16> %X) nounwind readnone {
3030
; CHECK-LABEL: test_urem_odd_vec_i16:
3131
; CHECK: // %bb.0:
32-
; CHECK-NEXT: mov w9, #52429
33-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
34-
; CHECK-NEXT: umov w8, v0.h[1]
35-
; CHECK-NEXT: movk w9, #52428, lsl #16
36-
; CHECK-NEXT: umull x12, w8, w9
37-
; CHECK-NEXT: lsr x12, x12, #34
38-
; CHECK-NEXT: umov w10, v0.h[0]
39-
; CHECK-NEXT: add w12, w12, w12, lsl #2
40-
; CHECK-NEXT: sub w8, w8, w12
41-
; CHECK-NEXT: umull x12, w10, w9
42-
; CHECK-NEXT: lsr x12, x12, #34
43-
; CHECK-NEXT: umov w11, v0.h[2]
44-
; CHECK-NEXT: add w12, w12, w12, lsl #2
45-
; CHECK-NEXT: sub w10, w10, w12
46-
; CHECK-NEXT: umull x12, w11, w9
47-
; CHECK-NEXT: lsr x12, x12, #34
48-
; CHECK-NEXT: add w12, w12, w12, lsl #2
49-
; CHECK-NEXT: sub w11, w11, w12
50-
; CHECK-NEXT: umov w12, v0.h[3]
51-
; CHECK-NEXT: umull x9, w12, w9
52-
; CHECK-NEXT: lsr x9, x9, #34
53-
; CHECK-NEXT: fmov s0, w10
54-
; CHECK-NEXT: add w9, w9, w9, lsl #2
55-
; CHECK-NEXT: mov v0.h[1], w8
56-
; CHECK-NEXT: sub w9, w12, w9
57-
; CHECK-NEXT: mov v0.h[2], w11
58-
; CHECK-NEXT: mov v0.h[3], w9
59-
; CHECK-NEXT: cmeq v0.4h, v0.4h, #0
60-
; CHECK-NEXT: movi v1.4h, #1
61-
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
32+
; CHECK-NEXT: mov w8, #52429
33+
; CHECK-NEXT: dup v2.8h, w8
34+
; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
35+
; CHECK-NEXT: umull v2.4s, v0.4h, v2.4h
36+
; CHECK-NEXT: uzp2 v2.8h, v2.8h, v3.8h
37+
; CHECK-NEXT: movi v1.8h, #5
38+
; CHECK-NEXT: ushr v2.8h, v2.8h, #2
39+
; CHECK-NEXT: mls v0.8h, v2.8h, v1.8h
40+
; CHECK-NEXT: cmeq v0.8h, v0.8h, #0
41+
; CHECK-NEXT: movi v1.8h, #1
42+
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
6243
; CHECK-NEXT: ret
63-
%urem = urem <4 x i16> %X, <i16 5, i16 5, i16 5, i16 5>
64-
%cmp = icmp eq <4 x i16> %urem, <i16 0, i16 0, i16 0, i16 0>
65-
%ret = zext <4 x i1> %cmp to <4 x i16>
66-
ret <4 x i16> %ret
44+
%urem = urem <8 x i16> %X, <i16 5, i16 5, i16 5, i16 5,
45+
i16 5, i16 5, i16 5, i16 5>
46+
%cmp = icmp eq <8 x i16> %urem, <i16 0, i16 0, i16 0, i16 0,
47+
i16 0, i16 0, i16 0, i16 0>
48+
%ret = zext <8 x i1> %cmp to <8 x i16>
49+
ret <8 x i16> %ret
6750
}
6851

6952
; Tests BuildUREMEqFold for 4 x i32 splat vectors with even divisor.
@@ -92,48 +75,31 @@ define <4 x i32> @test_urem_even_vec_i32(<4 x i32> %X) nounwind readnone {
9275
ret <4 x i32> %ret
9376
}
9477

95-
; Like test_urem_even_vec_i32, but with 4 x i16 vectors.
78+
; Like test_urem_even_vec_i32, but with 8 x i16 vectors.
9679
; i16 is not legal for ROTR on AArch64, but ROTR also cannot be promoted to i32,
9780
; so this would crash if BuildUREMEqFold was applied.
98-
define <4 x i16> @test_urem_even_vec_i16(<4 x i16> %X) nounwind readnone {
81+
define <8 x i16> @test_urem_even_vec_i16(<8 x i16> %X) nounwind readnone {
9982
; CHECK-LABEL: test_urem_even_vec_i16:
10083
; CHECK: // %bb.0:
101-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
102-
; CHECK-NEXT: umov w8, v0.h[1]
103-
; CHECK-NEXT: mov w9, #9363
104-
; CHECK-NEXT: movk w9, #37449, lsl #16
105-
; CHECK-NEXT: umov w10, v0.h[0]
106-
; CHECK-NEXT: umov w11, v0.h[2]
107-
; CHECK-NEXT: umov w12, v0.h[3]
108-
; CHECK-NEXT: ubfx w13, w8, #1, #15
109-
; CHECK-NEXT: ubfx w14, w10, #1, #15
110-
; CHECK-NEXT: ubfx w15, w11, #1, #15
111-
; CHECK-NEXT: ubfx w16, w12, #1, #15
112-
; CHECK-NEXT: umull x13, w13, w9
113-
; CHECK-NEXT: umull x14, w14, w9
114-
; CHECK-NEXT: umull x15, w15, w9
115-
; CHECK-NEXT: umull x9, w16, w9
116-
; CHECK-NEXT: orr w16, wzr, #0xe
117-
; CHECK-NEXT: lsr x13, x13, #34
118-
; CHECK-NEXT: msub w8, w13, w16, w8
119-
; CHECK-NEXT: lsr x13, x14, #34
120-
; CHECK-NEXT: msub w10, w13, w16, w10
121-
; CHECK-NEXT: lsr x13, x15, #34
122-
; CHECK-NEXT: fmov s0, w10
123-
; CHECK-NEXT: msub w11, w13, w16, w11
124-
; CHECK-NEXT: lsr x9, x9, #34
125-
; CHECK-NEXT: mov v0.h[1], w8
126-
; CHECK-NEXT: msub w9, w9, w16, w12
127-
; CHECK-NEXT: mov v0.h[2], w11
128-
; CHECK-NEXT: mov v0.h[3], w9
129-
; CHECK-NEXT: cmeq v0.4h, v0.4h, #0
130-
; CHECK-NEXT: movi v1.4h, #1
131-
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
84+
; CHECK-NEXT: mov w8, #18725
85+
; CHECK-NEXT: ushr v1.8h, v0.8h, #1
86+
; CHECK-NEXT: dup v3.8h, w8
87+
; CHECK-NEXT: umull2 v4.4s, v1.8h, v3.8h
88+
; CHECK-NEXT: umull v1.4s, v1.4h, v3.4h
89+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
90+
; CHECK-NEXT: movi v2.8h, #14
91+
; CHECK-NEXT: ushr v1.8h, v1.8h, #1
92+
; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
93+
; CHECK-NEXT: cmeq v0.8h, v0.8h, #0
94+
; CHECK-NEXT: movi v1.8h, #1
95+
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
13296
; CHECK-NEXT: ret
133-
%urem = urem <4 x i16> %X, <i16 14, i16 14, i16 14, i16 14>
134-
%cmp = icmp eq <4 x i16> %urem, <i16 0, i16 0, i16 0, i16 0>
135-
%ret = zext <4 x i1> %cmp to <4 x i16>
136-
ret <4 x i16> %ret
97+
%urem = urem <8 x i16> %X, <i16 14, i16 14, i16 14, i16 14,
98+
i16 14, i16 14, i16 14, i16 14>
99+
%cmp = icmp eq <8 x i16> %urem, <i16 0, i16 0, i16 0, i16 0,
100+
i16 0, i16 0, i16 0, i16 0>
101+
%ret = zext <8 x i1> %cmp to <8 x i16>
102+
ret <8 x i16> %ret
137103
}
138104

139105
; We should not proceed with this fold if the divisor is 1 or -1

0 commit comments

Comments
 (0)