@@ -25,45 +25,28 @@ define <4 x i32> @test_urem_odd_vec_i32(<4 x i32> %X) nounwind readnone {
25
25
ret <4 x i32 > %ret
26
26
}
27
27
28
- ; Like test_urem_odd_vec_i32, but with 4 x i16 vectors.
29
- define <4 x i16 > @test_urem_odd_vec_i16 (<4 x i16 > %X ) nounwind readnone {
28
+ ; Like test_urem_odd_vec_i32, but with 8 x i16 vectors.
29
+ define <8 x i16 > @test_urem_odd_vec_i16 (<8 x i16 > %X ) nounwind readnone {
30
30
; CHECK-LABEL: test_urem_odd_vec_i16:
31
31
; CHECK: // %bb.0:
32
- ; CHECK-NEXT: mov w9, #52429
33
- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
34
- ; CHECK-NEXT: umov w8, v0.h[1]
35
- ; CHECK-NEXT: movk w9, #52428, lsl #16
36
- ; CHECK-NEXT: umull x12, w8, w9
37
- ; CHECK-NEXT: lsr x12, x12, #34
38
- ; CHECK-NEXT: umov w10, v0.h[0]
39
- ; CHECK-NEXT: add w12, w12, w12, lsl #2
40
- ; CHECK-NEXT: sub w8, w8, w12
41
- ; CHECK-NEXT: umull x12, w10, w9
42
- ; CHECK-NEXT: lsr x12, x12, #34
43
- ; CHECK-NEXT: umov w11, v0.h[2]
44
- ; CHECK-NEXT: add w12, w12, w12, lsl #2
45
- ; CHECK-NEXT: sub w10, w10, w12
46
- ; CHECK-NEXT: umull x12, w11, w9
47
- ; CHECK-NEXT: lsr x12, x12, #34
48
- ; CHECK-NEXT: add w12, w12, w12, lsl #2
49
- ; CHECK-NEXT: sub w11, w11, w12
50
- ; CHECK-NEXT: umov w12, v0.h[3]
51
- ; CHECK-NEXT: umull x9, w12, w9
52
- ; CHECK-NEXT: lsr x9, x9, #34
53
- ; CHECK-NEXT: fmov s0, w10
54
- ; CHECK-NEXT: add w9, w9, w9, lsl #2
55
- ; CHECK-NEXT: mov v0.h[1], w8
56
- ; CHECK-NEXT: sub w9, w12, w9
57
- ; CHECK-NEXT: mov v0.h[2], w11
58
- ; CHECK-NEXT: mov v0.h[3], w9
59
- ; CHECK-NEXT: cmeq v0.4h, v0.4h, #0
60
- ; CHECK-NEXT: movi v1.4h, #1
61
- ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
32
+ ; CHECK-NEXT: mov w8, #52429
33
+ ; CHECK-NEXT: dup v2.8h, w8
34
+ ; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
35
+ ; CHECK-NEXT: umull v2.4s, v0.4h, v2.4h
36
+ ; CHECK-NEXT: uzp2 v2.8h, v2.8h, v3.8h
37
+ ; CHECK-NEXT: movi v1.8h, #5
38
+ ; CHECK-NEXT: ushr v2.8h, v2.8h, #2
39
+ ; CHECK-NEXT: mls v0.8h, v2.8h, v1.8h
40
+ ; CHECK-NEXT: cmeq v0.8h, v0.8h, #0
41
+ ; CHECK-NEXT: movi v1.8h, #1
42
+ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
62
43
; CHECK-NEXT: ret
63
- %urem = urem <4 x i16 > %X , <i16 5 , i16 5 , i16 5 , i16 5 >
64
- %cmp = icmp eq <4 x i16 > %urem , <i16 0 , i16 0 , i16 0 , i16 0 >
65
- %ret = zext <4 x i1 > %cmp to <4 x i16 >
66
- ret <4 x i16 > %ret
44
+ %urem = urem <8 x i16 > %X , <i16 5 , i16 5 , i16 5 , i16 5 ,
45
+ i16 5 , i16 5 , i16 5 , i16 5 >
46
+ %cmp = icmp eq <8 x i16 > %urem , <i16 0 , i16 0 , i16 0 , i16 0 ,
47
+ i16 0 , i16 0 , i16 0 , i16 0 >
48
+ %ret = zext <8 x i1 > %cmp to <8 x i16 >
49
+ ret <8 x i16 > %ret
67
50
}
68
51
69
52
; Tests BuildUREMEqFold for 4 x i32 splat vectors with even divisor.
@@ -92,48 +75,31 @@ define <4 x i32> @test_urem_even_vec_i32(<4 x i32> %X) nounwind readnone {
92
75
ret <4 x i32 > %ret
93
76
}
94
77
95
- ; Like test_urem_even_vec_i32, but with 4 x i16 vectors.
78
+ ; Like test_urem_even_vec_i32, but with 8 x i16 vectors.
96
79
; i16 is not legal for ROTR on AArch64, but ROTR also cannot be promoted to i32,
97
80
; so this would crash if BuildUREMEqFold was applied.
98
- define <4 x i16 > @test_urem_even_vec_i16 (<4 x i16 > %X ) nounwind readnone {
81
+ define <8 x i16 > @test_urem_even_vec_i16 (<8 x i16 > %X ) nounwind readnone {
99
82
; CHECK-LABEL: test_urem_even_vec_i16:
100
83
; CHECK: // %bb.0:
101
- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
102
- ; CHECK-NEXT: umov w8, v0.h[1]
103
- ; CHECK-NEXT: mov w9, #9363
104
- ; CHECK-NEXT: movk w9, #37449, lsl #16
105
- ; CHECK-NEXT: umov w10, v0.h[0]
106
- ; CHECK-NEXT: umov w11, v0.h[2]
107
- ; CHECK-NEXT: umov w12, v0.h[3]
108
- ; CHECK-NEXT: ubfx w13, w8, #1, #15
109
- ; CHECK-NEXT: ubfx w14, w10, #1, #15
110
- ; CHECK-NEXT: ubfx w15, w11, #1, #15
111
- ; CHECK-NEXT: ubfx w16, w12, #1, #15
112
- ; CHECK-NEXT: umull x13, w13, w9
113
- ; CHECK-NEXT: umull x14, w14, w9
114
- ; CHECK-NEXT: umull x15, w15, w9
115
- ; CHECK-NEXT: umull x9, w16, w9
116
- ; CHECK-NEXT: orr w16, wzr, #0xe
117
- ; CHECK-NEXT: lsr x13, x13, #34
118
- ; CHECK-NEXT: msub w8, w13, w16, w8
119
- ; CHECK-NEXT: lsr x13, x14, #34
120
- ; CHECK-NEXT: msub w10, w13, w16, w10
121
- ; CHECK-NEXT: lsr x13, x15, #34
122
- ; CHECK-NEXT: fmov s0, w10
123
- ; CHECK-NEXT: msub w11, w13, w16, w11
124
- ; CHECK-NEXT: lsr x9, x9, #34
125
- ; CHECK-NEXT: mov v0.h[1], w8
126
- ; CHECK-NEXT: msub w9, w9, w16, w12
127
- ; CHECK-NEXT: mov v0.h[2], w11
128
- ; CHECK-NEXT: mov v0.h[3], w9
129
- ; CHECK-NEXT: cmeq v0.4h, v0.4h, #0
130
- ; CHECK-NEXT: movi v1.4h, #1
131
- ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
84
+ ; CHECK-NEXT: mov w8, #18725
85
+ ; CHECK-NEXT: ushr v1.8h, v0.8h, #1
86
+ ; CHECK-NEXT: dup v3.8h, w8
87
+ ; CHECK-NEXT: umull2 v4.4s, v1.8h, v3.8h
88
+ ; CHECK-NEXT: umull v1.4s, v1.4h, v3.4h
89
+ ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
90
+ ; CHECK-NEXT: movi v2.8h, #14
91
+ ; CHECK-NEXT: ushr v1.8h, v1.8h, #1
92
+ ; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
93
+ ; CHECK-NEXT: cmeq v0.8h, v0.8h, #0
94
+ ; CHECK-NEXT: movi v1.8h, #1
95
+ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
132
96
; CHECK-NEXT: ret
133
- %urem = urem <4 x i16 > %X , <i16 14 , i16 14 , i16 14 , i16 14 >
134
- %cmp = icmp eq <4 x i16 > %urem , <i16 0 , i16 0 , i16 0 , i16 0 >
135
- %ret = zext <4 x i1 > %cmp to <4 x i16 >
136
- ret <4 x i16 > %ret
97
+ %urem = urem <8 x i16 > %X , <i16 14 , i16 14 , i16 14 , i16 14 ,
98
+ i16 14 , i16 14 , i16 14 , i16 14 >
99
+ %cmp = icmp eq <8 x i16 > %urem , <i16 0 , i16 0 , i16 0 , i16 0 ,
100
+ i16 0 , i16 0 , i16 0 , i16 0 >
101
+ %ret = zext <8 x i1 > %cmp to <8 x i16 >
102
+ ret <8 x i16 > %ret
137
103
}
138
104
139
105
; We should not proceed with this fold if the divisor is 1 or -1
0 commit comments