Skip to content

Commit e4e671c

Browse files
committed
[AArch64] add tests for demanded bits of multiply; NFC
This is adapted from existing tests for instcombine. We want to keep the backend logic synchronized with that as much as possible. See D119139 / D119060 / D118539
1 parent f8d889a commit e4e671c

File tree

1 file changed

+158
-0
lines changed

1 file changed

+158
-0
lines changed
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
3+
4+
; PR48683 'Quadratic Reciprocity' - and(mul(x,x),2) -> 0
5+
6+
; Scalar case: bit 1 of a square is always clear (x*x mod 4 is 0 or 1), so the
; masked compare below is always false. The expected output returns a constant
; zero without performing the multiply.
define i1 @PR48683(i32 %x) {
7+
; CHECK-LABEL: PR48683:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: mov w0, wzr
10+
; CHECK-NEXT: ret
11+
%a = mul i32 %x, %x
12+
%b = and i32 %a, 2 ; isolate bit 1 of the square
13+
%c = icmp ne i32 %b, 0
14+
ret i1 %c
15+
}
16+
17+
; Vector form of PR48683 with a uniform splat mask of 2 in every lane.
; The expected output is an all-zero vector with no multiply emitted.
define <4 x i1> @PR48683_vec(<4 x i32> %x) {
18+
; CHECK-LABEL: PR48683_vec:
19+
; CHECK: // %bb.0:
20+
; CHECK-NEXT: movi v0.2d, #0000000000000000
21+
; CHECK-NEXT: ret
22+
%a = mul <4 x i32> %x, %x
23+
%b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 2>
24+
%c = icmp ne <4 x i32> %b, zeroinitializer
25+
ret <4 x i1> %c
26+
}
27+
28+
; Same as PR48683_vec, except the last lane of the mask is undef.
; The expected output still contains the multiply, the bit test against a
; splat of 2, and the narrowing, i.e. the fold to zero is not applied here.
define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
29+
; CHECK-LABEL: PR48683_vec_undef:
30+
; CHECK: // %bb.0:
31+
; CHECK-NEXT: movi v1.4s, #2
32+
; CHECK-NEXT: mul v0.4s, v0.4s, v0.4s
33+
; CHECK-NEXT: cmtst v0.4s, v0.4s, v1.4s
34+
; CHECK-NEXT: xtn v0.4h, v0.4s
35+
; CHECK-NEXT: ret
36+
%a = mul <4 x i32> %x, %x
37+
%b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 undef> ; lane 3 of the mask is undef
38+
%c = icmp ne <4 x i32> %b, zeroinitializer
39+
ret <4 x i1> %c
40+
}
41+
42+
; mul(x,x) - bit[1] is always 0, but if the other bits are also demanded the source must not be undef (each use of an undef value may observe a different value)
43+
44+
; Squares %x and clears only bit 1 of the result (mask -3 has every bit set
; except bit 1). %x is used directly, without a freeze, and the expected output
; keeps the explicit 'and' after the multiply.
define i64 @combine_mul_self_demandedbits(i64 %x) {
45+
; CHECK-LABEL: combine_mul_self_demandedbits:
46+
; CHECK: // %bb.0:
47+
; CHECK-NEXT: mul x8, x0, x0
48+
; CHECK-NEXT: and x0, x8, #0xfffffffffffffffd
49+
; CHECK-NEXT: ret
50+
%1 = mul i64 %x, %x
51+
%2 = and i64 %1, -3
52+
ret i64 %2
53+
}
54+
55+
; Vector variant that freezes %x before squaring it, then clears bit 1 in each
; lane with a splat of -3. With the frozen source the expected output is only
; the multiply; no separate 'and' instruction remains.
define <4 x i32> @combine_mul_self_demandedbits_vector(<4 x i32> %x) {
56+
; CHECK-LABEL: combine_mul_self_demandedbits_vector:
57+
; CHECK: // %bb.0:
58+
; CHECK-NEXT: mul v0.4s, v0.4s, v0.4s
59+
; CHECK-NEXT: ret
60+
%1 = freeze <4 x i32> %x
61+
%2 = mul <4 x i32> %1, %1
62+
%3 = and <4 x i32> %2, <i32 -3, i32 -3, i32 -3, i32 -3>
63+
ret <4 x i32> %3
64+
}
65+
66+
; Multiplies by 192 and then ORs with 191. The OR sets every bit except bit 6,
; so bit 6 is the only bit of the product that can affect the result.
; The expected output forms the product as a negated left shift by 6
; (192 as an i8 is -64) followed by the OR.
define i8 @one_demanded_bit(i8 %x) {
67+
; CHECK-LABEL: one_demanded_bit:
68+
; CHECK: // %bb.0:
69+
; CHECK-NEXT: neg w8, w0, lsl #6
70+
; CHECK-NEXT: orr w0, w8, #0xffffffbf
71+
; CHECK-NEXT: ret
72+
%m = mul i8 %x, 192 ; 0b1100_0000
73+
%r = or i8 %m, 191 ; 0b1011_1111
74+
ret i8 %r
75+
}
76+
77+
; Vector variant: multiplies each lane by 160 and masks with 32, so only bit 5
; of each product is kept.
; The expected output handles the multiply per lane in general-purpose
; registers: each lane is moved out, computed as (x + (x << 2)) << 5, and
; inserted back, after which the vector is ANDed with a splat of 32.
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
78+
; CHECK-LABEL: one_demanded_bit_splat:
79+
; CHECK: // %bb.0:
80+
; CHECK-NEXT: fmov x8, d0
81+
; CHECK-NEXT: mov x9, v0.d[1]
82+
; CHECK-NEXT: add x8, x8, x8, lsl #2
83+
; CHECK-NEXT: lsl x8, x8, #5
84+
; CHECK-NEXT: add x9, x9, x9, lsl #2
85+
; CHECK-NEXT: fmov d0, x8
86+
; CHECK-NEXT: lsl x8, x9, #5
87+
; CHECK-NEXT: mov w9, #32
88+
; CHECK-NEXT: mov v0.d[1], x8
89+
; CHECK-NEXT: dup v1.2d, x9
90+
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
91+
; CHECK-NEXT: ret
92+
%m = mul <2 x i64> %x, <i64 160, i64 160> ; 0b1010_0000
93+
%r = and <2 x i64> %m, <i64 32, i64 32> ; 0b0010_0000
94+
ret <2 x i64> %r
95+
}
96+
97+
; Multiplies by an odd constant and keeps only bit 0. The low bit of x times
; an odd number equals the low bit of x.
; The expected output still emits a negate before masking with 1.
define i32 @one_demanded_low_bit(i32 %x) {
98+
; CHECK-LABEL: one_demanded_low_bit:
99+
; CHECK: // %bb.0:
100+
; CHECK-NEXT: neg w8, w0
101+
; CHECK-NEXT: and w0, w8, #0x1
102+
; CHECK-NEXT: ret
103+
%m = mul i32 %x, -63 ; any odd number will do
104+
%r = and i32 %m, 1
105+
ret i32 %r
106+
}
107+
108+
; Squares an i16 and keeps only bit 0 of the result. The low bit of x*x equals
; the low bit of x.
; The expected output still performs the 32-bit multiply before masking with 1.
define i16 @squared_one_demanded_low_bit(i16 %x) {
109+
; CHECK-LABEL: squared_one_demanded_low_bit:
110+
; CHECK: // %bb.0:
111+
; CHECK-NEXT: mul w8, w0, w0
112+
; CHECK-NEXT: and w0, w8, #0x1
113+
; CHECK-NEXT: ret
114+
%mul = mul i16 %x, %x
115+
%and = and i16 %mul, 1
116+
ret i16 %and
117+
}
118+
119+
; Vector variant that squares each lane and ORs it with a splat of -2. The OR
; sets every bit except bit 0, so only the low bit of each square is observable.
; Note that the result value is named %and although it is produced by an 'or'.
; The expected output keeps the vector multiply and ORs it with the
; materialised mask.
define <4 x i32> @squared_one_demanded_low_bit_splat(<4 x i32> %x) {
120+
; CHECK-LABEL: squared_one_demanded_low_bit_splat:
121+
; CHECK: // %bb.0:
122+
; CHECK-NEXT: mvni v1.4s, #1
123+
; CHECK-NEXT: mul v0.4s, v0.4s, v0.4s
124+
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
125+
; CHECK-NEXT: ret
126+
%mul = mul <4 x i32> %x, %x
127+
%and = or <4 x i32> %mul, <i32 -2, i32 -2, i32 -2, i32 -2>
128+
ret <4 x i32> %and
129+
}
130+
131+
; Squares %x and keeps the two lowest bits of the result (mask 3).
; The expected output is the multiply followed by an 'and' with 3.
define i32 @squared_demanded_2_low_bits(i32 %x) {
132+
; CHECK-LABEL: squared_demanded_2_low_bits:
133+
; CHECK: // %bb.0:
134+
; CHECK-NEXT: mul w8, w0, w0
135+
; CHECK-NEXT: and w0, w8, #0x3
136+
; CHECK-NEXT: ret
137+
%mul = mul i32 %x, %x
138+
%and = and i32 %mul, 3
139+
ret i32 %and
140+
}
141+
142+
; Vector variant that squares each i64 lane and ORs it with a splat of -2.
; The result value is named %and although it is produced by an 'or'.
; NOTE(review): the function name says two low bits are demanded, but a mask
; of -2 leaves only bit 0 unset; a mask of -4 would leave the two low bits.
; Confirm against the instcombine test this was adapted from before changing
; it, since the assertions below would need to be regenerated.
; The expected output multiplies each lane in general-purpose registers,
; rebuilds the vector, and ORs it with a splat of -2.
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
143+
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
144+
; CHECK: // %bb.0:
145+
; CHECK-NEXT: fmov x8, d0
146+
; CHECK-NEXT: mov x9, v0.d[1]
147+
; CHECK-NEXT: mul x8, x8, x8
148+
; CHECK-NEXT: mul x9, x9, x9
149+
; CHECK-NEXT: fmov d0, x8
150+
; CHECK-NEXT: mov x8, #-2
151+
; CHECK-NEXT: mov v0.d[1], x9
152+
; CHECK-NEXT: dup v1.2d, x8
153+
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
154+
; CHECK-NEXT: ret
155+
%mul = mul <2 x i64> %x, %x
156+
%and = or <2 x i64> %mul, <i64 -2, i64 -2>
157+
ret <2 x i64> %and
158+
}

0 commit comments

Comments
 (0)