Skip to content

Commit 8bea511

Browse files
authored
[X86] Fold AND(Y, XOR(X, SUB(0, X))) to ANDN(Y, BLSMSK(X)) (#128348)
XOR(X, SUB(0, X)) corresponds to a bitwise-negated BLSMSK instruction (i.e., the complement of x ^ (x - 1)). On its own, this transformation is probably not profitable, but when the XOR operation is an operand of an AND operation, we can use an ANDN instruction to reduce the number of emitted instructions by one. Fixes #103501.
1 parent 9889de8 commit 8bea511

File tree

2 files changed

+355
-0
lines changed

2 files changed

+355
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51064,6 +51064,31 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
5106451064
return SDValue();
5106551065
}
5106651066

51067+
/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
51068+
static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
51069+
SelectionDAG &DAG,
51070+
const X86Subtarget &Subtarget) {
51071+
using namespace llvm::SDPatternMatch;
51072+
51073+
EVT VT = And->getValueType(0);
51074+
// Make sure this node is a candidate for BMI instructions.
51075+
if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
51076+
return SDValue();
51077+
51078+
SDValue X;
51079+
SDValue Y;
51080+
if (!sd_match(And, m_And(m_OneUse(m_Xor(m_Value(X),
51081+
m_OneUse(m_Neg(m_Deferred(X))))),
51082+
m_Value(Y))))
51083+
return SDValue();
51084+
51085+
SDValue BLSMSK =
51086+
DAG.getNode(ISD::XOR, DL, VT, X,
51087+
DAG.getNode(ISD::SUB, DL, VT, X, DAG.getConstant(1, DL, VT)));
51088+
SDValue AndN = DAG.getNode(ISD::AND, DL, VT, Y, DAG.getNOT(DL, BLSMSK, VT));
51089+
return AndN;
51090+
}
51091+
5106751092
static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
5106851093
SelectionDAG &DAG,
5106951094
TargetLowering::DAGCombinerInfo &DCI,
@@ -51472,6 +51497,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5147251497
if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
5147351498
return R;
5147451499

51500+
if (SDValue R = combineAndXorSubWithBMI(N, dl, DAG, Subtarget))
51501+
return R;
51502+
5147551503
return SDValue();
5147651504
}
5147751505

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=i686-- -mattr=-bmi,+sse2 | FileCheck %s --check-prefixes=X86,X86-NOBMI
3+
; RUN: llc < %s -mtriple=i686-- -mattr=+bmi,+sse2 | FileCheck %s --check-prefixes=X86,X86-BMI
4+
; RUN: llc < %s -mtriple=x86_64-- -mattr=-bmi | FileCheck %s --check-prefixes=X64,X64-NOBMI
5+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-BMI
6+
7+
declare void @use(i32)
8+
9+
; i32, operands in canonical source order: and(xor(x, neg(x)), y).
; The CHECK lines below show the BMI fold to blsmsk + andn; without BMI the
; sequence stays neg + xor + and.
define i32 @fold_and_xor_neg_v1_32(i32 %x, i32 %y) nounwind {
; X86-NOBMI-LABEL: fold_and_xor_neg_v1_32:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl %ecx, %eax
; X86-NOBMI-NEXT: negl %eax
; X86-NOBMI-NEXT: xorl %ecx, %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI-LABEL: fold_and_xor_neg_v1_32:
; X86-BMI: # %bb.0:
; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI-NEXT: retl
;
; X64-NOBMI-LABEL: fold_and_xor_neg_v1_32:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: negl %eax
; X64-NOBMI-NEXT: xorl %edi, %eax
; X64-NOBMI-NEXT: andl %esi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI-LABEL: fold_and_xor_neg_v1_32:
; X64-BMI: # %bb.0:
; X64-BMI-NEXT: blsmskl %edi, %eax
; X64-BMI-NEXT: andnl %esi, %eax, %eax
; X64-BMI-NEXT: retq
  %neg = sub i32 0, %x       ; 0 - x
  %xor = xor i32 %x, %neg    ; x ^ -x == ~blsmsk(x)
  %and = and i32 %xor, %y
  ret i32 %and
}
43+
44+
; Same as v1 but with the AND operands commuted: and(y, xor(...)).
; The fold must still fire (AND is matched commutatively).
define i32 @fold_and_xor_neg_v2_32(i32 %x, i32 %y) nounwind {
; X86-NOBMI-LABEL: fold_and_xor_neg_v2_32:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl %ecx, %eax
; X86-NOBMI-NEXT: negl %eax
; X86-NOBMI-NEXT: xorl %ecx, %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI-LABEL: fold_and_xor_neg_v2_32:
; X86-BMI: # %bb.0:
; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI-NEXT: retl
;
; X64-NOBMI-LABEL: fold_and_xor_neg_v2_32:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: negl %eax
; X64-NOBMI-NEXT: xorl %edi, %eax
; X64-NOBMI-NEXT: andl %esi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI-LABEL: fold_and_xor_neg_v2_32:
; X64-BMI: # %bb.0:
; X64-BMI-NEXT: blsmskl %edi, %eax
; X64-BMI-NEXT: andnl %esi, %eax, %eax
; X64-BMI-NEXT: retq
  %neg = sub i32 0, %x
  %xor = xor i32 %x, %neg
  %and = and i32 %y, %xor    ; commuted AND
  ret i32 %and
}
78+
79+
; Same as v1 but with the XOR operands commuted: xor(neg(x), x).
; The fold must still fire (XOR is matched commutatively).
define i32 @fold_and_xor_neg_v3_32(i32 %x, i32 %y) nounwind {
; X86-NOBMI-LABEL: fold_and_xor_neg_v3_32:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl %ecx, %eax
; X86-NOBMI-NEXT: negl %eax
; X86-NOBMI-NEXT: xorl %ecx, %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI-LABEL: fold_and_xor_neg_v3_32:
; X86-BMI: # %bb.0:
; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI-NEXT: retl
;
; X64-NOBMI-LABEL: fold_and_xor_neg_v3_32:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: negl %eax
; X64-NOBMI-NEXT: xorl %edi, %eax
; X64-NOBMI-NEXT: andl %esi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI-LABEL: fold_and_xor_neg_v3_32:
; X64-BMI: # %bb.0:
; X64-BMI-NEXT: blsmskl %edi, %eax
; X64-BMI-NEXT: andnl %esi, %eax, %eax
; X64-BMI-NEXT: retq
  %neg = sub i32 0, %x
  %xor = xor i32 %neg, %x    ; commuted XOR
  %and = and i32 %xor, %y
  ret i32 %and
}
113+
114+
; Both the XOR and the AND operands commuted; the fold must still fire.
define i32 @fold_and_xor_neg_v4_32(i32 %x, i32 %y) nounwind {
; X86-NOBMI-LABEL: fold_and_xor_neg_v4_32:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl %ecx, %eax
; X86-NOBMI-NEXT: negl %eax
; X86-NOBMI-NEXT: xorl %ecx, %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI-LABEL: fold_and_xor_neg_v4_32:
; X86-BMI: # %bb.0:
; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI-NEXT: retl
;
; X64-NOBMI-LABEL: fold_and_xor_neg_v4_32:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: negl %eax
; X64-NOBMI-NEXT: xorl %edi, %eax
; X64-NOBMI-NEXT: andl %esi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI-LABEL: fold_and_xor_neg_v4_32:
; X64-BMI: # %bb.0:
; X64-BMI-NEXT: blsmskl %edi, %eax
; X64-BMI-NEXT: andnl %esi, %eax, %eax
; X64-BMI-NEXT: retq
  %neg = sub i32 0, %x
  %xor = xor i32 %neg, %x    ; commuted XOR
  %and = and i32 %y, %xor    ; commuted AND
  ret i32 %and
}
148+
149+
; i64 variant: folds to blsmskq + andnq on x86-64 with BMI. On 32-bit x86
; the i64 is legalized to register pairs, so no single BLSMSK is formed.
define i64 @fold_and_xor_neg_v1_64(i64 %x, i64 %y) nounwind {
; X86-LABEL: fold_and_xor_neg_v1_64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: xorl %esi, %edx
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-NOBMI-LABEL: fold_and_xor_neg_v1_64:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: negq %rax
; X64-NOBMI-NEXT: xorq %rdi, %rax
; X64-NOBMI-NEXT: andq %rsi, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI-LABEL: fold_and_xor_neg_v1_64:
; X64-BMI: # %bb.0:
; X64-BMI-NEXT: blsmskq %rdi, %rax
; X64-BMI-NEXT: andnq %rsi, %rax, %rax
; X64-BMI-NEXT: retq
  %neg = sub i64 0, %x
  %xor = xor i64 %x, %neg
  %and = and i64 %xor, %y
  ret i64 %and
}
184+
185+
; Negative test
186+
; Negative test: i16 is not a supported width for the fold (the combine only
; accepts i32/i64), so no blsmsk/andn appears in the output.
define i16 @fold_and_xor_neg_v1_16_negative(i16 %x, i16 %y) nounwind {
; X86-LABEL: fold_and_xor_neg_v1_16_negative:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: andw {{[0-9]+}}(%esp), %ax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: fold_and_xor_neg_v1_16_negative:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: xorl %edi, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
  %neg = sub i16 0, %x
  %xor = xor i16 %x, %neg
  %and = and i16 %xor, %y
  ret i16 %and
}
210+
211+
; Negative test
212+
; Negative test: vector types are not eligible — BLSMSK/ANDN are scalar GPR
; instructions, so the pattern stays as psub/pxor/pand.
define <4 x i32> @fold_and_xor_neg_v1_v4x32_negative(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-LABEL: fold_and_xor_neg_v1_v4x32_negative:
; X86: # %bb.0:
; X86-NEXT: pxor %xmm2, %xmm2
; X86-NEXT: psubd %xmm0, %xmm2
; X86-NEXT: pxor %xmm2, %xmm0
; X86-NEXT: pand %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: fold_and_xor_neg_v1_v4x32_negative:
; X64: # %bb.0:
; X64-NEXT: pxor %xmm2, %xmm2
; X64-NEXT: psubd %xmm0, %xmm2
; X64-NEXT: pxor %xmm2, %xmm0
; X64-NEXT: pand %xmm1, %xmm0
; X64-NEXT: retq
  %neg = sub <4 x i32> zeroinitializer, %x
  %xor = xor <4 x i32> %x, %neg
  %and = and <4 x i32> %xor, %y
  ret <4 x i32> %and
}
233+
234+
; Negative test
235+
; Negative test: the XOR has a second use (passed to @use), so it is not
; one-use and the fold would duplicate work — no blsmsk/andn is emitted.
define i32 @fold_and_xor_neg_v1_32_two_uses_xor_negative(i32 %x, i32 %y) nounwind {
; X86-LABEL: fold_and_xor_neg_v1_32_two_uses_xor_negative:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: negl %ecx
; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: andl %ecx, %esi
; X86-NEXT: pushl %ecx
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: fold_and_xor_neg_v1_32_two_uses_xor_negative:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: movl %esi, %ebx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: xorl %eax, %edi
; X64-NEXT: andl %edi, %ebx
; X64-NEXT: callq use@PLT
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
  %neg = sub i32 0, %x
  %xor = xor i32 %x, %neg
  %and = and i32 %xor, %y
  call void @use(i32 %xor)   ; extra use of the XOR blocks the fold
  ret i32 %and
}
270+
271+
; Negative test
272+
; Negative test: the negation (sub 0, x) has a second use (passed to @use),
; so the inner operand is not one-use and the fold does not fire.
define i32 @fold_and_xor_neg_v1_32_two_uses_sub_negative(i32 %x, i32 %y) nounwind {
; X86-LABEL: fold_and_xor_neg_v1_32_two_uses_sub_negative:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: negl %eax
; X86-NEXT: xorl %eax, %esi
; X86-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: fold_and_xor_neg_v1_32_two_uses_sub_negative:
; X64: # %bb.0:
; X64-NEXT: pushq %rbx
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: negl %edi
; X64-NEXT: xorl %edi, %ebx
; X64-NEXT: andl %esi, %ebx
; X64-NEXT: callq use@PLT
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
  %neg = sub i32 0, %x
  %xor = xor i32 %x, %neg
  %and = and i32 %xor, %y
  call void @use(i32 %neg)   ; extra use of the negation blocks the fold
  ret i32 %and
}
305+
306+
; Negative test
307+
; Negative test: the negation is of %z, not %x, so xor(x, neg(z)) is not the
; BLSMSK pattern x ^ -x and no fold occurs.
define i32 @fold_and_xor_neg_v1_32_no_blsmsk_negative(i32 %x, i32 %y, i32 %z) nounwind {
; X86-LABEL: fold_and_xor_neg_v1_32_no_blsmsk_negative:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: fold_and_xor_neg_v1_32_no_blsmsk_negative:
; X64: # %bb.0:
; X64-NEXT: movl %edx, %eax
; X64-NEXT: negl %eax
; X64-NEXT: xorl %edi, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
  %neg = sub i32 0, %z       ; negation of a different value than the XOR's %x
  %xor = xor i32 %x, %neg
  %and = and i32 %xor, %y
  ret i32 %and
}

0 commit comments

Comments
 (0)