Commit 2365de5
[X86] Fold AND(Y, XOR(X, SUB(0, X))) to ANDN(Y, BLSMSK(X))
XOR(X, SUB(0, X)) is the bitwise negation of BLSMSK(X), where the BMI instruction BLSMSK computes x ^ (x - 1). On its own, this transformation is probably not profitable, but when the XOR is an operand of an AND, the inversion can be folded into an ANDN instruction, reducing the number of emitted instructions by one. Fixes #103501.
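To see the identity concretely: in two's complement, 0 - x == ~(x - 1), so x ^ (0 - x) is the bitwise NOT of x ^ (x - 1). A minimal standalone check (my own sketch, not part of the commit; the blsmsk helper is a hypothetical stand-in for the instruction):

    #include <cassert>
    #include <cstdint>

    // BLSMSK semantics: blsmsk(x) == x ^ (x - 1), the mask covering the
    // lowest set bit of x and every bit below it.
    static uint32_t blsmsk(uint32_t x) { return x ^ (x - 1); }

    int main() {
      for (uint32_t x : {1u, 2u, 12u, 0x80000000u}) {
        // 0 - x == ~(x - 1), hence
        // x ^ (0 - x) == x ^ ~(x - 1) == ~(x ^ (x - 1)) == ~blsmsk(x).
        assert((x ^ (0u - x)) == ~blsmsk(x));
        // Therefore y & (x ^ (0 - x)) == y & ~blsmsk(x), i.e. ANDN(y, blsmsk(x)).
        uint32_t y = 0xDEADBEEFu;
        assert((y & (x ^ (0u - x))) == (y & ~blsmsk(x)));
      }
      return 0;
    }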
1 parent c2b477d · commit 2365de5

File tree: 2 files changed, +149 −72 lines


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 28 additions & 0 deletions

@@ -51045,6 +51045,31 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
+static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
+                                       SelectionDAG &DAG,
+                                       const X86Subtarget &Subtarget) {
+  using namespace llvm::SDPatternMatch;
+
+  EVT VT = And->getValueType(0);
+  // Make sure this node is a candidate for BMI instructions.
+  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+    return SDValue();
+
+  SDValue X;
+  SDValue Y;
+  if (!sd_match(And, m_And(m_OneUse(m_Xor(m_Value(X),
+                                          m_OneUse(m_Neg(m_Deferred(X))))),
+                           m_Value(Y))))
+    return SDValue();
+
+  SDValue BLSMSK =
+      DAG.getNode(ISD::XOR, DL, VT, X,
+                  DAG.getNode(ISD::SUB, DL, VT, X, DAG.getConstant(1, DL, VT)));
+  SDValue AndN = DAG.getNode(ISD::AND, DL, VT, Y, DAG.getNOT(DL, BLSMSK, VT));
+  return AndN;
+}
+
 static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
                                         SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,

@@ -51453,6 +51478,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineAndXorSubWithBMI(N, dl, DAG, Subtarget))
+    return R;
+
   return SDValue();
 }
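Note that the combine builds generic ISD::XOR/ISD::SUB/ISD::AND nodes rather than X86-specific ones; instruction selection then matches the x ^ (x - 1) pair to BLSMSK and the AND of the inverted value to ANDN. In scalar C++ terms, the rewrite is roughly the following (an illustrative sketch; the function names are mine, not from the commit):

    #include <cstdint>

    // The input pattern, AND(Y, XOR(X, NEG(X))): neg + xor + and.
    uint64_t beforeFold(uint64_t x, uint64_t y) {
      return y & (x ^ (0ULL - x));
    }

    // The rewritten form: x ^ (x - 1) is selected as BLSMSKQ, and the AND
    // of the inverted mask as ANDNQ, saving one instruction overall.
    uint64_t afterFold(uint64_t x, uint64_t y) {
      uint64_t blsmsk = x ^ (x - 1);
      return y & ~blsmsk;
    }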

llvm/test/CodeGen/X86/andnot-blsmsk.ll

Lines changed: 121 additions & 72 deletions

@@ -7,91 +7,139 @@
 declare void @use(i32)
 
 define i32 @fold_and_xor_neg_v1_32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: fold_and_xor_neg_v1_32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    negl %eax
-; X86-NEXT:    xorl %ecx, %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v1_32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    negl %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: fold_and_xor_neg_v1_32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    xorl %edi, %eax
-; X64-NEXT:    andl %esi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: fold_and_xor_neg_v1_32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v1_32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    negl %eax
+; X64-NOBMI-NEXT:    xorl %edi, %eax
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v1_32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskl %edi, %eax
+; X64-BMI-NEXT:    andnl %esi, %eax, %eax
+; X64-BMI-NEXT:    retq
   %neg = sub i32 0, %x
   %xor = xor i32 %x, %neg
   %and = and i32 %xor, %y
   ret i32 %and
 }
 
 define i32 @fold_and_xor_neg_v2_32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: fold_and_xor_neg_v2_32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    negl %eax
-; X86-NEXT:    xorl %ecx, %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v2_32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    negl %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: fold_and_xor_neg_v2_32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    xorl %edi, %eax
-; X64-NEXT:    andl %esi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: fold_and_xor_neg_v2_32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v2_32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    negl %eax
+; X64-NOBMI-NEXT:    xorl %edi, %eax
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v2_32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskl %edi, %eax
+; X64-BMI-NEXT:    andnl %esi, %eax, %eax
+; X64-BMI-NEXT:    retq
   %neg = sub i32 0, %x
   %xor = xor i32 %x, %neg
   %and = and i32 %y, %xor
   ret i32 %and
 }
 
 define i32 @fold_and_xor_neg_v3_32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: fold_and_xor_neg_v3_32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    negl %eax
-; X86-NEXT:    xorl %ecx, %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v3_32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    negl %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: fold_and_xor_neg_v3_32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    xorl %edi, %eax
-; X64-NEXT:    andl %esi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: fold_and_xor_neg_v3_32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v3_32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    negl %eax
+; X64-NOBMI-NEXT:    xorl %edi, %eax
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v3_32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskl %edi, %eax
+; X64-BMI-NEXT:    andnl %esi, %eax, %eax
+; X64-BMI-NEXT:    retq
   %neg = sub i32 0, %x
   %xor = xor i32 %neg, %x
   %and = and i32 %xor, %y
   ret i32 %and
 }
 
 define i32 @fold_and_xor_neg_v4_32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: fold_and_xor_neg_v4_32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    negl %eax
-; X86-NEXT:    xorl %ecx, %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v4_32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    negl %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: fold_and_xor_neg_v4_32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    xorl %edi, %eax
-; X64-NEXT:    andl %esi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: fold_and_xor_neg_v4_32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v4_32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    negl %eax
+; X64-NOBMI-NEXT:    xorl %edi, %eax
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v4_32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskl %edi, %eax
+; X64-BMI-NEXT:    andnl %esi, %eax, %eax
+; X64-BMI-NEXT:    retq
   %neg = sub i32 0, %x
   %xor = xor i32 %neg, %x
   %and = and i32 %y, %xor

@@ -115,13 +163,19 @@ define i64 @fold_and_xor_neg_v1_64(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: fold_and_xor_neg_v1_64:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    negq %rax
-; X64-NEXT:    xorq %rdi, %rax
-; X64-NEXT:    andq %rsi, %rax
-; X64-NEXT:    retq
+; X64-NOBMI-LABEL: fold_and_xor_neg_v1_64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    negq %rax
+; X64-NOBMI-NEXT:    xorq %rdi, %rax
+; X64-NOBMI-NEXT:    andq %rsi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v1_64:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskq %rdi, %rax
+; X64-BMI-NEXT:    andnq %rsi, %rax, %rax
+; X64-BMI-NEXT:    retq
   %neg = sub i64 0, %x
   %xor = xor i64 %x, %neg
   %and = and i64 %xor, %y

@@ -271,8 +325,3 @@ define i32 @fold_and_xor_neg_v1_32_no_blsmsk_negative(i32 %x, i32 %y, i32 %z) nounwind {
   %and = and i32 %xor, %y
   ret i32 %and
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; X64-BMI: {{.*}}
-; X64-NOBMI: {{.*}}
-; X86-BMI: {{.*}}
-; X86-NOBMI: {{.*}}
