
Commit c941de3

[X86] Fold AND(Y, XOR(X, SUB(0, X))) to ANDN(Y, BLSMSK(X))
XOR(X, SUB(0, X)) computes the bitwise negation of BLSMSK(X), i.e. of x ^ (x - 1). On its own this transformation is probably not profitable, but when the XOR is an operand of an AND, we can use an ANDN instruction to reduce the number of emitted instructions by one. Fixes #103501.
1 parent d378ea6 commit c941de3
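
The identity the message relies on: blsmsk(x) = x ^ (x - 1) sets every bit up to and including the lowest set bit, while x ^ -x sets every bit strictly above it, so the two are bitwise complements. A minimal standalone C++ check (illustrative only, not part of the commit) verifies this exhaustively over a 16-bit domain:

#include <cassert>
#include <cstdint>

int main() {
  // Check x ^ (0 - x) == ~(x ^ (x - 1)) for every 16-bit value,
  // including the x == 0 edge case (both sides are 0 there).
  for (uint32_t i = 0; i <= 0xFFFF; ++i) {
    uint16_t x = static_cast<uint16_t>(i);
    uint16_t lhs = x ^ static_cast<uint16_t>(0u - x);   // XOR(X, NEG(X))
    uint16_t rhs = ~static_cast<uint16_t>(x ^ (x - 1u)); // NOT(BLSMSK(X))
    assert(lhs == rhs);
  }
  return 0;
}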

File tree

2 files changed: +151 -72 lines

llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/andnot-blsmsk.ll

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 30 additions & 0 deletions
@@ -51045,6 +51045,31 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
+static SDValue combineAndXorSubWithBMI(SDValue Op, SDValue OtherOp, SDLoc DL,
+                                       SelectionDAG &DAG,
+                                       const X86Subtarget &Subtarget) {
+  using namespace llvm::SDPatternMatch;
+
+  EVT VT = Op.getValueType();
+  // Make sure this node is a candidate for BMI instructions.
+  if (!Subtarget.hasBMI() || !VT.isScalarInteger() ||
+      (VT != MVT::i32 && VT != MVT::i64))
+    return SDValue();
+
+  SDValue X;
+  if (!sd_match(Op,
+                m_OneUse(m_Xor(m_Value(X), m_OneUse(m_Neg(m_Deferred(X)))))))
+    return SDValue();
+
+  SDValue BLSMSK =
+      DAG.getNode(ISD::XOR, DL, VT, X,
+                  DAG.getNode(ISD::SUB, DL, VT, X, DAG.getConstant(1, DL, VT)));
+  SDValue AndN = DAG.getNode(ISD::AND, SDLoc(Op), VT, OtherOp,
+                             DAG.getNOT(SDLoc(Op), BLSMSK, VT));
+  return AndN;
+}
+
 static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
                                         SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,

@@ -51453,6 +51478,11 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineAndXorSubWithBMI(N0, N1, dl, DAG, Subtarget))
+    return R;
+  if (SDValue R = combineAndXorSubWithBMI(N1, N0, dl, DAG, Subtarget))
+    return R;
+
   return SDValue();
 }

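Read back as scalar code, the combine rewrites y & (x ^ -x) into andn(blsmsk(x), y); the NOT wrapped around the BLSMSK value is what lets instruction selection fold the AND and the inversion into a single ANDN. A small C++ sketch of the before/after shapes (blsmsk and andn here are illustrative stand-ins for the BMI instructions, not LLVM APIs or compiler intrinsics):

#include <cstdint>

// Scalar models of the two BMI instructions involved.
static uint32_t blsmsk(uint32_t X) { return X ^ (X - 1); } // BLSMSK
static uint32_t andn(uint32_t A, uint32_t B) { return ~A & B; } // ANDN

// Before the combine: AND(Y, XOR(X, SUB(0, X))) -- mov+neg+xor+and.
uint32_t before(uint32_t X, uint32_t Y) { return Y & (X ^ (0u - X)); }

// After the combine: ANDN(BLSMSK(X), Y) -- blsmskl+andnl, one
// instruction fewer, matching the updated X64-BMI check lines below.
uint32_t after(uint32_t X, uint32_t Y) { return andn(blsmsk(X), Y); }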
llvm/test/CodeGen/X86/andnot-blsmsk.ll

Lines changed: 121 additions & 72 deletions
@@ -7,91 +7,139 @@
 declare void @use(i32)
 
 define i32 @fold_and_xor_neg_v1_32(i32 %x, i32 %y) {
-; X86-LABEL: fold_and_xor_neg_v1_32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v1_32:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movl %ecx, %eax
+; X86-NOBMI-NEXT: negl %eax
+; X86-NOBMI-NEXT: xorl %ecx, %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: retl
 ;
-; X64-LABEL: fold_and_xor_neg_v1_32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: retq
+; X86-BMI-LABEL: fold_and_xor_neg_v1_32:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v1_32:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %edi, %eax
+; X64-NOBMI-NEXT: negl %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v1_32:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskl %edi, %eax
+; X64-BMI-NEXT: andnl %esi, %eax, %eax
+; X64-BMI-NEXT: retq
   %neg = sub i32 0, %x
   %xor = xor i32 %x, %neg
   %and = and i32 %xor, %y
   ret i32 %and
 }
 
 define i32 @fold_and_xor_neg_v2_32(i32 %x, i32 %y) {
-; X86-LABEL: fold_and_xor_neg_v2_32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v2_32:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movl %ecx, %eax
+; X86-NOBMI-NEXT: negl %eax
+; X86-NOBMI-NEXT: xorl %ecx, %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: retl
 ;
-; X64-LABEL: fold_and_xor_neg_v2_32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: retq
+; X86-BMI-LABEL: fold_and_xor_neg_v2_32:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v2_32:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %edi, %eax
+; X64-NOBMI-NEXT: negl %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v2_32:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskl %edi, %eax
+; X64-BMI-NEXT: andnl %esi, %eax, %eax
+; X64-BMI-NEXT: retq
   %neg = sub i32 0, %x
   %xor = xor i32 %x, %neg
   %and = and i32 %y, %xor
   ret i32 %and
 }
 
 define i32 @fold_and_xor_neg_v3_32(i32 %x, i32 %y) {
-; X86-LABEL: fold_and_xor_neg_v3_32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v3_32:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movl %ecx, %eax
+; X86-NOBMI-NEXT: negl %eax
+; X86-NOBMI-NEXT: xorl %ecx, %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: retl
 ;
-; X64-LABEL: fold_and_xor_neg_v3_32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: retq
+; X86-BMI-LABEL: fold_and_xor_neg_v3_32:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v3_32:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %edi, %eax
+; X64-NOBMI-NEXT: negl %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v3_32:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskl %edi, %eax
+; X64-BMI-NEXT: andnl %esi, %eax, %eax
+; X64-BMI-NEXT: retq
   %neg = sub i32 0, %x
   %xor = xor i32 %neg, %x
   %and = and i32 %xor, %y
   ret i32 %and
 }
 
 define i32 @fold_and_xor_neg_v4_32(i32 %x, i32 %y) {
-; X86-LABEL: fold_and_xor_neg_v4_32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v4_32:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movl %ecx, %eax
+; X86-NOBMI-NEXT: negl %eax
+; X86-NOBMI-NEXT: xorl %ecx, %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: retl
 ;
-; X64-LABEL: fold_and_xor_neg_v4_32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: retq
+; X86-BMI-LABEL: fold_and_xor_neg_v4_32:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v4_32:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %edi, %eax
+; X64-NOBMI-NEXT: negl %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v4_32:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskl %edi, %eax
+; X64-BMI-NEXT: andnl %esi, %eax, %eax
+; X64-BMI-NEXT: retq
   %neg = sub i32 0, %x
   %xor = xor i32 %neg, %x
   %and = and i32 %y, %xor

@@ -118,13 +166,19 @@ define i64 @fold_and_xor_neg_v1_64(i64 %x, i64 %y) {
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
 ;
-; X64-LABEL: fold_and_xor_neg_v1_64:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: xorq %rdi, %rax
-; X64-NEXT: andq %rsi, %rax
-; X64-NEXT: retq
+; X64-NOBMI-LABEL: fold_and_xor_neg_v1_64:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movq %rdi, %rax
+; X64-NOBMI-NEXT: negq %rax
+; X64-NOBMI-NEXT: xorq %rdi, %rax
+; X64-NOBMI-NEXT: andq %rsi, %rax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v1_64:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskq %rdi, %rax
+; X64-BMI-NEXT: andnq %rsi, %rax, %rax
+; X64-BMI-NEXT: retq
   %neg = sub i64 0, %x
   %xor = xor i64 %x, %neg
   %and = and i64 %xor, %y

@@ -290,8 +344,3 @@ define i32 @fold_and_xor_neg_v1_32_no_blsmsk_negative(i32 %x, i32 %y, i32 %z) {
   %and = and i32 %xor, %y
   ret i32 %and
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; X64-BMI: {{.*}}
-; X64-NOBMI: {{.*}}
-; X86-BMI: {{.*}}
-; X86-NOBMI: {{.*}}
