Skip to content

Commit e981e6d

Browse files
committed
Add transform for (and/or (icmp eq/ne A,-1),(icmp eq/ne A,-1+C))->(and/or (icmp eq/ne (and ~A,-1+C),0))
This works if `-1+C` is a negative power of 2. This can be more useful than the `AddAnd` case as `~A` does not necessarily require materializing a constant. This makes the transform worth it for X86 vector types. Alive2 Links: EQ: https://alive2.llvm.org/ce/z/P6u8cq NE: https://alive2.llvm.org/ce/z/_Kkqp1 Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D144284
1 parent 8c74c54 commit e981e6d

File tree

4 files changed

+91
-76
lines changed

4 files changed

+91
-76
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -284,10 +284,11 @@ class TargetLoweringBase {
284284

285285
/// Enum of different potentially desirable ways to fold (and/or (setcc ...),
286286
/// (setcc ...)).
287-
enum class AndOrSETCCFoldKind {
288-
None,
289-
AddAnd,
290-
ABS,
287+
enum AndOrSETCCFoldKind : uint8_t {
288+
None = 0, // No fold is preferable.
289+
AddAnd = 1, // Fold with `Add` op and `And` op is preferable.
290+
NotAnd = 2, // Fold with `Not` op and `And` op is preferable.
291+
ABS = 4, // Fold with `llvm.abs` op is preferable.
291292
};
292293

293294
class ArgListEntry {

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5926,7 +5926,7 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
59265926
// Preference is to use ISD::ABS or we already have an ISD::ABS (in which
59275927
// case this is just a compare).
59285928
if (APLhs == (-APRhs) &&
5929-
(TargetPreference == AndOrSETCCFoldKind::ABS ||
5929+
((TargetPreference & AndOrSETCCFoldKind::ABS) ||
59305930
DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
59315931
const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
59325932
// (icmp eq A, C) | (icmp eq A, -C)
@@ -5936,23 +5936,45 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
59365936
SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
59375937
return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
59385938
DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
5939-
} else if (TargetPreference == AndOrSETCCFoldKind::AddAnd) {
5939+
} else if (TargetPreference &
5940+
(AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
5941+
5942+
// AndOrSETCCFoldKind::AddAnd:
59405943
// A == C0 | A == C1
59415944
// IF IsPow2(smax(C0, C1)-smin(C0, C1))
59425945
// -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
59435946
// A != C0 & A != C1
59445947
// IF IsPow2(smax(C0, C1)-smin(C0, C1))
59455948
// -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
5949+
5950+
// AndOrSETCCFoldKind::NotAnd:
5951+
// A == C0 | A == C1
5952+
// IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
5953+
// -> ~A & smin(C0, C1) == 0
5954+
// A != C0 & A != C1
5955+
// IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
5956+
// -> ~A & smin(C0, C1) != 0
5957+
59465958
const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
59475959
const APInt &MinC = APIntOps::smin(APRhs, APLhs);
59485960
APInt Dif = MaxC - MinC;
59495961
if (!Dif.isZero() && Dif.isPowerOf2()) {
5950-
SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
5951-
DAG.getConstant(-MinC, DL, OpVT));
5952-
SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
5953-
DAG.getConstant(~Dif, DL, OpVT));
5954-
return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
5955-
DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
5962+
if (MaxC.isAllOnes() &&
5963+
(TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
5964+
SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
5965+
SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
5966+
DAG.getConstant(MinC, DL, OpVT));
5967+
return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
5968+
DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
5969+
} else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
5970+
5971+
SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
5972+
DAG.getConstant(-MinC, DL, OpVT));
5973+
SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
5974+
DAG.getConstant(~Dif, DL, OpVT));
5975+
return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
5976+
DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
5977+
}
59565978
}
59575979
}
59585980
}

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57260,9 +57260,18 @@ X86TargetLowering::isDesirableToCombineLogicOpOfSETCC(
5726057260
EVT OpVT = SETCC0->getOperand(0).getValueType();
5726157261
if (!VT.isInteger())
5726257262
return AndOrSETCCFoldKind::None;
57263+
5726357264
if (VT.isVector())
57264-
return isOperationLegal(ISD::ABS, OpVT) ? AndOrSETCCFoldKind::ABS
57265-
: AndOrSETCCFoldKind::None;
57265+
return AndOrSETCCFoldKind(AndOrSETCCFoldKind::NotAnd |
57266+
(isOperationLegal(ISD::ABS, OpVT)
57267+
? AndOrSETCCFoldKind::ABS
57268+
: AndOrSETCCFoldKind::None));
57269+
57270+
// Don't use `NotAnd` as even though `not` is generally shorter code size than
57271+
// `add`, `add` can lower to LEA which can save moves / spills. Any case where
57272+
// `NotAnd` applies, `AddAnd` does as well.
57273+
// TODO: Currently we lower (icmp eq/ne (and ~X, Y), 0) -> `test (not X), Y`,
57274+
// if we change that to `andn Y, X` it may be worth preferring `NotAnd` here.
5726657275
return AndOrSETCCFoldKind::AddAnd;
5726757276
}
5726857277

llvm/test/CodeGen/X86/icmp-pow2-diff.ll

Lines changed: 45 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -39,28 +39,24 @@ define <4 x i1> @andnot_eq_v4i32_todo_no_splat(<4 x i32> %x) nounwind {
3939
define <4 x i1> @andnot_eq_v4i32(<4 x i32> %x) nounwind {
4040
; AVX512-LABEL: andnot_eq_v4i32:
4141
; AVX512: # %bb.0:
42-
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
43-
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
44-
; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
45-
; AVX512-NEXT: korw %k1, %k0, %k1
46-
; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
42+
; AVX512-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
43+
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
44+
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
4745
; AVX512-NEXT: retq
4846
;
4947
; AVX2-LABEL: andnot_eq_v4i32:
5048
; AVX2: # %bb.0:
51-
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
52-
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
53-
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [4294967287,4294967287,4294967287,4294967287]
54-
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
55-
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
49+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967287,4294967287,4294967287,4294967287]
50+
; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
51+
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
52+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5653
; AVX2-NEXT: retq
5754
;
5855
; SSE-LABEL: andnot_eq_v4i32:
5956
; SSE: # %bb.0:
60-
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
61-
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
62-
; SSE-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
63-
; SSE-NEXT: por %xmm1, %xmm0
57+
; SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
58+
; SSE-NEXT: pxor %xmm1, %xmm1
59+
; SSE-NEXT: pcmpeqd %xmm1, %xmm0
6460
; SSE-NEXT: retq
6561
%cmp1 = icmp eq <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
6662
%cmp2 = icmp eq <4 x i32> %x, <i32 -9, i32 -9, i32 -9, i32 -9>
@@ -115,41 +111,32 @@ define <2 x i1> @andnot_eq_v2i64_fail_max_not_n1(<2 x i64> %x) nounwind {
115111
define <2 x i1> @andnot_eq_v2i64(<2 x i64> %x) nounwind {
116112
; AVX512-LABEL: andnot_eq_v2i64:
117113
; AVX512: # %bb.0:
118-
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k0
119-
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
120-
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
121-
; AVX512-NEXT: korw %k1, %k0, %k1
122-
; AVX512-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
114+
; AVX512-NEXT: vpandnq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
115+
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
116+
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
123117
; AVX512-NEXT: retq
124118
;
125119
; AVX2-LABEL: andnot_eq_v2i64:
126120
; AVX2: # %bb.0:
127-
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
128-
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
129-
; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
130-
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
121+
; AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
122+
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
123+
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
131124
; AVX2-NEXT: retq
132125
;
133126
; SSE41-LABEL: andnot_eq_v2i64:
134127
; SSE41: # %bb.0:
135-
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551611,18446744073709551611]
136-
; SSE41-NEXT: pcmpeqq %xmm0, %xmm1
137-
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
138-
; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
139-
; SSE41-NEXT: por %xmm1, %xmm0
128+
; SSE41-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
129+
; SSE41-NEXT: pxor %xmm1, %xmm1
130+
; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
140131
; SSE41-NEXT: retq
141132
;
142133
; SSE2-LABEL: andnot_eq_v2i64:
143134
; SSE2: # %bb.0:
144-
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551611,18446744073709551611]
145-
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
146-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
147-
; SSE2-NEXT: pand %xmm1, %xmm2
148-
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
135+
; SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
136+
; SSE2-NEXT: pxor %xmm1, %xmm1
149137
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
150138
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
151139
; SSE2-NEXT: pand %xmm1, %xmm0
152-
; SSE2-NEXT: por %xmm2, %xmm0
153140
; SSE2-NEXT: retq
154141
%cmp1 = icmp eq <2 x i64> %x, <i64 -5, i64 -5>
155142
%cmp2 = icmp eq <2 x i64> %x, <i64 -1, i64 -1>
@@ -195,30 +182,28 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
195182
define <8 x i1> @andnot_ne_v8i16(<8 x i16> %x) nounwind {
196183
; AVX512-LABEL: andnot_ne_v8i16:
197184
; AVX512: # %bb.0:
198-
; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
199-
; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
200-
; AVX512-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
201-
; AVX512-NEXT: vpternlogq $18, %xmm2, %xmm1, %xmm0
185+
; AVX512-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
186+
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
187+
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
188+
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
202189
; AVX512-NEXT: retq
203190
;
204191
; AVX2-LABEL: andnot_ne_v8i16:
205192
; AVX2: # %bb.0:
206-
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
207-
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
193+
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
194+
; AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
195+
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
208196
; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
209-
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
210-
; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
197+
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
211198
; AVX2-NEXT: retq
212199
;
213200
; SSE-LABEL: andnot_ne_v8i16:
214201
; SSE: # %bb.0:
215-
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [49151,49151,49151,49151,49151,49151,49151,49151]
216-
; SSE-NEXT: pcmpeqw %xmm0, %xmm1
217-
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
202+
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
203+
; SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
204+
; SSE-NEXT: pxor %xmm2, %xmm2
218205
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
219-
; SSE-NEXT: pxor %xmm2, %xmm0
220-
; SSE-NEXT: pandn %xmm0, %xmm1
221-
; SSE-NEXT: movdqa %xmm1, %xmm0
206+
; SSE-NEXT: pxor %xmm1, %xmm0
222207
; SSE-NEXT: retq
223208
%cmp1 = icmp ne <8 x i16> %x, <i16 -16385, i16 -16385, i16 -16385, i16 -16385, i16 -16385, i16 -16385, i16 -16385, i16 -16385>
224209
%cmp2 = icmp ne <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -264,30 +249,28 @@ define <16 x i1> @andnot_ne_v16i8_fail_max_not_n1(<16 x i8> %x) nounwind {
264249
define <16 x i1> @andnot_ne_v16i8(<16 x i8> %x) nounwind {
265250
; AVX512-LABEL: andnot_ne_v16i8:
266251
; AVX512: # %bb.0:
267-
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
268-
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
269-
; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
270-
; AVX512-NEXT: vpternlogq $18, %xmm1, %xmm2, %xmm0
252+
; AVX512-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
253+
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
254+
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
255+
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
271256
; AVX512-NEXT: retq
272257
;
273258
; AVX2-LABEL: andnot_ne_v16i8:
274259
; AVX2: # %bb.0:
275260
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
276-
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
277-
; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
261+
; AVX2-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
262+
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
263+
; AVX2-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
278264
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
279-
; AVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0
280265
; AVX2-NEXT: retq
281266
;
282267
; SSE-LABEL: andnot_ne_v16i8:
283268
; SSE: # %bb.0:
284-
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
285-
; SSE-NEXT: movdqa %xmm0, %xmm1
286-
; SSE-NEXT: pcmpeqb %xmm2, %xmm1
287-
; SSE-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
288-
; SSE-NEXT: pxor %xmm2, %xmm0
289-
; SSE-NEXT: pandn %xmm0, %xmm1
290-
; SSE-NEXT: movdqa %xmm1, %xmm0
269+
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
270+
; SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
271+
; SSE-NEXT: pxor %xmm2, %xmm2
272+
; SSE-NEXT: pcmpeqb %xmm2, %xmm0
273+
; SSE-NEXT: pxor %xmm1, %xmm0
291274
; SSE-NEXT: retq
292275
%cmp1 = icmp ne <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
293276
%cmp2 = icmp ne <16 x i8> %x, <i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33, i8 -33>

0 commit comments

Comments
 (0)