Skip to content

Commit 29c3a2c

Browse files
committed
[X86] combinePredicateReduction - fold any_of(setcc(x,y,ne)) -> pmovmskb(not(pcmpeqb()))
Improves codegen for v2i64 any_of reductions by performing the comparison on the operands bitcast to a vXi8 vector, similar to what we already do for all_of(setcc(x,y,eq)).
1 parent 3f42cc1 commit 29c3a2c

File tree

2 files changed

+11
-13
lines changed

2 files changed

+11
-13
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -44299,20 +44299,20 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
4429944299
Movmsk = DAG.getBitcast(MovmskVT, Match);
4430044300
} else {
4430144301
// For all_of(setcc(x,y,eq)) - use PMOVMSKB(PCMPEQB()).
44302-
// TODO: any_of(setcc(x,y,ne)) - use PMOVMSKB(NOT(PCMPEQB())).
44302+
// For any_of(setcc(x,y,ne)) - use PMOVMSKB(NOT(PCMPEQB())).
4430344303
if (Match.getOpcode() == ISD::SETCC) {
4430444304
ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get();
44305-
if (BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) {
44305+
if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) ||
44306+
(BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) {
4430644307
EVT VecVT = Match.getOperand(0).getValueType();
4430744308
EVT VecSVT = VecVT.getScalarType();
4430844309
if (VecSVT != MVT::i8 && (VecSVT.getSizeInBits() % 8) == 0) {
4430944310
NumElts *= VecSVT.getSizeInBits() / 8;
4431044311
EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, NumElts);
4431144312
MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
44312-
Match = DAG.getSetCC(DL, MatchVT,
44313-
DAG.getBitcast(CmpVT, Match.getOperand(0)),
44314-
DAG.getBitcast(CmpVT, Match.getOperand(1)),
44315-
ISD::CondCode::SETEQ);
44313+
Match = DAG.getSetCC(
44314+
DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
44315+
DAG.getBitcast(CmpVT, Match.getOperand(1)), CC);
4431644316
}
4431744317
}
4431844318
}

llvm/test/CodeGen/X86/vector-compare-any_of.ll

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1050,24 +1050,22 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) {
10501050
define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
10511051
; SSE2-LABEL: bool_reduction_v2i64:
10521052
; SSE2: # %bb.0:
1053-
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
1054-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
1055-
; SSE2-NEXT: pand %xmm0, %xmm1
1056-
; SSE2-NEXT: movmskpd %xmm1, %eax
1057-
; SSE2-NEXT: cmpl $3, %eax
1053+
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
1054+
; SSE2-NEXT: pmovmskb %xmm0, %eax
1055+
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
10581056
; SSE2-NEXT: setne %al
10591057
; SSE2-NEXT: retq
10601058
;
10611059
; SSE42-LABEL: bool_reduction_v2i64:
10621060
; SSE42: # %bb.0:
1063-
; SSE42-NEXT: psubq %xmm1, %xmm0
1061+
; SSE42-NEXT: psubb %xmm1, %xmm0
10641062
; SSE42-NEXT: ptest %xmm0, %xmm0
10651063
; SSE42-NEXT: setne %al
10661064
; SSE42-NEXT: retq
10671065
;
10681066
; AVX-LABEL: bool_reduction_v2i64:
10691067
; AVX: # %bb.0:
1070-
; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0
1068+
; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
10711069
; AVX-NEXT: vptest %xmm0, %xmm0
10721070
; AVX-NEXT: setne %al
10731071
; AVX-NEXT: retq

0 commit comments

Comments
 (0)