Skip to content

Commit d1baed7

Browse files
committed
[DAG] select Cond, -1, C --> or (sext Cond), C if Cond is MVT::i1
This seems to be beneficial overall, except for midpoint-int.ll. The X86 backend seems to generate zeroing instructions that are not necessary. Reviewed By: shchenz Differential Revision: https://reviews.llvm.org/D131260
1 parent 823ce6a commit d1baed7

14 files changed

+389
-331
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10238,6 +10238,25 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
1023810238
return SDValue();
1023910239
}
1024010240

10241+
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
10242+
const TargetLowering &TLI) {
10243+
if (!TLI.convertSelectOfConstantsToMath(VT))
10244+
return false;
10245+
10246+
if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
10247+
return true;
10248+
if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
10249+
return true;
10250+
10251+
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
10252+
if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
10253+
return true;
10254+
if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
10255+
return true;
10256+
10257+
return false;
10258+
}
10259+
1024110260
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
1024210261
SDValue Cond = N->getOperand(0);
1024310262
SDValue N1 = N->getOperand(1);
@@ -10288,9 +10307,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
1028810307

1028910308
// Use a target hook because some targets may prefer to transform in the
1029010309
// other direction.
10291-
if (TLI.convertSelectOfConstantsToMath(VT)) {
10292-
// For any constants that differ by 1, we can transform the select into an
10293-
// extend and add.
10310+
if (shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) {
10311+
// For any constants that differ by 1, we can transform the select into
10312+
// an extend and add.
1029410313
const APInt &C1Val = C1->getAPIntValue();
1029510314
const APInt &C2Val = C2->getAPIntValue();
1029610315
if (C1Val - 1 == C2Val) {
@@ -10299,6 +10318,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
1029910318
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
1030010319
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
1030110320
}
10321+
1030210322
if (C1Val + 1 == C2Val) {
1030310323
// select Cond, C1, C1+1 --> add (sext Cond), C1+1
1030410324
if (VT != MVT::i1)
@@ -10315,6 +10335,12 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
1031510335
return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
1031610336
}
1031710337

10338+
// select Cond, -1, C --> or (sext Cond), C
10339+
if (C1->isAllOnes()) {
10340+
Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
10341+
return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
10342+
}
10343+
1031810344
if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
1031910345
return V;
1032010346
}
@@ -10451,10 +10477,17 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
1045110477
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
1045210478
return V;
1045310479

10454-
if (SDValue V = foldSelectOfConstants(N))
10480+
if (SDValue V = foldBoolSelectToLogic(N, DAG))
1045510481
return V;
1045610482

10457-
if (SDValue V = foldBoolSelectToLogic(N, DAG))
10483+
// select (not Cond), N1, N2 -> select Cond, N2, N1
10484+
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
10485+
SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
10486+
SelectOp->setFlags(Flags);
10487+
return SelectOp;
10488+
}
10489+
10490+
if (SDValue V = foldSelectOfConstants(N))
1045810491
return V;
1045910492

1046010493
// If we can fold this based on the true/false value, do so.
@@ -10539,13 +10572,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
1053910572
}
1054010573
}
1054110574

10542-
// select (not Cond), N1, N2 -> select Cond, N2, N1
10543-
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
10544-
SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
10545-
SelectOp->setFlags(Flags);
10546-
return SelectOp;
10547-
}
10548-
1054910575
// Fold selects based on a setcc into other things, such as min/max/abs.
1055010576
if (N0.getOpcode() == ISD::SETCC) {
1055110577
SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
@@ -10955,7 +10981,7 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
1095510981
SDValue N2 = N->getOperand(2);
1095610982
EVT VT = N->getValueType(0);
1095710983
if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10958-
!TLI.convertSelectOfConstantsToMath(VT) ||
10984+
!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
1095910985
!ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
1096010986
!ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
1096110987
return SDValue();
@@ -11282,6 +11308,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
1128211308
if (N2 == N3)
1128311309
return N2;
1128411310

11311+
// select_cc bool, 0, x, y, seteq -> select bool, y, x
11312+
if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
11313+
isNullConstant(N1))
11314+
return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
11315+
1128511316
// Determine if the condition we're dealing with is constant
1128611317
if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
1128711318
CC, SDLoc(N), false)) {
@@ -12125,7 +12156,7 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
1212512156
if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
1212612157
return SCC;
1212712158

12128-
if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
12159+
if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
1212912160
EVT SetCCVT = getSetCCResultType(N00VT);
1213012161
// Don't do this transform for i1 because there's a select transform
1213112162
// that would reverse it.

llvm/test/CodeGen/PowerPC/crbits.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -322,15 +322,15 @@ define signext i32 @exttest7(i32 signext %a) #0 {
322322
; CHECK-LABEL: exttest7:
323323
; CHECK: # %bb.0: # %entry
324324
; CHECK-NEXT: li 4, 8
325-
; CHECK-NEXT: cmpwi 3, 5
325+
; CHECK-NEXT: cmplwi 3, 5
326326
; CHECK-NEXT: li 3, 7
327327
; CHECK-NEXT: iseleq 3, 3, 4
328328
; CHECK-NEXT: blr
329329
;
330330
; CHECK-NO-ISEL-LABEL: exttest7:
331331
; CHECK-NO-ISEL: # %bb.0: # %entry
332332
; CHECK-NO-ISEL-NEXT: li 4, 8
333-
; CHECK-NO-ISEL-NEXT: cmpwi 3, 5
333+
; CHECK-NO-ISEL-NEXT: cmplwi 3, 5
334334
; CHECK-NO-ISEL-NEXT: li 3, 7
335335
; CHECK-NO-ISEL-NEXT: bclr 12, 2, 0
336336
; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry
@@ -339,7 +339,7 @@ define signext i32 @exttest7(i32 signext %a) #0 {
339339
;
340340
; CHECK-P10-LABEL: exttest7:
341341
; CHECK-P10: # %bb.0: # %entry
342-
; CHECK-P10-NEXT: cmpwi r3, 5
342+
; CHECK-P10-NEXT: cmplwi r3, 5
343343
; CHECK-P10-NEXT: li r3, 8
344344
; CHECK-P10-NEXT: li r4, 7
345345
; CHECK-P10-NEXT: iseleq r3, r4, r3

llvm/test/CodeGen/PowerPC/prefer-dqform.ll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,12 @@ define void @test(i32* dereferenceable(4) %.ial, i32* noalias dereferenceable(4)
1414
; CHECK-P9-LABEL: test:
1515
; CHECK-P9: # %bb.0: # %test_entry
1616
; CHECK-P9-NEXT: andi. r3, r6, 15
17+
; CHECK-P9-NEXT: li r3, 2
18+
; CHECK-P9-NEXT: li r10, 1
1719
; CHECK-P9-NEXT: lwz r4, 0(r4)
1820
; CHECK-P9-NEXT: lwz r5, 0(r5)
19-
; CHECK-P9-NEXT: li r11, 1
20-
; CHECK-P9-NEXT: addic r3, r3, -1
21-
; CHECK-P9-NEXT: subfe r10, r3, r3
22-
; CHECK-P9-NEXT: li r3, 2
23-
; CHECK-P9-NEXT: not r10, r10
24-
; CHECK-P9-NEXT: iseleq r3, r11, r3
21+
; CHECK-P9-NEXT: iseleq r3, r10, r3
22+
; CHECK-P9-NEXT: subfic r10, r3, 1
2523
; CHECK-P9-NEXT: add r4, r10, r4
2624
; CHECK-P9-NEXT: srawi r4, r4, 4
2725
; CHECK-P9-NEXT: addze r4, r4
@@ -67,14 +65,13 @@ define void @test(i32* dereferenceable(4) %.ial, i32* noalias dereferenceable(4)
6765
;
6866
; CHECK-P10-LABEL: test:
6967
; CHECK-P10: # %bb.0: # %test_entry
70-
; CHECK-P10-NEXT: lwz r4, 0(r4)
7168
; CHECK-P10-NEXT: andi. r3, r6, 15
7269
; CHECK-P10-NEXT: li r3, 2
7370
; CHECK-P10-NEXT: li r10, 1
71+
; CHECK-P10-NEXT: lwz r4, 0(r4)
7472
; CHECK-P10-NEXT: lwz r5, 0(r5)
7573
; CHECK-P10-NEXT: iseleq r3, r10, r3
76-
; CHECK-P10-NEXT: setnbc r10, eq
77-
; CHECK-P10-NEXT: not r10, r10
74+
; CHECK-P10-NEXT: subfic r10, r3, 1
7875
; CHECK-P10-NEXT: add r4, r10, r4
7976
; CHECK-P10-NEXT: srawi r4, r4, 4
8077
; CHECK-P10-NEXT: addze r4, r4

llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,10 @@ define i32 @length3(ptr %X, ptr %Y) nounwind {
160160
; X86-NEXT: popl %esi
161161
; X86-NEXT: retl
162162
; X86-NEXT: .LBB9_3: # %res_block
163-
; X86-NEXT: setae %al
164-
; X86-NEXT: movzbl %al, %eax
165-
; X86-NEXT: leal -1(%eax,%eax), %eax
163+
; X86-NEXT: xorl %eax, %eax
164+
; X86-NEXT: cmpw %si, %dx
165+
; X86-NEXT: sbbl %eax, %eax
166+
; X86-NEXT: orl $1, %eax
166167
; X86-NEXT: popl %esi
167168
; X86-NEXT: retl
168169
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 3) nounwind
@@ -292,9 +293,10 @@ define i32 @length5(ptr %X, ptr %Y) nounwind {
292293
; X86-NEXT: popl %esi
293294
; X86-NEXT: retl
294295
; X86-NEXT: .LBB16_3: # %res_block
295-
; X86-NEXT: setae %al
296-
; X86-NEXT: movzbl %al, %eax
297-
; X86-NEXT: leal -1(%eax,%eax), %eax
296+
; X86-NEXT: xorl %eax, %eax
297+
; X86-NEXT: cmpl %esi, %edx
298+
; X86-NEXT: sbbl %eax, %eax
299+
; X86-NEXT: orl $1, %eax
298300
; X86-NEXT: popl %esi
299301
; X86-NEXT: retl
300302
%m = tail call i32 @memcmp(ptr %X, ptr %Y, i32 5) nounwind
@@ -337,9 +339,10 @@ define i1 @length5_lt(ptr %X, ptr %Y) nounwind {
337339
; X86-NEXT: subl %ecx, %eax
338340
; X86-NEXT: jmp .LBB18_2
339341
; X86-NEXT: .LBB18_3: # %res_block
340-
; X86-NEXT: setae %al
341-
; X86-NEXT: movzbl %al, %eax
342-
; X86-NEXT: leal -1(%eax,%eax), %eax
342+
; X86-NEXT: xorl %eax, %eax
343+
; X86-NEXT: cmpl %esi, %edx
344+
; X86-NEXT: sbbl %eax, %eax
345+
; X86-NEXT: orl $1, %eax
343346
; X86-NEXT: .LBB18_2: # %endblock
344347
; X86-NEXT: shrl $31, %eax
345348
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -373,8 +376,8 @@ define i32 @length7(ptr %X, ptr %Y) nounwind {
373376
; X86-NEXT: .LBB19_2: # %res_block
374377
; X86-NEXT: xorl %eax, %eax
375378
; X86-NEXT: cmpl %edx, %ecx
376-
; X86-NEXT: setae %al
377-
; X86-NEXT: leal -1(%eax,%eax), %eax
379+
; X86-NEXT: sbbl %eax, %eax
380+
; X86-NEXT: orl $1, %eax
378381
; X86-NEXT: .LBB19_3: # %endblock
379382
; X86-NEXT: popl %esi
380383
; X86-NEXT: retl
@@ -422,8 +425,8 @@ define i1 @length7_lt(ptr %X, ptr %Y) nounwind {
422425
; X86-NEXT: .LBB21_2: # %res_block
423426
; X86-NEXT: xorl %eax, %eax
424427
; X86-NEXT: cmpl %edx, %ecx
425-
; X86-NEXT: setae %al
426-
; X86-NEXT: leal -1(%eax,%eax), %eax
428+
; X86-NEXT: sbbl %eax, %eax
429+
; X86-NEXT: orl $1, %eax
427430
; X86-NEXT: .LBB21_3: # %endblock
428431
; X86-NEXT: shrl $31, %eax
429432
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -457,8 +460,8 @@ define i32 @length8(ptr %X, ptr %Y) nounwind {
457460
; X86-NEXT: .LBB22_2: # %res_block
458461
; X86-NEXT: xorl %eax, %eax
459462
; X86-NEXT: cmpl %edx, %ecx
460-
; X86-NEXT: setae %al
461-
; X86-NEXT: leal -1(%eax,%eax), %eax
463+
; X86-NEXT: sbbl %eax, %eax
464+
; X86-NEXT: orl $1, %eax
462465
; X86-NEXT: .LBB22_3: # %endblock
463466
; X86-NEXT: popl %esi
464467
; X86-NEXT: retl
@@ -619,8 +622,8 @@ define i32 @length12(ptr %X, ptr %Y) nounwind {
619622
; X86-NEXT: .LBB29_3: # %res_block
620623
; X86-NEXT: xorl %eax, %eax
621624
; X86-NEXT: cmpl %edx, %ecx
622-
; X86-NEXT: setae %al
623-
; X86-NEXT: leal -1(%eax,%eax), %eax
625+
; X86-NEXT: sbbl %eax, %eax
626+
; X86-NEXT: orl $1, %eax
624627
; X86-NEXT: .LBB29_4: # %endblock
625628
; X86-NEXT: popl %esi
626629
; X86-NEXT: retl
@@ -744,8 +747,8 @@ define i32 @length16(ptr %X, ptr %Y) nounwind {
744747
; X86-NEXT: .LBB33_4: # %res_block
745748
; X86-NEXT: xorl %eax, %eax
746749
; X86-NEXT: cmpl %edx, %ecx
747-
; X86-NEXT: setae %al
748-
; X86-NEXT: leal -1(%eax,%eax), %eax
750+
; X86-NEXT: sbbl %eax, %eax
751+
; X86-NEXT: orl $1, %eax
749752
; X86-NEXT: .LBB33_5: # %endblock
750753
; X86-NEXT: popl %esi
751754
; X86-NEXT: retl
@@ -858,8 +861,8 @@ define i1 @length16_lt(ptr %x, ptr %y) nounwind {
858861
; X86-NEXT: .LBB35_4: # %res_block
859862
; X86-NEXT: xorl %eax, %eax
860863
; X86-NEXT: cmpl %edx, %ecx
861-
; X86-NEXT: setae %al
862-
; X86-NEXT: leal -1(%eax,%eax), %eax
864+
; X86-NEXT: sbbl %eax, %eax
865+
; X86-NEXT: orl $1, %eax
863866
; X86-NEXT: .LBB35_5: # %endblock
864867
; X86-NEXT: shrl $31, %eax
865868
; X86-NEXT: # kill: def $al killed $al killed $eax
@@ -907,8 +910,8 @@ define i1 @length16_gt(ptr %x, ptr %y) nounwind {
907910
; X86-NEXT: .LBB36_4: # %res_block
908911
; X86-NEXT: xorl %edx, %edx
909912
; X86-NEXT: cmpl %ecx, %eax
910-
; X86-NEXT: setae %dl
911-
; X86-NEXT: leal -1(%edx,%edx), %edx
913+
; X86-NEXT: sbbl %edx, %edx
914+
; X86-NEXT: orl $1, %edx
912915
; X86-NEXT: .LBB36_5: # %endblock
913916
; X86-NEXT: testl %edx, %edx
914917
; X86-NEXT: setg %al

0 commit comments

Comments
 (0)