Skip to content

Commit c83fe87

Browse files
committed
[SwitchLowering] Support merging 0 and power-of-2 case.
1 parent 7462da1 commit c83fe87

File tree

6 files changed

+128
-93
lines changed

6 files changed

+128
-93
lines changed

llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -405,13 +405,13 @@ class IRTranslator : public MachineFunctionPass {
405405
BranchProbability UnhandledProbs, SwitchCG::CaseClusterIt I,
406406
MachineBasicBlock *Fallthrough, bool FallthroughUnreachable);
407407

408-
bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, Value *Cond,
409-
MachineBasicBlock *Fallthrough,
410-
bool FallthroughUnreachable,
411-
BranchProbability UnhandledProbs,
412-
MachineBasicBlock *CurMBB,
413-
MachineIRBuilder &MIB,
414-
MachineBasicBlock *SwitchMBB);
408+
bool lowerSwitchAndOrRangeWorkItem(SwitchCG::CaseClusterIt I, Value *Cond,
409+
MachineBasicBlock *Fallthrough,
410+
bool FallthroughUnreachable,
411+
BranchProbability UnhandledProbs,
412+
MachineBasicBlock *CurMBB,
413+
MachineIRBuilder &MIB,
414+
MachineBasicBlock *SwitchMBB);
415415

416416
bool lowerBitTestWorkItem(
417417
SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,

llvm/include/llvm/CodeGen/SwitchLoweringUtils.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ enum CaseClusterKind {
3535
/// A cluster of cases suitable for jump table lowering.
3636
CC_JumpTable,
3737
/// A cluster of cases suitable for bit test lowering.
38-
CC_BitTests
38+
CC_BitTests,
39+
CC_And
3940
};
4041

4142
/// A cluster of case labels.
@@ -141,6 +142,8 @@ struct CaseBlock {
141142
BranchProbability TrueProb, FalseProb;
142143
bool IsUnpredictable;
143144

145+
bool EmitAnd = false;
146+
144147
// Constructor for SelectionDAG.
145148
CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
146149
const Value *cmpmiddle, MachineBasicBlock *truebb,

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,18 +1059,15 @@ bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
10591059
}
10601060
return true;
10611061
}
1062-
bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
1063-
Value *Cond,
1064-
MachineBasicBlock *Fallthrough,
1065-
bool FallthroughUnreachable,
1066-
BranchProbability UnhandledProbs,
1067-
MachineBasicBlock *CurMBB,
1068-
MachineIRBuilder &MIB,
1069-
MachineBasicBlock *SwitchMBB) {
1062+
bool IRTranslator::lowerSwitchAndOrRangeWorkItem(
1063+
SwitchCG::CaseClusterIt I, Value *Cond, MachineBasicBlock *Fallthrough,
1064+
bool FallthroughUnreachable, BranchProbability UnhandledProbs,
1065+
MachineBasicBlock *CurMBB, MachineIRBuilder &MIB,
1066+
MachineBasicBlock *SwitchMBB) {
10701067
using namespace SwitchCG;
10711068
const Value *RHS, *LHS, *MHS;
10721069
CmpInst::Predicate Pred;
1073-
if (I->Low == I->High) {
1070+
if (I->Low == I->High || I->Kind == CC_And) {
10741071
// Check Cond == I->Low.
10751072
Pred = CmpInst::ICMP_EQ;
10761073
LHS = Cond;
@@ -1088,6 +1085,7 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
10881085
// The false probability is the sum of all unhandled cases.
10891086
CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
10901087
CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
1088+
CB.EmitAnd = I->Kind == CC_And;
10911089

10921090
emitSwitchCase(CB, SwitchMBB, MIB);
10931091
return true;
@@ -1327,10 +1325,11 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
13271325
}
13281326
break;
13291327
}
1328+
case CC_And:
13301329
case CC_Range: {
1331-
if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
1332-
FallthroughUnreachable, UnhandledProbs,
1333-
CurMBB, MIB, SwitchMBB)) {
1330+
if (!lowerSwitchAndOrRangeWorkItem(I, Cond, Fallthrough,
1331+
FallthroughUnreachable, UnhandledProbs,
1332+
CurMBB, MIB, SwitchMBB)) {
13341333
LLVM_DEBUG(dbgs() << "Failed to lower switch range");
13351334
return false;
13361335
}

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2857,7 +2857,17 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
28572857
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
28582858

28592859
// Build the setcc now.
2860-
if (!CB.CmpMHS) {
2860+
if (CB.EmitAnd) {
2861+
SDLoc dl = getCurSDLoc();
2862+
2863+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2864+
EVT VT = TLI.getValueType(DAG.getDataLayout(), CB.CmpRHS->getType(), true);
2865+
SDValue C = DAG.getConstant(*cast<ConstantInt>(CB.CmpRHS), dl, VT);
2866+
SDValue Zero = DAG.getConstant(0, dl, VT);
2867+
SDValue CondLHS = getValue(CB.CmpLHS);
2868+
SDValue And = DAG.getNode(ISD::AND, dl, C.getValueType(), CondLHS, C);
2869+
Cond = DAG.getSetCC(dl, MVT::i1, And, Zero, ISD::SETEQ);
2870+
} else if (!CB.CmpMHS) {
28612871
// Fold "(X == true)" to X and "(X == false)" to !X to
28622872
// handle common cases produced by branch lowering.
28632873
if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
@@ -12250,10 +12260,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
1225012260
}
1225112261
break;
1225212262
}
12263+
case CC_And:
1225312264
case CC_Range: {
1225412265
const Value *RHS, *LHS, *MHS;
1225512266
ISD::CondCode CC;
12256-
if (I->Low == I->High) {
12267+
if (I->Low == I->High || I->Kind == CC_And) {
1225712268
// Check Cond == I->Low.
1225812269
CC = ISD::SETEQ;
1225912270
LHS = Cond;
@@ -12275,6 +12286,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
1227512286
CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
1227612287
getCurSDLoc(), I->Prob, UnhandledProbs);
1227712288

12289+
CB.EmitAnd = I->Kind == CC_And;
1227812290
if (CurMBB == SwitchMBB)
1227912291
visitSwitchCase(CB, SwitchMBB);
1228012292
else

llvm/lib/CodeGen/SwitchLoweringUtils.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,41 @@ void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters,
362362
}
363363
}
364364
Clusters.resize(DstIndex);
365+
366+
// Check if the clusters contain one checking for 0 and another one checking
367+
// for a power-of-2 constant with matching destinations. Those clusters can be
368+
// combined into a single one with CC_And.
369+
unsigned ZeroIdx = -1;
370+
for (const auto &[Idx, C] : enumerate(Clusters)) {
371+
if (C.Kind != CC_Range || C.Low != C.High)
372+
continue;
373+
if (C.Low->isZero()) {
374+
ZeroIdx = Idx;
375+
break;
376+
}
377+
}
378+
if (ZeroIdx == -1u)
379+
return;
380+
381+
unsigned Pow2Idx = -1;
382+
for (const auto &[Idx, C] : enumerate(Clusters)) {
383+
if (C.Kind != CC_Range || C.Low != C.High || C.MBB != Clusters[ZeroIdx].MBB)
384+
continue;
385+
if (C.Low->getValue().isPowerOf2()) {
386+
Pow2Idx = Idx;
387+
break;
388+
}
389+
}
390+
if (Pow2Idx == -1u)
391+
return;
392+
393+
APInt Pow2 = Clusters[Pow2Idx].Low->getValue();
394+
APInt NewC = (Pow2 + 1) * -1;
395+
Clusters[ZeroIdx].Low = ConstantInt::get(SI->getContext(), NewC);
396+
Clusters[ZeroIdx].High = ConstantInt::get(SI->getContext(), NewC);
397+
Clusters[ZeroIdx].Kind = CC_And;
398+
Clusters[ZeroIdx].Prob += Clusters[Pow2Idx].Prob;
399+
Clusters.erase(Clusters.begin() + Pow2Idx);
365400
}
366401

367402
bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,

llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll

Lines changed: 57 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,25 @@
44
define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) {
55
; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases:
66
; CHECK: ; %bb.0: ; %entry
7-
; CHECK-NEXT: mov w9, #100 ; =0x64
8-
; CHECK-NEXT: mov w8, #20 ; =0x14
7+
; CHECK-NEXT: mov w8, #100 ; =0x64
8+
; CHECK-NEXT: mov w9, #223 ; =0xdf
99
; CHECK-NEXT: LBB0_1: ; %loop.header
1010
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
11-
; CHECK-NEXT: ands w10, w0, #0xff
12-
; CHECK-NEXT: b.eq LBB0_6
11+
; CHECK-NEXT: tst w0, w9
12+
; CHECK-NEXT: b.eq LBB0_4
1313
; CHECK-NEXT: ; %bb.2: ; %loop.header
1414
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
15-
; CHECK-NEXT: cmp w10, #32
16-
; CHECK-NEXT: b.eq LBB0_6
17-
; CHECK-NEXT: ; %bb.3: ; %loop.header
18-
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
15+
; CHECK-NEXT: and w10, w0, #0xff
1916
; CHECK-NEXT: cmp w10, #124
20-
; CHECK-NEXT: b.eq LBB0_7
21-
; CHECK-NEXT: ; %bb.4: ; %loop.latch
17+
; CHECK-NEXT: b.eq LBB0_5
18+
; CHECK-NEXT: ; %bb.3: ; %loop.latch
2219
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
23-
; CHECK-NEXT: subs w9, w9, #1
20+
; CHECK-NEXT: subs w8, w8, #1
2421
; CHECK-NEXT: b.ne LBB0_1
25-
; CHECK-NEXT: ; %bb.5:
26-
; CHECK-NEXT: mov w8, #20 ; =0x14
27-
; CHECK-NEXT: LBB0_6: ; %common.ret
28-
; CHECK-NEXT: mov w0, w8
22+
; CHECK-NEXT: LBB0_4:
23+
; CHECK-NEXT: mov w0, #20 ; =0x14
2924
; CHECK-NEXT: ret
30-
; CHECK-NEXT: LBB0_7: ; %e2
25+
; CHECK-NEXT: LBB0_5: ; %e2
3126
; CHECK-NEXT: mov w0, #30 ; =0x1e
3227
; CHECK-NEXT: ret
3328
entry:
@@ -56,30 +51,28 @@ e2:
5651
define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(i8 %v) {
5752
; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_swapped:
5853
; CHECK: ; %bb.0: ; %entry
59-
; CHECK-NEXT: mov w9, #100 ; =0x64
60-
; CHECK-NEXT: mov w8, #20 ; =0x14
54+
; CHECK-NEXT: mov w8, #100 ; =0x64
55+
; CHECK-NEXT: mov w9, #223 ; =0xdf
6156
; CHECK-NEXT: LBB1_1: ; %loop.header
6257
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
63-
; CHECK-NEXT: ands w10, w0, #0xff
64-
; CHECK-NEXT: b.eq LBB1_6
58+
; CHECK-NEXT: tst w0, w9
59+
; CHECK-NEXT: b.eq LBB1_5
6560
; CHECK-NEXT: ; %bb.2: ; %loop.header
6661
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
67-
; CHECK-NEXT: cmp w10, #32
68-
; CHECK-NEXT: b.eq LBB1_6
69-
; CHECK-NEXT: ; %bb.3: ; %loop.header
70-
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
62+
; CHECK-NEXT: and w10, w0, #0xff
7163
; CHECK-NEXT: cmp w10, #124
72-
; CHECK-NEXT: b.eq LBB1_7
73-
; CHECK-NEXT: ; %bb.4: ; %loop.latch
64+
; CHECK-NEXT: b.eq LBB1_6
65+
; CHECK-NEXT: ; %bb.3: ; %loop.latch
7466
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
75-
; CHECK-NEXT: subs w9, w9, #1
67+
; CHECK-NEXT: subs w8, w8, #1
7668
; CHECK-NEXT: b.ne LBB1_1
77-
; CHECK-NEXT: ; %bb.5:
78-
; CHECK-NEXT: mov w8, #10 ; =0xa
79-
; CHECK-NEXT: LBB1_6: ; %common.ret
80-
; CHECK-NEXT: mov w0, w8
69+
; CHECK-NEXT: ; %bb.4:
70+
; CHECK-NEXT: mov w0, #10 ; =0xa
8171
; CHECK-NEXT: ret
82-
; CHECK-NEXT: LBB1_7: ; %e2
72+
; CHECK-NEXT: LBB1_5:
73+
; CHECK-NEXT: mov w0, #20 ; =0x14
74+
; CHECK-NEXT: ret
75+
; CHECK-NEXT: LBB1_6: ; %e2
8376
; CHECK-NEXT: mov w0, #30 ; =0x1e
8477
; CHECK-NEXT: ret
8578
entry:
@@ -111,35 +104,33 @@ e2:
111104
define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(i8 %v, i1 %c) {
112105
; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_with_phi:
113106
; CHECK: ; %bb.0: ; %entry
114-
; CHECK-NEXT: tbz w1, #0, LBB2_8
107+
; CHECK-NEXT: tbz w1, #0, LBB2_6
115108
; CHECK-NEXT: ; %bb.1: ; %loop.header.preheader
116-
; CHECK-NEXT: mov w9, #100 ; =0x64
117-
; CHECK-NEXT: mov w8, #20 ; =0x14
109+
; CHECK-NEXT: mov w8, #100 ; =0x64
110+
; CHECK-NEXT: mov w9, #223 ; =0xdf
118111
; CHECK-NEXT: LBB2_2: ; %loop.header
119112
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
120-
; CHECK-NEXT: ands w10, w0, #0xff
113+
; CHECK-NEXT: tst w0, w9
121114
; CHECK-NEXT: b.eq LBB2_7
122115
; CHECK-NEXT: ; %bb.3: ; %loop.header
123116
; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1
124-
; CHECK-NEXT: cmp w10, #32
125-
; CHECK-NEXT: b.eq LBB2_7
126-
; CHECK-NEXT: ; %bb.4: ; %loop.header
127-
; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1
117+
; CHECK-NEXT: and w10, w0, #0xff
128118
; CHECK-NEXT: cmp w10, #124
129-
; CHECK-NEXT: b.eq LBB2_9
130-
; CHECK-NEXT: ; %bb.5: ; %loop.latch
119+
; CHECK-NEXT: b.eq LBB2_8
120+
; CHECK-NEXT: ; %bb.4: ; %loop.latch
131121
; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1
132-
; CHECK-NEXT: subs w9, w9, #1
122+
; CHECK-NEXT: subs w8, w8, #1
133123
; CHECK-NEXT: b.ne LBB2_2
134-
; CHECK-NEXT: ; %bb.6:
135-
; CHECK-NEXT: mov w8, #10 ; =0xa
136-
; CHECK-NEXT: LBB2_7: ; %common.ret
137-
; CHECK-NEXT: mov w0, w8
124+
; CHECK-NEXT: ; %bb.5:
125+
; CHECK-NEXT: mov w0, #10 ; =0xa
138126
; CHECK-NEXT: ret
139-
; CHECK-NEXT: LBB2_8:
127+
; CHECK-NEXT: LBB2_6:
140128
; CHECK-NEXT: mov w0, wzr
141129
; CHECK-NEXT: ret
142-
; CHECK-NEXT: LBB2_9: ; %e2
130+
; CHECK-NEXT: LBB2_7:
131+
; CHECK-NEXT: mov w0, #20 ; =0x14
132+
; CHECK-NEXT: ret
133+
; CHECK-NEXT: LBB2_8: ; %e2
143134
; CHECK-NEXT: mov w0, #30 ; =0x1e
144135
; CHECK-NEXT: ret
145136
entry:
@@ -240,21 +231,18 @@ define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(ptr %start) {
240231
; CHECK-NEXT: LBB4_1: ; %loop
241232
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
242233
; CHECK-NEXT: ldrb w9, [x8], #1
243-
; CHECK-NEXT: cbz w9, LBB4_4
234+
; CHECK-NEXT: tst w9, #0xffffffdf
235+
; CHECK-NEXT: b.eq LBB4_4
244236
; CHECK-NEXT: ; %bb.2: ; %loop
245237
; CHECK-NEXT: ; in Loop: Header=BB4_1 Depth=1
246238
; CHECK-NEXT: cmp w9, #124
247-
; CHECK-NEXT: b.eq LBB4_5
248-
; CHECK-NEXT: ; %bb.3: ; %loop
249-
; CHECK-NEXT: ; in Loop: Header=BB4_1 Depth=1
250-
; CHECK-NEXT: cmp w9, #32
251239
; CHECK-NEXT: b.ne LBB4_1
240+
; CHECK-NEXT: ; %bb.3: ; %e2.loopexit
241+
; CHECK-NEXT: mov w0, wzr
242+
; CHECK-NEXT: ret
252243
; CHECK-NEXT: LBB4_4: ; %e1
253244
; CHECK-NEXT: mov w0, #-1 ; =0xffffffff
254245
; CHECK-NEXT: ret
255-
; CHECK-NEXT: LBB4_5: ; %e2.loopexit
256-
; CHECK-NEXT: mov w0, wzr
257-
; CHECK-NEXT: ret
258246
entry:
259247
br label %loop
260248

@@ -376,8 +364,7 @@ define void @test_successor_with_loop_phi(ptr %A, ptr %B) {
376364
; CHECK-NEXT: ldr w8, [x0]
377365
; CHECK-NEXT: str wzr, [x0]
378366
; CHECK-NEXT: mov x0, x1
379-
; CHECK-NEXT: orr w8, w8, #0x4
380-
; CHECK-NEXT: cmp w8, #4
367+
; CHECK-NEXT: tst w8, #0xfffffffb
381368
; CHECK-NEXT: b.eq LBB7_1
382369
; CHECK-NEXT: ; %bb.2: ; %exit
383370
; CHECK-NEXT: ret
@@ -556,22 +543,21 @@ e1:
556543
define void @merge_with_stores(ptr %A, i16 %v) {
557544
; CHECK-LABEL: merge_with_stores:
558545
; CHECK: ; %bb.0: ; %entry
559-
; CHECK-NEXT: and w8, w1, #0xffff
560-
; CHECK-NEXT: sub w9, w8, #10
561-
; CHECK-NEXT: cmp w9, #2
562-
; CHECK-NEXT: b.lo LBB11_4
546+
; CHECK-NEXT: mov w8, #65533 ; =0xfffd
547+
; CHECK-NEXT: tst w1, w8
548+
; CHECK-NEXT: b.eq LBB11_3
563549
; CHECK-NEXT: ; %bb.1: ; %entry
564-
; CHECK-NEXT: cbz w8, LBB11_5
565-
; CHECK-NEXT: ; %bb.2: ; %entry
550+
; CHECK-NEXT: and w8, w1, #0xffff
551+
; CHECK-NEXT: sub w8, w8, #10
566552
; CHECK-NEXT: cmp w8, #2
567-
; CHECK-NEXT: b.eq LBB11_5
568-
; CHECK-NEXT: ; %bb.3: ; %default.dst
569-
; CHECK-NEXT: strh wzr, [x0]
570-
; CHECK-NEXT: ret
571-
; CHECK-NEXT: LBB11_4: ; %other.dst
553+
; CHECK-NEXT: b.hs LBB11_4
554+
; CHECK-NEXT: ; %bb.2: ; %other.dst
572555
; CHECK-NEXT: mov w8, #1 ; =0x1
573556
; CHECK-NEXT: strh w8, [x0, #36]
574-
; CHECK-NEXT: LBB11_5: ; %pow2.dst
557+
; CHECK-NEXT: LBB11_3: ; %pow2.dst
558+
; CHECK-NEXT: ret
559+
; CHECK-NEXT: LBB11_4: ; %default.dst
560+
; CHECK-NEXT: strh wzr, [x0]
575561
; CHECK-NEXT: ret
576562
entry:
577563
switch i16 %v, label %default.dst [

0 commit comments

Comments
 (0)