Skip to content

Commit dba29f7

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] Fold G_AND into G_BRCOND
When the G_BRCOND is fed by an eq or ne G_ICMP, it may be possible to fold a G_AND into the branch by producing a tbnz/tbz instead. This happens when: (1) we have a ne/eq G_ICMP feeding into the G_BRCOND; (2) the G_ICMP is a comparison against 0; and (3) one of the operands of the G_AND is a power-of-2 constant. This is very similar to the code in AArch64TargetLowering::LowerBR_CC. Add opt-and-tbnz-tbz to test this. Differential Revision: https://reviews.llvm.org/D73573
1 parent 4e799ad commit dba29f7

File tree

2 files changed

+330
-3
lines changed

2 files changed

+330
-3
lines changed

llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ class AArch64InstructionSelector : public InstructionSelector {
8989
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
9090
MachineRegisterInfo &MRI) const;
9191

92+
bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
93+
int64_t CmpConstant,
94+
const CmpInst::Predicate &Pred,
95+
MachineBasicBlock *DstMBB,
96+
MachineIRBuilder &MIB) const;
9297
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
9398
MachineRegisterInfo &MRI) const;
9499

@@ -983,6 +988,64 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
983988
}
984989
}
985990

991+
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
992+
MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
993+
MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
994+
// Given something like this:
995+
//
996+
// %x = ...Something...
997+
// %one = G_CONSTANT i64 1
998+
// %zero = G_CONSTANT i64 0
999+
// %and = G_AND %x, %one
1000+
// %cmp = G_ICMP intpred(ne), %and, %zero
1001+
// %cmp_trunc = G_TRUNC %cmp
1002+
// G_BRCOND %cmp_trunc, %bb.3
1003+
//
1004+
// We want to try and fold the AND into the G_BRCOND and produce either a
1005+
// TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1006+
//
1007+
// In this case, we'd get
1008+
//
1009+
// TBNZ %x, %bb.3
1010+
//
1011+
if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
1012+
return false;
1013+
1014+
// Need to be comparing against 0 to fold.
1015+
if (CmpConstant != 0)
1016+
return false;
1017+
1018+
MachineRegisterInfo &MRI = *MIB.getMRI();
1019+
unsigned Opc = 0;
1020+
Register TestReg = AndInst->getOperand(1).getReg();
1021+
unsigned TestSize = MRI.getType(TestReg).getSizeInBits();
1022+
1023+
// Only support EQ and NE. If we have LT, then it *is* possible to fold, but
1024+
// we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
1025+
// so folding would be redundant.
1026+
if (Pred == CmpInst::Predicate::ICMP_EQ)
1027+
Opc = TestSize == 32 ? AArch64::TBZW : AArch64::TBZX;
1028+
else if (Pred == CmpInst::Predicate::ICMP_NE)
1029+
Opc = TestSize == 32 ? AArch64::TBNZW : AArch64::TBNZX;
1030+
else
1031+
return false;
1032+
1033+
// Check if the AND has a constant on its RHS which we can use as a mask.
1034+
// If it's a power of 2, then it's the same as checking a specific bit.
1035+
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1036+
auto MaybeBit =
1037+
getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
1038+
if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
1039+
return false;
1040+
uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
1041+
1042+
// Construct the branch.
1043+
auto BranchMI =
1044+
MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1045+
constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1046+
return true;
1047+
}
1048+
9861049
bool AArch64InstructionSelector::selectCompareBranch(
9871050
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
9881051

@@ -1000,9 +1063,9 @@ bool AArch64InstructionSelector::selectCompareBranch(
10001063
if (!VRegAndVal)
10011064
std::swap(RHS, LHS);
10021065

1066+
MachineIRBuilder MIB(I);
10031067
VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
10041068
if (!VRegAndVal || VRegAndVal->Value != 0) {
1005-
MachineIRBuilder MIB(I);
10061069
// If we can't select a CBZ then emit a cmp + Bcc.
10071070
if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
10081071
CCMI->getOperand(1), MIB))
@@ -1014,11 +1077,18 @@ bool AArch64InstructionSelector::selectCompareBranch(
10141077
return true;
10151078
}
10161079

1080+
// Try to fold things into the branch.
1081+
const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
1082+
MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
1083+
if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
1084+
MIB)) {
1085+
I.eraseFromParent();
1086+
return true;
1087+
}
1088+
10171089
const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
10181090
if (RB.getID() != AArch64::GPRRegBankID)
10191091
return false;
1020-
1021-
const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
10221092
if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
10231093
return false;
10241094

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
3+
#
4+
# Verify that we can fold G_AND into G_BRCOND when all of the following hold:
5+
# 1. We have a ne/eq G_ICMP feeding into the G_BRCOND
6+
# 2. The G_ICMP is being compared against 0
7+
# 3. One of the operands of the G_AND is a power of 2
8+
#
9+
# If all of these hold, we should produce a tbnz or a tbz.
10+
...
11+
---
12+
name: tbnz_and_s64
13+
alignment: 4
14+
legalized: true
15+
regBankSelected: true
16+
body: |
17+
; CHECK-LABEL: name: tbnz_and_s64
18+
; CHECK: bb.0:
19+
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
20+
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
21+
; CHECK: TBNZX [[COPY]], 3, %bb.1
22+
; CHECK: B %bb.0
23+
; CHECK: bb.1:
24+
; CHECK: RET_ReallyLR
25+
bb.0:
26+
successors: %bb.0, %bb.1
27+
liveins: $x0
28+
%0:gpr(s64) = COPY $x0
29+
%1:gpr(s64) = G_CONSTANT i64 8 ; Power of 2 => TBNZ tests bit 3 (log2 8)
30+
%3:gpr(s64) = G_CONSTANT i64 0
31+
%2:gpr(s64) = G_AND %0, %1
32+
%5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
33+
%4:gpr(s1) = G_TRUNC %5(s32)
34+
G_BRCOND %4(s1), %bb.1
35+
G_BR %bb.0
36+
bb.1:
37+
RET_ReallyLR
38+
39+
...
40+
---
41+
name: tbz_and_s64
42+
alignment: 4
43+
legalized: true
44+
regBankSelected: true
45+
tracksRegLiveness: true
46+
body: |
47+
; CHECK-LABEL: name: tbz_and_s64
48+
; CHECK: bb.0:
49+
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
50+
; CHECK: liveins: $x0
51+
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
52+
; CHECK: TBZX [[COPY]], 4, %bb.1
53+
; CHECK: B %bb.0
54+
; CHECK: bb.1:
55+
; CHECK: RET_ReallyLR
56+
bb.0:
57+
successors: %bb.0, %bb.1
58+
liveins: $x0
59+
%0:gpr(s64) = COPY $x0
60+
%1:gpr(s64) = G_CONSTANT i64 16 ; Power of 2 => TBZ tests bit 4 (log2 16)
61+
%3:gpr(s64) = G_CONSTANT i64 0
62+
%2:gpr(s64) = G_AND %0, %1
63+
%5:gpr(s32) = G_ICMP intpred(eq), %2(s64), %3
64+
%4:gpr(s1) = G_TRUNC %5(s32)
65+
G_BRCOND %4(s1), %bb.1
66+
G_BR %bb.0
67+
bb.1:
68+
RET_ReallyLR
69+
70+
...
71+
---
72+
name: tbnz_and_s32
73+
alignment: 4
74+
legalized: true
75+
regBankSelected: true
76+
tracksRegLiveness: true
77+
body: |
78+
; CHECK-LABEL: name: tbnz_and_s32
79+
; CHECK: bb.0:
80+
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
81+
; CHECK: liveins: $w0
82+
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
83+
; CHECK: TBNZW [[COPY]], 0, %bb.1
84+
; CHECK: B %bb.0
85+
; CHECK: bb.1:
86+
; CHECK: RET_ReallyLR
87+
bb.0:
88+
successors: %bb.0, %bb.1
89+
liveins: $w0
90+
%0:gpr(s32) = COPY $w0
91+
%1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBNZ tests bit 0 (log2 1)
92+
%3:gpr(s32) = G_CONSTANT i32 0
93+
%2:gpr(s32) = G_AND %0, %1
94+
%5:gpr(s32) = G_ICMP intpred(ne), %2(s32), %3
95+
%4:gpr(s1) = G_TRUNC %5(s32)
96+
G_BRCOND %4(s1), %bb.1
97+
G_BR %bb.0
98+
bb.1:
99+
RET_ReallyLR
100+
101+
...
102+
---
103+
name: tbz_and_s32
104+
alignment: 4
105+
legalized: true
106+
regBankSelected: true
107+
tracksRegLiveness: true
108+
body: |
109+
; CHECK-LABEL: name: tbz_and_s32
110+
; CHECK: bb.0:
111+
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
112+
; CHECK: liveins: $w0
113+
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
114+
; CHECK: TBZW [[COPY]], 0, %bb.1
115+
; CHECK: B %bb.0
116+
; CHECK: bb.1:
117+
; CHECK: RET_ReallyLR
118+
bb.0:
119+
successors: %bb.0, %bb.1
120+
liveins: $w0
121+
%0:gpr(s32) = COPY $w0
122+
%1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBZ tests bit 0 (log2 1)
123+
%3:gpr(s32) = G_CONSTANT i32 0
124+
%2:gpr(s32) = G_AND %0, %1
125+
%5:gpr(s32) = G_ICMP intpred(eq), %2(s32), %3
126+
%4:gpr(s1) = G_TRUNC %5(s32)
127+
G_BRCOND %4(s1), %bb.1
128+
G_BR %bb.0
129+
bb.1:
130+
RET_ReallyLR
131+
132+
...
133+
---
134+
name: dont_fold_and_lt
135+
alignment: 4
136+
legalized: true
137+
regBankSelected: true
138+
tracksRegLiveness: true
139+
body: |
140+
; CHECK-LABEL: name: dont_fold_and_lt
141+
; CHECK: bb.0:
142+
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
143+
; CHECK: liveins: $w0
144+
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
145+
; CHECK: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
146+
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
147+
; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
148+
; CHECK: B %bb.0
149+
; CHECK: bb.1:
150+
; CHECK: RET_ReallyLR
151+
bb.0:
152+
successors: %bb.0, %bb.1
153+
liveins: $w0
154+
%0:gpr(s32) = COPY $w0
155+
%1:gpr(s32) = G_CONSTANT i32 1
156+
%3:gpr(s32) = G_CONSTANT i32 0
157+
%2:gpr(s32) = G_AND %0, %1
158+
%5:gpr(s32) = G_ICMP intpred(slt), %2(s32), %3
159+
%4:gpr(s1) = G_TRUNC %5(s32)
160+
G_BRCOND %4(s1), %bb.1
161+
G_BR %bb.0
162+
bb.1:
163+
RET_ReallyLR
164+
165+
...
166+
---
167+
name: dont_fold_and_gt
168+
alignment: 4
169+
legalized: true
170+
regBankSelected: true
171+
tracksRegLiveness: true
172+
body: |
173+
; CHECK-LABEL: name: dont_fold_and_gt
174+
; CHECK: bb.0:
175+
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
176+
; CHECK: liveins: $w0
177+
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
178+
; CHECK: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
179+
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
180+
; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
181+
; CHECK: B %bb.0
182+
; CHECK: bb.1:
183+
; CHECK: RET_ReallyLR
184+
bb.0:
185+
successors: %bb.0, %bb.1
186+
liveins: $w0
187+
%0:gpr(s32) = COPY $w0
188+
%1:gpr(s32) = G_CONSTANT i32 1
189+
%3:gpr(s32) = G_CONSTANT i32 0
190+
%2:gpr(s32) = G_AND %0, %1
191+
%5:gpr(s32) = G_ICMP intpred(sgt), %2(s32), %3
192+
%4:gpr(s1) = G_TRUNC %5(s32)
193+
G_BRCOND %4(s1), %bb.1
194+
G_BR %bb.0
195+
bb.1:
196+
RET_ReallyLR
197+
198+
...
199+
---
200+
name: dont_fold_and_not_power_of_2
201+
alignment: 4
202+
legalized: true
203+
regBankSelected: true
204+
body: |
205+
; CHECK-LABEL: name: dont_fold_and_not_power_of_2
206+
; CHECK: bb.0:
207+
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
208+
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
209+
; CHECK: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[COPY]], 4098
210+
; CHECK: CBNZX [[ANDXri]], %bb.1
211+
; CHECK: B %bb.0
212+
; CHECK: bb.1:
213+
; CHECK: RET_ReallyLR
214+
bb.0:
215+
successors: %bb.0, %bb.1
216+
liveins: $x0
217+
%0:gpr(s64) = COPY $x0
218+
%1:gpr(s64) = G_CONSTANT i64 7
219+
%3:gpr(s64) = G_CONSTANT i64 0
220+
%2:gpr(s64) = G_AND %0, %1
221+
%5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
222+
%4:gpr(s1) = G_TRUNC %5(s32)
223+
G_BRCOND %4(s1), %bb.1
224+
G_BR %bb.0
225+
bb.1:
226+
RET_ReallyLR
227+
228+
...
229+
---
230+
name: dont_fold_cmp_not_0
231+
alignment: 4
232+
legalized: true
233+
regBankSelected: true
234+
body: |
235+
; CHECK-LABEL: name: dont_fold_cmp_not_0
236+
; CHECK: bb.0:
237+
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
238+
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
239+
; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[COPY]], 8064
240+
; CHECK: $xzr = SUBSXri [[ANDXri]], 4, 0, implicit-def $nzcv
241+
; CHECK: Bcc 1, %bb.1, implicit $nzcv
242+
; CHECK: B %bb.0
243+
; CHECK: bb.1:
244+
; CHECK: RET_ReallyLR
245+
bb.0:
246+
successors: %bb.0, %bb.1
247+
liveins: $x0
248+
%0:gpr(s64) = COPY $x0
249+
%1:gpr(s64) = G_CONSTANT i64 4
250+
%3:gpr(s64) = G_CONSTANT i64 4
251+
%2:gpr(s64) = G_AND %0, %1
252+
%5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
253+
%4:gpr(s1) = G_TRUNC %5(s32)
254+
G_BRCOND %4(s1), %bb.1
255+
G_BR %bb.0
256+
bb.1:
257+
RET_ReallyLR

0 commit comments

Comments
 (0)