Skip to content

Commit b6b2574

Browse files
committed
Unmerge GEPs to reduce register pressure on IndirectBr edges.
Summary: GEP merging can sometimes increase the number of live values and register pressure across control edges and cause performance problems particularly if the increased register pressure results in spills. This change implements GEP unmerging around an IndirectBr in certain cases to mitigate the issue. This is in the CodeGenPrepare pass (after all the GEP merging has happened.) With this patch, the Python interpreter loop runs faster by ~5%. Reviewers: sanjoy, hfinkel Reviewed By: hfinkel Subscribers: eastig, junbuml, llvm-commits Differential Revision: https://reviews.llvm.org/D36772 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312930 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 46582be commit b6b2574

File tree

2 files changed

+227
-0
lines changed

2 files changed

+227
-0
lines changed

lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6189,6 +6189,170 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
61896189
return true;
61906190
}
61916191

6192+
// Return true if the GEP has two operands, the first operand is of a sequential
6193+
// type, and the second operand is a constant.
6194+
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
6195+
gep_type_iterator I = gep_type_begin(*GEP);
6196+
return GEP->getNumOperands() == 2 &&
6197+
I.isSequential() &&
6198+
isa<ConstantInt>(GEP->getOperand(1));
6199+
}
6200+
6201+
// Try unmerging GEPs to reduce liveness interference (register pressure) across
6202+
// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
6203+
// reducing liveness interference across those edges benefits global register
6204+
// allocation. Currently handles only certain cases.
6205+
//
6206+
// For example, unmerge %GEPI and %UGEPI as below.
6207+
//
6208+
// ---------- BEFORE ----------
6209+
// SrcBlock:
6210+
// ...
6211+
// %GEPIOp = ...
6212+
// ...
6213+
// %GEPI = gep %GEPIOp, Idx
6214+
// ...
6215+
// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
6216+
// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
6217+
// (* %GEPIOp is alive on the indirectbr edges only because it's used by
6218+
// %UGEPI)
6219+
//
6220+
// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
6221+
// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
6222+
// ...
6223+
//
6224+
// DstBi:
6225+
// ...
6226+
// %UGEPI = gep %GEPIOp, UIdx
6227+
// ...
6228+
// ---------------------------
6229+
//
6230+
// ---------- AFTER ----------
6231+
// SrcBlock:
6232+
// ... (same as above)
6233+
// (* %GEPI is still alive on the indirectbr edges)
6234+
// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
6235+
// unmerging)
6236+
// ...
6237+
//
6238+
// DstBi:
6239+
// ...
6240+
// %UGEPI = gep %GEPI, (UIdx-Idx)
6241+
// ...
6242+
// ---------------------------
6243+
//
6244+
// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
6245+
// no longer alive on them.
6246+
//
6247+
// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
6248+
// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
6249+
// not to disable further simplifications and optimizations as a result of GEP
6250+
// merging.
6251+
//
6252+
// Note this unmerging may increase the length of the data flow critical path
6253+
// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
6254+
// between the register pressure and the length of data-flow critical
6255+
// path. Restricting this to the uncommon IndirectBr case would minimize the
6256+
// impact of potentially longer critical path, if any, and the impact on compile
6257+
// time.
6258+
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
                                             const TargetTransformInfo *TTI) {
  BasicBlock *SrcBlock = GEPI->getParent();

  // Fast exit for the common case: only blocks terminated by an IndirectBr
  // are candidates.
  if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
    return false;

  // GEPI has to be a single-constant-index gep whose index immediate costs no
  // more than TCC_Basic.
  if (!GEPSequentialConstIndexed(GEPI))
    return false;
  ConstantInt *BaseIdx = cast<ConstantInt>(GEPI->getOperand(1));
  if (TTI->getIntImmCost(BaseIdx->getValue(), BaseIdx->getType()) >
      TargetTransformInfo::TCC_Basic)
    return false;

  // The pointer operand must be an instruction that lives in SrcBlock too.
  Value *GEPIOp = GEPI->getOperand(0);
  auto *GEPIOpI = dyn_cast<Instruction>(GEPIOp);
  if (!GEPIOpI || GEPIOpI->getParent() != SrcBlock)
    return false;

  // GEPI needs at least one instruction user outside SrcBlock, i.e. it is
  // already alive on the IndirectBr edge(s); otherwise there is nothing to
  // gain from rebasing other geps onto it.
  bool GEPIUsedOutside = false;
  for (User *Usr : GEPI->users()) {
    auto *UserInst = dyn_cast<Instruction>(Usr);
    if (UserInst && UserInst->getParent() != SrcBlock) {
      GEPIUsedOutside = true;
      break;
    }
  }
  if (!GEPIUsedOutside)
    return false;

  // Collect the geps (the second elements of the chains) to be rebased onto
  // GEPI. Every user of GEPIOp outside SrcBlock must qualify, or GEPIOp would
  // remain alive on the IndirectBr edges and the rewrite would be pointless.
  std::vector<GetElementPtrInst *> UGEPIs;
  for (User *Usr : GEPIOp->users()) {
    if (Usr == GEPI)
      continue;
    // Non-instruction users cannot be handled.
    auto *UserInst = dyn_cast<Instruction>(Usr);
    if (!UserInst)
      return false;
    // Users inside SrcBlock do not extend GEPIOp's liveness across the edges.
    if (UserInst->getParent() == SrcBlock)
      continue;
    // Outside SrcBlock only simple constant-indexed geps on GEPIOp qualify.
    auto *UGEPI = dyn_cast<GetElementPtrInst>(UserInst);
    if (!UGEPI)
      return false;
    if (!GEPSequentialConstIndexed(UGEPI))
      return false;
    if (UGEPI->getOperand(0) != GEPIOp)
      return false;
    // Both indices must share a type so that their difference is well formed,
    // and the user's own index must be no costlier than TCC_Basic.
    ConstantInt *UserIdx = cast<ConstantInt>(UGEPI->getOperand(1));
    if (BaseIdx->getType() != UserIdx->getType())
      return false;
    if (TTI->getIntImmCost(UserIdx->getValue(), UserIdx->getType()) >
        TargetTransformInfo::TCC_Basic)
      return false;
    UGEPIs.push_back(UGEPI);
  }
  if (UGEPIs.empty())
    return false;

  // Make sure every rebased index (UIdx - Idx) is cheap to materialize before
  // touching any instruction.
  for (GetElementPtrInst *UGEPI : UGEPIs) {
    ConstantInt *UserIdx = cast<ConstantInt>(UGEPI->getOperand(1));
    APInt Delta = UserIdx->getValue() - BaseIdx->getValue();
    unsigned ImmCost = TTI->getIntImmCost(Delta, BaseIdx->getType());
    if (ImmCost > TargetTransformInfo::TCC_Basic)
      return false;
  }

  // Commit: re-express each collected gep as an offset from GEPI.
  for (GetElementPtrInst *UGEPI : UGEPIs) {
    ConstantInt *UserIdx = cast<ConstantInt>(UGEPI->getOperand(1));
    Constant *RebasedIdx = ConstantInt::get(
        BaseIdx->getType(), UserIdx->getValue() - BaseIdx->getValue());
    UGEPI->setOperand(0, GEPI);
    UGEPI->setOperand(1, RebasedIdx);
    // A gep chained on a non-inbounds GEPI must not claim inbounds itself;
    // drop the flag to avoid UB.
    if (!GEPI->isInBounds())
      UGEPI->setIsInBounds(false);
  }

  // Sanity check: after the rewrite GEPIOp must only be used inside SrcBlock
  // (i.e. it is no longer alive on the IndirectBr edges).
  assert(find_if(GEPIOp->users(), [&](User *Usr) {
           return cast<Instruction>(Usr)->getParent() != SrcBlock;
         }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock");
  return true;
}
6355+
61926356
bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
61936357
// Bail out if we inserted the instruction to prevent optimizations from
61946358
// stepping on each other's toes.
@@ -6302,6 +6466,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
63026466
optimizeInst(NC, ModifiedDT);
63036467
return true;
63046468
}
6469+
if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
6470+
return true;
6471+
}
63056472
return false;
63066473
}
63076474

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; RUN: opt -codegenprepare -S < %s | FileCheck %s
2+
3+
@exit_addr = constant i8* blockaddress(@gep_unmerging, %exit)
4+
@op1_addr = constant i8* blockaddress(@gep_unmerging, %op1)
5+
@op2_addr = constant i8* blockaddress(@gep_unmerging, %op2)
6+
@op3_addr = constant i8* blockaddress(@gep_unmerging, %op3)
7+
@dummy = global i8 0
8+
9+
; Dispatch-loop style test. The %indirectbr block computes
; %p_postinc = %p_preinc + 1 and ends in an indirectbr. The geps in %op1 and
; %op2 are written against %p_preinc; the CHECK lines expect them to be rebased
; onto %p_postinc with each constant index reduced by 1.
define void @gep_unmerging(i1 %pred, i8* %p0) {
10+
entry:
11+
; Fill slots 0..3 of a local jump table with the block addresses loaded from
; @exit_addr, @op1_addr, @op2_addr and @op3_addr.
%table = alloca [256 x i8*]
12+
%table_0 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 0
13+
%table_1 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 1
14+
%table_2 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 2
15+
%table_3 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 3
16+
%exit_a = load i8*, i8** @exit_addr
17+
%op1_a = load i8*, i8** @op1_addr
18+
%op2_a = load i8*, i8** @op2_addr
19+
%op3_a = load i8*, i8** @op3_addr
20+
store i8* %exit_a, i8** %table_0
21+
store i8* %op1_a, i8** %table_1
22+
store i8* %op2_a, i8** %table_2
23+
store i8* %op3_a, i8** %table_3
24+
br label %indirectbr
25+
26+
; %op1 holds two geps on %p_preinc (indices 3 and 2); the expected output uses
; %p_postinc with indices 2 and 1.
op1:
27+
; CHECK-LABEL: op1:
28+
; CHECK-NEXT: %p1_inc2 = getelementptr i8, i8* %p_postinc, i64 2
29+
; CHECK-NEXT: %p1_inc1 = getelementptr i8, i8* %p_postinc, i64 1
30+
%p1_inc2 = getelementptr i8, i8* %p_preinc, i64 3
31+
%p1_inc1 = getelementptr i8, i8* %p_preinc, i64 2
32+
%a10 = load i8, i8* %p_postinc
33+
%a11 = load i8, i8* %p1_inc1
34+
%a12 = add i8 %a10, %a11
35+
store i8 %a12, i8* @dummy
36+
br i1 %pred, label %indirectbr, label %exit
37+
38+
; %op2 holds one gep on %p_preinc (index 2); the expected output uses
; %p_postinc with index 1.
op2:
39+
; CHECK-LABEL: op2:
40+
; CHECK-NEXT: %p2_inc = getelementptr i8, i8* %p_postinc, i64 1
41+
%p2_inc = getelementptr i8, i8* %p_preinc, i64 2
42+
%a2 = load i8, i8* %p_postinc
43+
store i8 %a2, i8* @dummy
44+
br i1 %pred, label %indirectbr, label %exit
45+
46+
; %op3 has no gep of its own; it feeds %p_postinc back into the phi.
op3:
47+
br i1 %pred, label %indirectbr, label %exit
48+
49+
; Dispatch block: loads the byte at %p_preinc, uses it to index %table, and
; branches indirectly to the loaded address.
; NOTE(review): %op3 is stored in the table above but is not listed among the
; indirectbr destinations below - confirm this is intentional.
indirectbr:
50+
%p_preinc = phi i8* [%p0, %entry], [%p1_inc2, %op1], [%p2_inc, %op2], [%p_postinc, %op3]
51+
%p_postinc = getelementptr i8, i8* %p_preinc, i64 1
52+
%next_op = load i8, i8* %p_preinc
53+
%p_zext = zext i8 %next_op to i64
54+
%slot = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 %p_zext
55+
%target = load i8*, i8** %slot
56+
indirectbr i8* %target, [label %exit, label %op1, label %op2]
57+
58+
exit:
59+
ret void
60+
}

0 commit comments

Comments
 (0)