Skip to content

Commit bd09197

Browse files
pkwasnie-intel authored and igcbot committed
GEP Loop Strength Reduction pass improvements
1. Use one instance of SCEVExpander for all reductions in the function, which limits the number of duplicated instructions. 2. Improve comparison of SCEV expressions, so that more cases are grouped together, lowering the number of duplicated instructions.
1 parent 7e7165e commit bd09197

File tree

2 files changed

+48
-28
lines changed

2 files changed

+48
-28
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/GEPLoopStrengthReduction/GEPLoopStrengthReduction.cpp

Lines changed: 44 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -262,8 +262,8 @@ class Scorer
262262
class Reducer
263263
{
264264
public:
265-
Reducer(const DataLayout &DL, DominatorTree &DT, Loop &L, LoopInfo &LI, ModuleMetaData &MMD, RegisterPressureEstimate &RPE, ScalarEvolution &SE, unsigned MaxPressure, bool AllowLICM)
266-
: DT(DT), L(L), LI(LI), RPE(RPE), SE(SE), E(SE, DL, "gep-loop-strength-reduction"), MaxPressure(MaxPressure), AllowLICM(AllowLICM), Scorer(DL, L, MMD) {}
265+
Reducer(const DataLayout &DL, DominatorTree &DT, Loop &L, LoopInfo &LI, ModuleMetaData &MMD, RegisterPressureEstimate &RPE, ScalarEvolution &SE, SCEVExpander &E, unsigned MaxPressure, bool AllowLICM)
266+
: DT(DT), L(L), LI(LI), RPE(RPE), SE(SE), E(E), MaxPressure(MaxPressure), AllowLICM(AllowLICM), Scorer(DL, L, MMD) {}
267267

268268
bool reduce();
269269

@@ -280,7 +280,7 @@ class Reducer
280280
LoopInfo &LI;
281281
RegisterPressureEstimate &RPE;
282282
ScalarEvolution &SE;
283-
SCEVExpander E;
283+
SCEVExpander &E;
284284

285285
Scorer Scorer;
286286

@@ -305,22 +305,26 @@ namespace SCEVHelper
305305
class SCEVAddBuilder
306306
{
307307
public:
308-
SCEVAddBuilder(ScalarEvolution &SE, Type *T) :
309-
SE(SE), T(T)
310-
{
311-
IGC_ASSERT_MESSAGE(T->isIntegerTy(), "builder requires integer type");
312-
}
308+
SCEVAddBuilder(ScalarEvolution &SE) : SE(SE) {}
313309

314310
SCEVAddBuilder &add(const SCEV *S, bool Negative = false);
315311

316312
SCEVAddBuilder &addNegative(const SCEV *S) { return add(S, true); }
317313

318-
const SCEV *build() { return SE.getAddExpr(Ops); }
314+
const SCEV *build();
319315

320316
private:
317+
318+
struct Op
319+
{
320+
Op(const SCEV *S, bool Negative) : S(S), Negative(Negative) {}
321+
322+
const SCEV *S;
323+
bool Negative;
324+
};
325+
321326
ScalarEvolution &SE;
322-
Type *T;
323-
SmallVector<const SCEV*, 16> Ops;
327+
SmallVector<Op, 16> Ops;
324328
};
325329
};
326330

@@ -406,7 +410,7 @@ bool ReductionCandidateGroup::addToGroup(ScalarEvolution &SE, GetElementPtrInst
406410
// Can't use ScalarEvolution::computeConstantDifference, as it only
407411
// supports SCEVAddExpr with two operands. Calculate difference as:
408412
// new candidate's operands + (-1 * base's operands)
409-
SCEVHelper::SCEVAddBuilder Builder(SE, SE.getWiderType(S->getType(), Base.S->getType()));
413+
SCEVHelper::SCEVAddBuilder Builder(SE);
410414
const SCEVConstant *Sum = dyn_cast<SCEVConstant>(Builder.add(S).addNegative(Base.S).build());
411415
if (!Sum)
412416
return false;
@@ -900,7 +904,7 @@ bool Reducer::deconstructSCEV(const SCEV *S, const SCEV *&Start, int64_t &Step)
900904

901905
const SCEV *OpSCEV = nullptr;
902906
int64_t OpStep = 0;
903-
SCEVHelper::SCEVAddBuilder Builder(SE, S->getType());
907+
SCEVHelper::SCEVAddBuilder Builder(SE);
904908

905909
for (auto *Op : Add->operands())
906910
{
@@ -1036,16 +1040,33 @@ SCEVHelper::SCEVAddBuilder &SCEVHelper::SCEVAddBuilder::add(const SCEV *S, bool
10361040
return *this;
10371041
}
10381042

1043+
Ops.emplace_back(S, Negative);
1044+
1045+
return *this;
1046+
}
1047+
1048+
1049+
const SCEV *SCEVHelper::SCEVAddBuilder::build()
1050+
{
10391051
// ScalarEvolution::getAddExpr requires all operands to have the same
1040-
// type. Extend type if required.
1041-
S = S->getType() == T ? S : SE.getSignExtendExpr(S, T);
1052+
// type. First find the widest type.
1053+
Type *T = nullptr;
1054+
for (auto *It = Ops.begin(); It != Ops.end(); ++It)
1055+
{
1056+
T = T ? SE.getWiderType(T, It->S->getType()) : It->S->getType();
1057+
}
10421058

1043-
// Change expression to "-1 * expression"
1044-
S = Negative ? SE.getNegativeSCEV(S) : S;
1059+
// Join list of operands, extending type if required.
1060+
SmallVector<const SCEV*, 16> FinalOps;
10451061

1046-
Ops.push_back(S);
1062+
for (auto *It = Ops.begin(); It != Ops.end(); ++It)
1063+
{
1064+
const SCEV *S = It->S;
1065+
S = S->getType() == T ? S : SE.getSignExtendExpr(S, T);
1066+
FinalOps.push_back(It->Negative ? SE.getNegativeSCEV(S) : S);
1067+
}
10471068

1048-
return *this;
1069+
return SE.getAddExpr(FinalOps);
10491070
}
10501071

10511072

@@ -1093,9 +1114,12 @@ bool GEPLoopStrengthReduction::runOnFunction(llvm::Function &F)
10931114

10941115
bool changed = false;
10951116

1117+
// Using one SCEV expander between all reductions reduces number of duplicated new instructions.
1118+
auto E = SCEVExpander(SE, DL, "gep-loop-strength-reduction");
1119+
10961120
for (Loop *L : LI.getLoopsInPreorder())
10971121
{
1098-
changed |= Reducer(DL, DT, *L, LI, MMD, RPE, SE, MaxPressure, AllowLICM).reduce();
1122+
changed |= Reducer(DL, DT, *L, LI, MMD, RPE, SE, E, MaxPressure, AllowLICM).reduce();
10991123
}
11001124

11011125
return changed;

IGC/Compiler/tests/GEPLoopStrengthReduction/one_access_addexpr.ll

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -35,14 +35,10 @@ entry:
3535
br i1 %cmp1, label %for.body.lr.ph, label %for.end
3636

3737
; CHECK-LABEL: for.body.lr.ph:
38-
; CHECK: [[EXT1:%.*]] = sext i32 %b to i64
39-
; CHECK: [[EXT2:%.*]] = sext i32 %a to i64
40-
; CHECK: [[ADD1:%.*]] = add i64 [[EXT1]], [[EXT2]]
41-
; CHECK: [[ADD2:%.*]] = add i32 %b, %a
42-
; CHECK: [[MUL:%.*]] = mul i32 %c, [[ADD2]]
43-
; CHECK: [[EXT3:%.*]] = sext i32 [[MUL]] to i64
44-
; CHECK: [[ADD3:%.*]] = add i64 [[ADD1]], [[EXT3]]
45-
; CHECK: [[GEP_PHI1:%.*]] = getelementptr i32, i32 addrspace(1)* %p, i64 [[ADD3]]
38+
; CHECK: [[ADD1:%.*]] = add i32 %b, %a
39+
; CHECK: [[MUL:%.*]] = mul i32 %c, [[ADD1]]
40+
; CHECK: [[ADD2:%.*]] = add i32 [[ADD1]], [[MUL]]
41+
; CHECK: [[GEP_PHI1:%.*]] = getelementptr i32, i32 addrspace(1)* %p, i32 [[ADD2]]
4642
; CHECK: br label %for.body
4743
for.body.lr.ph: ; preds = %entry
4844
br label %for.body

0 commit comments

Comments
 (0)