Skip to content

Commit 5610d02

Browse files
pkwasnie-inteligcbot
authored and committed
GEP LSR - allow unknown constant value as induction step
GEP LSR requires induction step to be constant integer known at compilation time. Allow any value, as long as it is constant in loop body.
1 parent 6f2c8c5 commit 5610d02

File tree

3 files changed

+199
-23
lines changed

3 files changed

+199
-23
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/GEPLoopStrengthReduction/GEPLoopStrengthReduction.cpp

Lines changed: 76 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,6 @@ using namespace IGC;
9797
// global float *p = buf + id + i;
9898
// *(p + 1) = *p + 3.0f;
9999
// }
100-
//
101-
// TODO: At the moment pass accepts constant indices as llvm::Constant. These could be any llvm::Value
102-
// as long as value doesn't change inside loop body (is constant for full loop lifetime).
103100

104101

105102
enum ReductionType
@@ -187,10 +184,10 @@ friend class Scorer;
187184

188185
public:
189186

190-
ReductionCandidateGroup(Loop *L, const DominatorTree *DT, GetElementPtrInst *GEP, const SCEV *S, int64_t Step)
187+
ReductionCandidateGroup(Loop *L, const DominatorTree *DT, GetElementPtrInst *GEP, const SCEV *S, const SCEV *Step)
191188
: L(L), DT(DT), Step(Step), Base(GEP, S, 0), RT(REDUCE_TO_PREHEADER) {}
192189

193-
bool addToGroup(ScalarEvolution &SE, GetElementPtrInst *GEP, const SCEV *S, int64_t Step);
190+
bool addToGroup(ScalarEvolution &SE, GetElementPtrInst *GEP, const SCEV *S, const SCEV *Step);
194191

195192
void transform(IGCLLVM::IRBuilder<> &IRB, SCEVExpander &E);
196193

@@ -223,14 +220,16 @@ friend class Scorer;
223220
void reduceToPreheader(IGCLLVM::IRBuilder<> &IRB, SCEVExpander &E);
224221
void reduceIndexOnly(IGCLLVM::IRBuilder<> &IRB, SCEVExpander &E);
225222

223+
Value *getStepValue(IGCLLVM::IRBuilder<> &IRB);
224+
226225
// Base GEP to reduce
227226
ReductionCandidate Base;
228227

229228
// Other GEPs that can be reduced together with base
230229
SmallVector<ReductionCandidate, 4> Others;
231230

232231
// Increment step value
233-
int64_t Step;
232+
const SCEV *Step;
234233

235234
// Group member with the smallest SCEV expression. Preferred for reduction
236235
// to preheader, as it should give smallest increase in register pressure.
@@ -289,7 +288,7 @@ class Analyzer
289288
void analyzeGEP(GetElementPtrInst *GEP);
290289
bool doInitialValidation(GetElementPtrInst *GEP);
291290

292-
bool deconstructSCEV(const SCEV *S, const SCEV *&Start, int64_t &Step);
291+
bool deconstructSCEV(const SCEV *S, const SCEV *&Start, const SCEV *&Step);
293292

294293
DominatorTree &DT;
295294
Loop &L;
@@ -355,6 +354,9 @@ namespace SCEVHelper
355354
{
356355
const SCEV *dropExt(const SCEV *S);
357356

357+
bool isValidStep(const SCEV *S);
358+
bool isEqual(const SCEV *A, const SCEV *B);
359+
358360
// ScalarEvolution::getAddExpr requires all operands to have the same
359361
// type. Extend type if required.
360362

@@ -481,9 +483,9 @@ bool ReductionCandidate::isBetterForReduction(const ReductionCandidate &Other)
481483
// New candidate can take position of group's base if it uses less instructions
482484
// to calculate.
483485
// Returns true if candidate was added to group.
484-
bool ReductionCandidateGroup::addToGroup(ScalarEvolution &SE, GetElementPtrInst *GEP, const SCEV *S, int64_t Step)
486+
bool ReductionCandidateGroup::addToGroup(ScalarEvolution &SE, GetElementPtrInst *GEP, const SCEV *S, const SCEV *Step)
485487
{
486-
if (this->Step != Step)
488+
if (!SCEVHelper::isEqual(this->Step, Step))
487489
return false;
488490

489491
if (Base.GEP->getPointerOperand() != GEP->getPointerOperand())
@@ -619,7 +621,7 @@ void ReductionCandidateGroup::reduceToPreheader(IGCLLVM::IRBuilder<> &IRB, SCEVE
619621
Value *Pointer = IRB.CreateGEP(Base.GEP->getSourceElementType(), Base.GEP->getPointerOperand(), Indices);
620622

621623
// Create phi node if pointer is moved in loop
622-
if (Step != 0)
624+
if (!Step->isZero())
623625
{
624626
// Add new phi node with pointer as induction variable
625627
SmallVector<BasicBlock*, 4> Latches;
@@ -634,7 +636,7 @@ void ReductionCandidateGroup::reduceToPreheader(IGCLLVM::IRBuilder<> &IRB, SCEVE
634636
for (auto *L : Latches)
635637
{
636638
IRB.SetInsertPoint(&L->back());
637-
Value *Inc = IRB.CreateGEP(Phi, IRB.getInt64(Step));
639+
Value *Inc = IRB.CreateGEP(Phi, getStepValue(IRB));
638640
Phi->addIncoming(Inc, L);
639641
}
640642

@@ -675,6 +677,19 @@ void ReductionCandidateGroup::reduceIndexOnly(IGCLLVM::IRBuilder<> &IRB, SCEVExp
675677
}
676678

677679

680+
// Materializes the group's induction step (stored as a SCEV) as an IR value
// that can be used directly as a GEP index.
//   - SCEVConstant: returns an i64 constant holding the sign-extended value.
//   - SCEVUnknown:  returns the llvm::Value wrapped by the SCEV node.
// Any other SCEV kind is reported through IGC_ASSERT_MESSAGE and nullptr is
// returned.
Value *ReductionCandidateGroup::getStepValue(IGCLLVM::IRBuilder<> &IRB)
{
    // Reuse the pointer produced by dyn_cast instead of casting a second time.
    if (auto *ConstStep = dyn_cast<SCEVConstant>(Step))
        return IRB.getInt64(ConstStep->getValue()->getSExtValue());

    if (auto *UnknownStep = dyn_cast<SCEVUnknown>(Step))
        return UnknownStep->getValue();

    IGC_ASSERT_MESSAGE(0, "invalid induction value type");
    return nullptr;
}
691+
692+
678693
void Scorer::score(SmallVectorImpl<ReductionCandidateGroup> &Candidates)
679694
{
680695
for (auto &C : Candidates)
@@ -704,7 +719,7 @@ void Scorer::scoreReducedInstructions(ReductionCandidateGroup &Candidate)
704719
// 3. "+ base_ptr" - single "add" instruction
705720
int score = 0;
706721

707-
if (Candidate.Step != 0)
722+
if (!Candidate.Step->isZero())
708723
{
709724
// Reduction adds new instruction - incrementation of new induction variable at the end
710725
// of the iteration.
@@ -901,7 +916,7 @@ void Analyzer::analyzeGEP(GetElementPtrInst *GEP)
901916
return;
902917

903918
const SCEV *Start = nullptr;
904-
int64_t Step = 0;
919+
const SCEV *Step = nullptr;
905920

906921
if (!deconstructSCEV(S, Start, Step))
907922
return;
@@ -999,7 +1014,7 @@ bool Analyzer::doInitialValidation(GetElementPtrInst *GEP)
9991014
// Takes SCEV expression returned by ScalarEvolution and deconstructs it into
10001015
// expected format { start, +, step }. Returns false if expressions can't be
10011016
// parsed and reduced.
1002-
bool Analyzer::deconstructSCEV(const SCEV *S, const SCEV *&Start, int64_t &Step)
1017+
bool Analyzer::deconstructSCEV(const SCEV *S, const SCEV *&Start, const SCEV *&Step)
10031018
{
10041019
// Drop ext instructions to analyze nested content.
10051020
S = SCEVHelper::dropExt(S);
@@ -1009,10 +1024,10 @@ bool Analyzer::deconstructSCEV(const SCEV *S, const SCEV *&Start, int64_t &Step)
10091024
// { start, +, 0 }
10101025
// This will do LICM-like reduction moving GEP to preheader, without adding new
10111026
// induction variable.
1012-
if (IGCLLVM::isSafeToExpandAt(S, &L.getLoopPreheader()->back(), &SE, &E))
1027+
if (SE.isLoopInvariant(S, &L))
10131028
{
10141029
Start = S;
1015-
Step = 0;
1030+
Step = SE.getConstant(Type::getInt64Ty(L.getHeader()->getContext()), 0);
10161031
return true;
10171032
}
10181033

@@ -1027,12 +1042,17 @@ bool Analyzer::deconstructSCEV(const SCEV *S, const SCEV *&Start, int64_t &Step)
10271042
if (Add->getNumOperands() != 2)
10281043
return false;
10291044

1030-
const SCEVConstant *Op = dyn_cast<SCEVConstant>(Add->getOperand(1));
1031-
if (!Op)
1045+
const SCEV *OpStep = Add->getOperand(1);
1046+
1047+
// Step must be constant in loop's body.
1048+
if (!SE.isLoopInvariant(OpStep, &L))
1049+
return false;
1050+
1051+
if (!SCEVHelper::isValidStep(OpStep))
10321052
return false;
10331053

10341054
Start = Add->getStart();
1035-
Step = Op->getValue()->getSExtValue();
1055+
Step = OpStep;
10361056

10371057
return IGCLLVM::isSafeToExpandAt(Start, &L.getLoopPreheader()->back(), &SE, &E);
10381058
}
@@ -1048,20 +1068,20 @@ bool Analyzer::deconstructSCEV(const SCEV *S, const SCEV *&Start, int64_t &Step)
10481068
if (auto *Add = dyn_cast<SCEVAddExpr>(S))
10491069
{
10501070
// There can be only one expression with step != 0.
1051-
Step = 0;
1071+
Step = SE.getConstant(Type::getInt64Ty(L.getHeader()->getContext()), 0);
10521072

10531073
const SCEV *OpSCEV = nullptr;
1054-
int64_t OpStep = 0;
1074+
const SCEV *OpStep = nullptr;
10551075
SCEVHelper::SCEVAddBuilder Builder(SE);
10561076

10571077
for (auto *Op : Add->operands())
10581078
{
10591079
if (!deconstructSCEV(Op, OpSCEV, OpStep))
10601080
return false;
10611081

1062-
if (OpStep != 0)
1082+
if (!OpStep->isZero())
10631083
{
1064-
if (Step != 0)
1084+
if (!Step->isZero())
10651085
return false; // unsupported expression with multiple steps
10661086
Step = OpStep;
10671087
}
@@ -1306,6 +1326,39 @@ const SCEV *SCEVHelper::dropExt(const SCEV *S)
13061326
}
13071327

13081328

1329+
bool SCEVHelper::isValidStep(const SCEV *S)
1330+
{
1331+
switch (S->getSCEVType())
1332+
{
1333+
case scConstant:
1334+
case scUnknown:
1335+
return true;
1336+
default:
1337+
return false;
1338+
}
1339+
}
1340+
1341+
1342+
bool SCEVHelper::isEqual(const SCEV *A, const SCEV *B)
1343+
{
1344+
// Scalar Evolution keeps unique SCEV instances, so we can compare pointers.
1345+
if (A == B)
1346+
return true;
1347+
1348+
if (A->getSCEVType() != B->getSCEVType())
1349+
return false;
1350+
1351+
switch (A->getSCEVType())
1352+
{
1353+
case scConstant:
1354+
// Can be different bit width, but same integer value.
1355+
return cast<SCEVConstant>(A)->getValue()->getZExtValue() == cast<SCEVConstant>(B)->getValue()->getZExtValue();
1356+
default:
1357+
return false;
1358+
}
1359+
}
1360+
1361+
13091362
SCEVHelper::SCEVAddBuilder &SCEVHelper::SCEVAddBuilder::add(const SCEV *S, bool Negative)
13101363
{
13111364
IGC_ASSERT(S->getType()->isIntegerTy());
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt -debugify --igc-gep-loop-strength-reduction -check-debugify -S < %s 2>&1 | FileCheck %s
10+
11+
; Pointer is indexed with an unknown value. As long as the step is constant, we can optimize GEP.
12+
13+
; Debug-info related check
14+
; CHECK: CheckModuleDebugify: PASS
15+
16+
define spir_kernel void @test(i32 addrspace(1)* %p, i32 %n, i64 %step) {
17+
entry:
18+
%cmp1 = icmp slt i32 0, %n
19+
br i1 %cmp1, label %for.body.lr.ph, label %for.end
20+
21+
; CHECK-LABEL: for.body.lr.ph:
22+
; CHECK: [[GEP_PHI1:%.*]] = getelementptr i32, i32 addrspace(1)* %p, i64 0
23+
; CHECK: br label %for.body
24+
for.body.lr.ph: ; preds = %entry
25+
br label %for.body
26+
27+
; CHECK-LABEL: for.body:
28+
; CHECK: [[GEP:%.*]] = phi i32 addrspace(1)* [ [[GEP_PHI1]], %for.body.lr.ph ], [ [[GEP_PHI2:%.*]], %for.body ]
29+
; CHECK: %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
30+
; CHECK-NOT: getelementptr inbounds i32, i32 addrspace(1)* %p
31+
; CHECK: [[LOAD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
32+
; CHECK: %add = add nsw i32 [[LOAD]], 1
33+
; CHECK: store i32 %add, i32 addrspace(1)* [[GEP]], align 4
34+
; CHECK: %inc = add nuw nsw i32 %i.02, 1
35+
; CHECK: %cmp = icmp slt i32 %inc, %n
36+
; CHECK: [[GEP_PHI2]] = getelementptr i32, i32 addrspace(1)* [[GEP]], i64 %step
37+
; CHECK: br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
38+
for.body: ; preds = %for.body.lr.ph, %for.body
39+
%i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
40+
%idxprom = zext i32 %i.02 to i64
41+
%idxprom2 = mul i64 %idxprom, %step
42+
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %idxprom2
43+
%0 = load i32, i32 addrspace(1)* %arrayidx, align 4
44+
%add = add nsw i32 %0, 1
45+
store i32 %add, i32 addrspace(1)* %arrayidx, align 4
46+
%inc = add nuw nsw i32 %i.02, 1
47+
%cmp = icmp slt i32 %inc, %n
48+
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
49+
50+
for.cond.for.end_crit_edge: ; preds = %for.body
51+
br label %for.end
52+
53+
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
54+
ret void
55+
}
56+
57+
!igc.functions = !{!0}
58+
59+
!0 = !{void (i32 addrspace(1)*, i32, i64)* @test, !1}
60+
!1 = !{!2}
61+
!2 = !{!"function_type", i32 0}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; RUN: igc_opt -debugify --igc-gep-loop-strength-reduction -check-debugify -S < %s 2>&1 | FileCheck %s
10+
11+
; Pointer is indexed with an unknown value. As long as the step is constant, we can optimize GEP.
12+
13+
; Debug-info related check
14+
; CHECK: CheckModuleDebugify: PASS
15+
16+
define spir_kernel void @test(i32 addrspace(1)* %p, i32 %n, i64 %step) {
17+
entry:
18+
%cmp1 = icmp slt i32 0, %n
19+
br i1 %cmp1, label %for.body.lr.ph, label %for.end
20+
21+
; CHECK-LABEL: for.body.lr.ph:
22+
; CHECK: [[GEP_PHI1:%.*]] = getelementptr i32, i32 addrspace(1)* %p, i64 0
23+
; CHECK: br label %for.body
24+
for.body.lr.ph: ; preds = %entry
25+
br label %for.body
26+
27+
; CHECK-LABEL: for.body:
28+
; CHECK: [[GEP:%.*]] = phi i32 addrspace(1)* [ [[GEP_PHI1]], %for.body.lr.ph ], [ [[GEP_PHI2:%.*]], %for.body ]
29+
; CHECK: %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
30+
; CHECK-NOT: getelementptr inbounds i32, i32 addrspace(1)* %p
31+
; CHECK: store i32 5, i32 addrspace(1)* [[GEP]], align 4
32+
; CHECK: [[GEP2:%.*]] = getelementptr i32, i32 addrspace(1)* [[GEP]], i64 39
33+
; CHECK: store i32 10, i32 addrspace(1)* [[GEP2]], align 4
34+
; CHECK: %inc = add nuw nsw i32 %i.02, 1
35+
; CHECK: %cmp = icmp slt i32 %inc, %n
36+
; CHECK: [[GEP_PHI2]] = getelementptr i32, i32 addrspace(1)* [[GEP]], i64 %step
37+
; CHECK: br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
38+
for.body: ; preds = %for.body.lr.ph, %for.body
39+
%i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
40+
%idxprom = zext i32 %i.02 to i64
41+
%idxprom2 = mul i64 %idxprom, %step
42+
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %idxprom2
43+
store i32 5, i32 addrspace(1)* %arrayidx, align 4
44+
%idxprom3 = add nuw nsw i64 %idxprom2, 39
45+
%arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %idxprom3
46+
store i32 10, i32 addrspace(1)* %arrayidx2, align 4
47+
%inc = add nuw nsw i32 %i.02, 1
48+
%cmp = icmp slt i32 %inc, %n
49+
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
50+
51+
for.cond.for.end_crit_edge: ; preds = %for.body
52+
br label %for.end
53+
54+
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
55+
ret void
56+
}
57+
58+
!igc.functions = !{!0}
59+
60+
!0 = !{void (i32 addrspace(1)*, i32, i64)* @test, !1}
61+
!1 = !{!2}
62+
!2 = !{!"function_type", i32 0}

0 commit comments

Comments
 (0)