Skip to content

Commit f38d9c1

Browse files
pkwasnie-intel authored and igcbot committed
GEP LSR - disable illegal types by default
Scalar Evolution can produce SCEV expressions of any integer width, leaving cleanup of the expanded code to the legalization pass. By default, skip SCEVs with illegal integer types; a dedicated option is provided to re-enable them.
1 parent 2ccfca4 commit f38d9c1

File tree

3 files changed

+187
-2
lines changed

3 files changed

+187
-2
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/GEPLoopStrengthReduction/GEPLoopStrengthReduction.cpp

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ namespace SCEVHelper
354354
{
355355
const SCEV *dropExt(const SCEV *S);
356356

357+
bool isValid(const SCEV *S);
357358
bool isEqual(const SCEV *A, const SCEV *B);
358359

359360
// ScalarEvolution::getAddExpr requires all operands to have the same
@@ -459,7 +460,7 @@ void ReductionCandidateGroup::print(raw_ostream &OS)
459460
OS << ", ";
460461
}
461462
OS << "], step=";
462-
OS << Step;
463+
Step->print(OS);
463464
OS << "}";
464465
}
465466

@@ -927,7 +928,7 @@ void Analyzer::analyzeGEP(GetElementPtrInst *GEP)
927928
Value *Index = *(GEP->indices().end() - 1);
928929

929930
const SCEV *S = SE.getSCEV(Index);
930-
if (isa<SCEVCouldNotCompute>(S))
931+
if (!SCEVHelper::isValid(S))
931932
return;
932933

933934
const SCEV *Start = nullptr;
@@ -1385,6 +1386,55 @@ const SCEV *SCEVHelper::dropExt(const SCEV *S)
13851386
}
13861387

13871388

1389+
// Returns true if the SCEV expression is legal for this pass.
//
// The result is false when S is SCEVCouldNotCompute. Otherwise, unless the
// EnableGEPLSRAnyIntBitWidth flag is enabled, the result is also false when a
// cast expression found in S has an operand type or a result type that is an
// integer whose width is not 8, 16, 32 or 64 bits.
1390+
bool SCEVHelper::isValid(const SCEV *S)
1391+
{
1392+
if (isa<SCEVCouldNotCompute>(S))
1393+
return false;
1394+
1395+
// Scalar Evolution doesn't have a SCEV expression for bitwise-and. Instead,
1396+
// if possible, SE produces expressions of any integer size, leaving cleanup
1397+
// to the legalization pass. For example, this code:
1398+
// %1 = shl i64 %0, 32
1399+
1400+
// %2 = ashr exact i64 %1, 30
// produces an i34 integer SCEV.
1401+
//
1402+
// By default, don't allow illegal integer types.
1403+
// When the flag is enabled, the integer-width check below is skipped entirely.
if (IGC_IS_FLAG_ENABLED(EnableGEPLSRAnyIntBitWidth))
1404+
return true;
1405+
1406+
// Predicate: true only for integer types whose bit width is not one of
// 8, 16, 32, 64. Non-integer types are never reported as invalid.
// NOTE(review): the lambda captures nothing, so declaring it with `auto`
// would avoid the std::function wrapper.
std::function<bool(Type*)> IsInvalidInt = [](Type *Ty)
1407+
{
1408+
if (!Ty->isIntegerTy())
1409+
return false;
1410+
1411+
switch (Ty->getScalarSizeInBits())
1412+
{
1413+
case 8:
1414+
case 16:
1415+
case 32:
1416+
case 64:
1417+
return false;
1418+
default:
1419+
return true;
1420+
}
1421+
};
1422+
1423+
// Only cast expressions are inspected: both the type being cast from and the
// type being cast to are checked. Any other kind of expression yields false
// from the predicate. Presumably SCEVExprContains applies the predicate to
// every sub-expression of S - TODO confirm against the LLVM header.
// Note that the lambda parameter S shadows the function parameter S.
bool HasInvalidInt = SCEVExprContains(S, [&](const SCEV *S) {
1424+
if (auto *Cast = dyn_cast<SCEVCastExpr>(S))
1425+
return IsInvalidInt(Cast->getOperand()->getType()) || IsInvalidInt(Cast->getType());
1426+
return false;
1427+
});
1428+
1429+
// Trace the rejected expression (the outer S) to the debug stream.
LLVM_DEBUG(
1430+
if (HasInvalidInt) {
1431+
dbgs() << " Dropping SCEV with invalid integer type: "; S->print(dbgs()); dbgs() << "\n";
1432+
});
1433+
1434+
return !HasInvalidInt;
1435+
}
1436+
1437+
13881438
bool SCEVHelper::isEqual(const SCEV *A, const SCEV *B)
13891439
{
13901440
// Scalar Evolution keeps unique SCEV instances, so we can compare pointers.
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: regkeys
10+
; RUN: igc_opt --regkey=EnableGEPLSRAnyIntBitWidth=0 -debugify --igc-gep-loop-strength-reduction -check-debugify -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-BLOCK-ILLEGAL
11+
; RUN: igc_opt --regkey=EnableGEPLSRAnyIntBitWidth=1 -debugify --igc-gep-loop-strength-reduction -check-debugify -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ALLOW-ILLEGAL
12+
;
13+
; Test for illegal types in SCEV expressions.
14+
15+
; Debug-info related check
16+
; CHECK-ALLOW-ILLEGAL: CheckModuleDebugify: PASS
17+
18+
; A combination of shl/ashr instructions creates a SCEV with i34 type:
19+
; (sext i34 {(4 * (trunc i64 %b to i34)),+,(4 * (trunc i64 %a to i34))}<%for.body> to i64)
20+
define spir_kernel void @test_shl_ashr(i32 addrspace(1)* %p, i64 %n, i64 %a, i64 %b) {
21+
entry:
22+
%cmp1 = icmp slt i64 0, %n
23+
br i1 %cmp1, label %for.body.lr.ph, label %for.end
24+
25+
; CHECK-BLOCK-ILLEGAL-LABEL: for.body.lr.ph:
26+
; CHECK-BLOCK-ILLEGAL: br label %for.body
27+
; CHECK-ALLOW-ILLEGAL-LABEL: for.body.lr.ph:
28+
; CHECK-ALLOW-ILLEGAL: [[TRUNC1:%.*]] = trunc i64 %b to i34
29+
; CHECK-ALLOW-ILLEGAL: [[SHL1:%.*]] = shl i34 [[TRUNC1]], 2
30+
; CHECK-ALLOW-ILLEGAL: [[INDEX:%.*]] = sext i34 [[SHL1]] to i64
31+
; CHECK-ALLOW-ILLEGAL: [[GEP_PHI1:%.*]] = getelementptr i32, i32 addrspace(1)* %p, i64 [[INDEX]]
32+
; CHECK-ALLOW-ILLEGAL: [[TRUNC2:%.*]] = trunc i64 %a to i34
33+
; CHECK-ALLOW-ILLEGAL: [[SHL2:%.*]] = shl i34 [[TRUNC2]], 2
34+
; CHECK-ALLOW-ILLEGAL: br label %for.body
35+
for.body.lr.ph: ; preds = %entry
36+
br label %for.body
37+
38+
; CHECK-BLOCK-ILLEGAL-LABEL: for.body:
39+
; CHECK-BLOCK-ILLEGAL: %i.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
40+
; CHECK-BLOCK-ILLEGAL: %0 = mul i64 %i.02, %a
41+
; CHECK-BLOCK-ILLEGAL: %1 = add i64 %0, %b
42+
; CHECK-BLOCK-ILLEGAL: %2 = shl i64 %1, 32
43+
; CHECK-BLOCK-ILLEGAL: %idxprom = ashr exact i64 %2, 30
44+
; CHECK-BLOCK-ILLEGAL: %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %idxprom
45+
; CHECK-BLOCK-ILLEGAL: store i32 39, i32 addrspace(1)* %arrayidx, align 4
46+
; CHECK-BLOCK-ILLEGAL: %inc = add nuw nsw i64 %i.02, 1
47+
; CHECK-BLOCK-ILLEGAL: %cmp = icmp slt i64 %inc, %n
48+
; CHECK-BLOCK-ILLEGAL: br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
49+
; CHECK-ALLOW-ILLEGAL-LABEL: for.body:
50+
; CHECK-ALLOW-ILLEGAL: [[GEP:%.*]] = phi i32 addrspace(1)* [ [[GEP_PHI1]], %for.body.lr.ph ], [ [[GEP_PHI2:%.*]], %for.body ]
51+
; CHECK-ALLOW-ILLEGAL: %i.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
52+
; CHECK-ALLOW-ILLEGAL: store i32 39, i32 addrspace(1)* [[GEP]], align 4
53+
; CHECK-ALLOW-ILLEGAL: %inc = add nuw nsw i64 %i.02, 1
54+
; CHECK-ALLOW-ILLEGAL: %cmp = icmp slt i64 %inc, %n
55+
; CHECK-ALLOW-ILLEGAL: [[GEP_PHI2]] = getelementptr i32, i32 addrspace(1)* [[GEP]], i34 [[SHL2]]
56+
; CHECK-ALLOW-ILLEGAL: br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
57+
for.body: ; preds = %for.body.lr.ph, %for.body
58+
%i.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
59+
%0 = mul i64 %i.02, %a
60+
%1 = add i64 %0, %b
61+
%2 = shl i64 %1, 32
62+
%idxprom = ashr exact i64 %2, 30
63+
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %idxprom
64+
store i32 39, i32 addrspace(1)* %arrayidx, align 4
65+
%inc = add nuw nsw i64 %i.02, 1
66+
%cmp = icmp slt i64 %inc, %n
67+
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
68+
69+
for.cond.for.end_crit_edge: ; preds = %for.body
70+
br label %for.end
71+
72+
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
73+
ret void
74+
}
75+
76+
; Instruction "and" creates SCEV with i7 type:
77+
; (zext i7 {(trunc i64 %b to i7),+,(trunc i64 %a to i7)}<%for.body> to i64)
78+
define spir_kernel void @test_and(i32 addrspace(1)* %p, i64 %n, i64 %a, i64 %b) {
79+
entry:
80+
%cmp1 = icmp slt i64 0, %n
81+
br i1 %cmp1, label %for.body.lr.ph, label %for.end
82+
83+
; CHECK-BLOCK-ILLEGAL-LABEL: for.body.lr.ph:
84+
; CHECK-BLOCK-ILLEGAL: br label %for.body
85+
; CHECK-ALLOW-ILLEGAL-LABEL: for.body.lr.ph:
86+
; CHECK-ALLOW-ILLEGAL: [[TRUNC1:%.*]] = trunc i64 %b to i7
87+
; CHECK-ALLOW-ILLEGAL: [[INDEX:%.*]] = zext i7 [[TRUNC1]] to i64
88+
; CHECK-ALLOW-ILLEGAL: [[GEP_PHI1:%.*]] = getelementptr i32, i32 addrspace(1)* %p, i64 [[INDEX]]
89+
; CHECK-ALLOW-ILLEGAL: [[TRUNC2:%.*]] = trunc i64 %a to i7
90+
; CHECK-ALLOW-ILLEGAL: br label %for.body
91+
for.body.lr.ph: ; preds = %entry
92+
br label %for.body
93+
94+
; CHECK-BLOCK-ILLEGAL-LABEL: for.body:
95+
; CHECK-BLOCK-ILLEGAL: %i.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
96+
; CHECK-BLOCK-ILLEGAL: %0 = mul i64 %i.02, %a
97+
; CHECK-BLOCK-ILLEGAL: %1 = add i64 %0, %b
98+
; CHECK-BLOCK-ILLEGAL: %idxprom = and i64 %1, 127
99+
; CHECK-BLOCK-ILLEGAL: %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %idxprom
100+
; CHECK-BLOCK-ILLEGAL: store i32 39, i32 addrspace(1)* %arrayidx, align 4
101+
; CHECK-BLOCK-ILLEGAL: %inc = add nuw nsw i64 %i.02, 1
102+
; CHECK-BLOCK-ILLEGAL: %cmp = icmp slt i64 %inc, %n
103+
; CHECK-BLOCK-ILLEGAL: br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
104+
; CHECK-ALLOW-ILLEGAL-LABEL: for.body:
105+
; CHECK-ALLOW-ILLEGAL: [[GEP:%.*]] = phi i32 addrspace(1)* [ [[GEP_PHI1]], %for.body.lr.ph ], [ [[GEP_PHI2:%.*]], %for.body ]
106+
; CHECK-ALLOW-ILLEGAL: %i.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
107+
; CHECK-ALLOW-ILLEGAL: store i32 39, i32 addrspace(1)* [[GEP]], align 4
108+
; CHECK-ALLOW-ILLEGAL: %inc = add nuw nsw i64 %i.02, 1
109+
; CHECK-ALLOW-ILLEGAL: %cmp = icmp slt i64 %inc, %n
110+
; CHECK-ALLOW-ILLEGAL: [[GEP_PHI2]] = getelementptr i32, i32 addrspace(1)* [[GEP]], i7 [[TRUNC2]]
111+
; CHECK-ALLOW-ILLEGAL: br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
112+
for.body: ; preds = %for.body.lr.ph, %for.body
113+
%i.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
114+
%0 = mul i64 %i.02, %a
115+
%1 = add i64 %0, %b
116+
%idxprom = and i64 %1, 127
117+
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %idxprom
118+
store i32 39, i32 addrspace(1)* %arrayidx, align 4
119+
%inc = add nuw nsw i64 %i.02, 1
120+
%cmp = icmp slt i64 %inc, %n
121+
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
122+
123+
for.cond.for.end_crit_edge: ; preds = %for.body
124+
br label %for.end
125+
126+
for.end: ; preds = %for.cond.for.end_crit_edge, %entry
127+
ret void
128+
}
129+
130+
!igc.functions = !{!0}
131+
132+
!0 = !{void (i32 addrspace(1)*, i64, i64, i64)* @test_shl_ashr, !1}
133+
!1 = !{!2}
134+
!2 = !{!"function_type", i32 0}

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ DECLARE_IGC_REGKEY(bool, EnableOptReportLoadNarrowing, false, "Generate opt repo
290290
DECLARE_IGC_REGKEY(bool, EnableGEPLSR, true, "Enables GEP Loop Strength Reduction pass", false)
291291
DECLARE_IGC_REGKEY(DWORD, GEPLSRThresholdRatio, 100, "Ratio for register pressure threshold in GEP Loop Strength Reduction pass", false)
292292
DECLARE_IGC_REGKEY(bool, EnableGEPLSRToPreheader, true, "Enables reduction to loop's preheader in GEP Loop Strength Reduction pass", false)
293+
DECLARE_IGC_REGKEY(bool, EnableGEPLSRAnyIntBitWidth, false, "Experimental: Enables reduction of SCEV with illegal integers. Requires legalization pass to clear up expanded code.", true)
293294
DECLARE_IGC_REGKEY(DWORD, FPRoundingModeCoalescingMaxDistance, 20, "Max distance in instructions for reordering FP instructions with common rounding mode", false)
294295
DECLARE_IGC_REGKEY(bool, DisableDotAddToDp4aMerge, false, "Disable Dot and Add ops to Dp4a merge optimization.", false)
295296
DECLARE_IGC_REGKEY(bool, DisableLoopSplitWidePHIs, false, "Disable splitting of loop PHI values to eliminate subvector extract operations", false)

0 commit comments

Comments
 (0)