Skip to content

Commit c875e79

Browse files
jgu222igcbot
authored and committed
Simplify GEP
For a GEP pattern like the following: a0 = v g0 = gep float, base, a0 ... a1 = v + 1 g1 = gep float, base, a1 ... a2 = v + 2 g2 = gep float, base, a2 change it to the following, which avoids recomputing the full index for every GEP: a0 = v g0 = gep float, base, a0 ... g1 = gep float, g0, 1 ... g2 = gep float, g0, 2 The latter form results in less address computation code.
1 parent a94b784 commit c875e79

File tree

3 files changed

+175
-8
lines changed

3 files changed

+175
-8
lines changed

IGC/Compiler/CISACodeGen/GenIRLowering.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -445,12 +445,17 @@ bool GEPLowering::simplifyGEP(BasicBlock &BB) const {
445445
Idx = ZExt->getOperand(0);
446446
} else if (auto *SExt = dyn_cast<SExtInst>(Idx)) {
447447
Idx = SExt->getOperand(0);
448-
auto *Op = dyn_cast<OverflowingBinaryOperator>(Idx);
449-
if (!Op || !Op->hasNoSignedWrap())
448+
Operator* Opr = dyn_cast<Operator>(Idx);
449+
if (Opr && Opr->getOpcode() == BinaryOperator::BinaryOps::SDiv) {
450+
// Skip if it is SDiv. Special check is needed as
451+
// OverflowingBinaryOperator does not include SDiv
452+
continue;
453+
}
454+
auto* Op = dyn_cast<OverflowingBinaryOperator>(Idx);
455+
if (Op && !Op->hasNoSignedWrap())
450456
continue;
451-
} else {
452-
continue;
453457
}
458+
454459
const SCEV *E = SE->getSCEV(Idx);
455460
// Skip if the offset to the base is already a constant.
456461
if (isa<SCEVConstant>(E))
@@ -461,18 +466,31 @@ bool GEPLowering::simplifyGEP(BasicBlock &BB) const {
461466
auto EI = Exprs.begin();
462467
auto EE = Exprs.end();
463468
const SCEV *Offset = nullptr;
464-
unsigned MinDiff = UINT_MAX;
469+
constexpr unsigned DIFF_SIZE_THRESHOLD = 3;
470+
unsigned MinDiff = DIFF_SIZE_THRESHOLD;
465471
GetElementPtrInst *BaseWithMinDiff = nullptr;
466472
for (/*EMPTY*/; EI != EE; ++EI) {
467473
// Skip if the result types do not match.
468474
if (EI->GEP->getType() != GEP->getType() ||
469475
E->getType() != EI->Idx->getType())
470476
continue;
477+
471478
auto *Diff = SE->getMinusSCEV(E, EI->Idx);
472-
if (Diff->getExpressionSize() < 4 &&
473-
Diff->getExpressionSize() < MinDiff) {
479+
unsigned exprSize = Diff->getExpressionSize();
480+
if (exprSize <= MinDiff) {
481+
// For the same expr size, keep the first one as its base
482+
// except that the first one isn't constant and this one
483+
// is. In this case, this constant is selected (favor constant)
484+
if (exprSize == MinDiff && !(!Offset || isa<SCEVConstant>(Diff)))
485+
continue;
486+
474487
BaseWithMinDiff = EI->GEP;
475488
Offset = Diff;
489+
MinDiff = exprSize;
490+
491+
// If it is constant, it is the best and we're done.
492+
if (isa<SCEVConstant>(Diff))
493+
break;
476494
}
477495
}
478496
// Not found, add this GEP as a potential base expr.
@@ -525,9 +543,13 @@ bool GEPLowering::runOnFunction(Function& F) {
525543

526544
bool Changed = false;
527545

528-
if (IGC_IS_FLAG_ENABLED(EnableGEPSimplification)) {
546+
if (IGC_IS_FLAG_ENABLED(EnableGEPSimplification))
547+
{
529548
for (auto &BB : F)
530549
Changed |= simplifyGEP(BB);
550+
551+
if (IGC_IS_FLAG_ENABLED(TestGEPSimplification))
552+
return Changed;
531553
}
532554

533555
for (auto& BB : F) {
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2022 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; ------------------------------------------------
10+
;
11+
; REQUIRES: regkeys
12+
;
13+
; RUN: igc_opt -platformpvc --igc-gep-lowering -regkey=EnableGEPSimplification=1,TestGEPSimplification=1 -S %s | FileCheck %s
14+
; ------------------------------------------------
15+
; GEPLowering/GEP simplification : testing GEP strength reduction
16+
; ------------------------------------------------
17+
18+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
19+
target triple = "spir64-unknown-unknown"
20+
21+
;
22+
; CHECK-LABEL: define spir_kernel void @test_gep
23+
;
24+
25+
; Function Attrs: convergent nounwind
26+
define spir_kernel void @test_gep(i32 addrspace(1)* %dst, i32 addrspace(1)* %src, i64 %inc0, i64 %inc1, i64 %Offset64, i32 %Offset32) #0 {
27+
;
28+
; case 1 : gep's id is sext/zext
29+
;
30+
; CHECK-LABEL: case1:
31+
; CHECK: [[C1T0:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %{{.*}}
32+
; CHECK: {{.*}} = getelementptr inbounds i32, i32 addrspace(1)* [[C1T0]], i64 4
33+
; CHECK: {{.*}} = getelementptr inbounds i32, i32 addrspace(1)* [[C1T0]], i64 8
34+
; CHECK: {{.*}} = getelementptr inbounds i32, i32 addrspace(1)* [[C1T0]], i64 12
35+
;
36+
case1:
37+
%simdLaneId16 = call i16 @llvm.genx.GenISA.simdLaneId()
38+
%simdLaneId = zext i16 %simdLaneId16 to i32
39+
%idbase1 = add nsw i32 %Offset32, %simdLaneId
40+
%id1.1 = zext i32 %idbase1 to i64
41+
%addr1.1 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id1.1
42+
%res1.0 = load i32, i32 addrspace(1)* %addr1.1, align 4
43+
%add11.1 = add nsw i32 %idbase1, 4
44+
%id1.2 = zext i32 %add11.1 to i64
45+
%addr1.2 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id1.2
46+
%res1.1 = load i32, i32 addrspace(1)* %addr1.2, align 4
47+
%sum1.0 = add nsw i32 %res1.0, %res1.1
48+
%add11.2 = add nsw i32 %idbase1, 8
49+
%id1.3 = zext i32 %add11.2 to i64
50+
%addr1.3 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id1.3
51+
%res1.2 = load i32, i32 addrspace(1)* %addr1.3, align 4
52+
%sum1.1 = add nsw i32 %sum1.0, %res1.2
53+
%add11.3 = add nsw i32 %idbase1, 12
54+
%id1.4 = zext i32 %add11.3 to i64
55+
%addr1.4 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id1.4
56+
%res1.3 = load i32, i32 addrspace(1)* %addr1.4, align 4
57+
%sum1.2 = add nsw i32 %sum1.1, %res1.3
58+
%id1.5 = sext i32 %idbase1 to i64
59+
%addr1.5 = getelementptr inbounds i32, i32 addrspace(1)* %dst, i64 %id1.5
60+
store i32 %sum1.2, i32 addrspace(1)* %addr1.5, align 4
61+
br label %case2
62+
63+
;
64+
; case 2 : gep's id is add/sub
65+
;
66+
; CHECK-LABEL: case2:
67+
; CHECK: [[C2T0:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %{{.*}}
68+
; CHECK: [[C2T1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C2T0]], i64 10
69+
; CHECK: [[C2T2:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C2T0]], i64 20
70+
; CHECK: [[C2T3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C2T0]], i64 30
71+
;
72+
case2:
73+
%simdLaneId64 = zext i16 %simdLaneId16 to i64
74+
%idbase2 = add i64 %Offset64, %simdLaneId64
75+
%addr2 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %idbase2
76+
%res2.0 = load i32, i32 addrspace(1)* %addr2, align 4
77+
%id2.1 = add nsw i64 %idbase2, 10
78+
%addr2.1 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id2.1
79+
%res2.1 = load i32, i32 addrspace(1)* %addr2.1, align 4
80+
%sum2.0 = add nsw i32 %res2.0, %res2.1
81+
%id2.2 = add nsw i64 %idbase2, 20
82+
%addr2.2 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id2.2
83+
%res2.2 = load i32, i32 addrspace(1)* %addr2.2, align 4
84+
%sum2.1 = add nsw i32 %sum2.0, %res2.2
85+
%id2.3 = add nsw i64 %idbase2, 30
86+
%addr2.3 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id2.3
87+
%res2.3 = load i32, i32 addrspace(1)* %addr2.3, align 4
88+
%sum2.2 = add nsw i32 %sum2.1, %res2.3
89+
%addr2.4 = getelementptr inbounds i32, i32 addrspace(1)* %dst, i64 %idbase2
90+
store i32 %sum2.2, i32 addrspace(1)* %addr2.4, align 4
91+
br label %case3
92+
93+
;
94+
; case 3 : address inc b/w two GEPs is a variable (BB-level invariant), not a constant
95+
;
96+
; CHECK-LABEL: case3:
97+
; CHECK: [[C3T0:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %{{.*}}
98+
; CHECK: [[C3T1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C3T0]], i64 %inc0
99+
;
100+
; // to match add [nsw] i64 %inc1, 1. It is the 2nd add from here
101+
;
102+
; CHECK: %sum3.0 = add
103+
; CHECK: [[C3T2:%.*]] = add
104+
; CHECK-SAME: i64 %inc1, 1
105+
;
106+
; CHECK: [[C3T3:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C3T0]], i64 [[C3T2]]
107+
; CHECK: [[C3T4:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[C3T0]], i64 128
108+
;
109+
case3:
110+
%addr3 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %idbase2
111+
%res3.0 = load i32, i32 addrspace(1)* %addr3, align 4
112+
%id3.1 = add nsw i64 %idbase2, %inc0
113+
%addr3.1 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id3.1
114+
%res3.1 = load i32, i32 addrspace(1)* %addr3.1, align 4
115+
%sum3.0 = add nsw i32 %res3.0, %res3.1
116+
%incinc1 = add nsw i64 %inc1, 1
117+
%id3.2 = add nsw i64 %idbase2, %incinc1
118+
%addr3.2 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id3.2
119+
%res3.2 = load i32, i32 addrspace(1)* %addr3.2, align 4
120+
%sum3.1 = add nsw i32 %sum3.0, %res3.2
121+
%id3.3 = add nsw i64 %idbase2, 128
122+
%addr3.3 = getelementptr inbounds i32, i32 addrspace(1)* %src, i64 %id3.3
123+
%res3.3 = load i32, i32 addrspace(1)* %addr3.3, align 4
124+
%sum3.2 = add nsw i32 %sum3.1, %res3.3
125+
%addr3.4 = getelementptr inbounds i32, i32 addrspace(1)* %dst, i64 %idbase2
126+
store i32 %sum3.2, i32 addrspace(1)* %addr3.4, align 4
127+
;
128+
; CHECK: ret void
129+
;
130+
ret void
131+
}
132+
133+
; Function Attrs: nounwind readnone
134+
declare i16 @llvm.genx.GenISA.simdLaneId() #1
135+
136+
attributes #0 = { convergent nounwind "less-precise-fpmad"="true" }
137+
attributes #1 = { nounwind readnone }
138+
139+
!igc.functions = !{!0}
140+
141+
!0 = !{void (i32 addrspace(1)*, i32 addrspace(1)*, i64, i64, i64, i32)* @test_gep, !1}
142+
!1 = !{!2, !3}
143+
!2 = !{!"function_type", i32 0}
144+
!3 = !{!"implicit_arg_desc"}

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ DECLARE_IGC_REGKEY(bool, EnableLSC, false, "Enables the new dataport encoding fo
541541
DECLARE_IGC_REGKEY(bool, ForceNoLSC, false, "Disables the new dataport encoding for LSC messages.", true)
542542
DECLARE_IGC_REGKEY(bool, EnableMadLoopSlice, true, "Enables the slicing of mad loops.", true)
543543
DECLARE_IGC_REGKEY(bool, EnableGEPSimplification, true, "Enable GEP simplification", true)
544+
DECLARE_IGC_REGKEY(bool, TestGEPSimplification, false, "[Test] Testing GEP simplification without actually lowering GEP. Used in lit test", false)
544545
DECLARE_IGC_REGKEY(bool, DisableSystemMemoryCachingInGPUForConstantBuffers, false, "Disables caching system memory in GPU for loads from constant buffers", false)
545546
DECLARE_IGC_REGKEY(bool, EnableTileYForExperiments, false, "Enable TileY heuristics for experiments", false)
546547
DECLARE_IGC_REGKEY(bool, EnableDG2LSCSIMD8WA, true, "Enables WA for DG2 LSC simd8 d32-v8/d64-v3/d64-v4. [temp, should be replaced with WA id", false)

0 commit comments

Comments
 (0)