Skip to content

Commit 9e28c58

Browse files
mshelego authored and igcbot committed
Fix depressurizer thresholds calculation
Set the threshold based on the number of free registers instead of the overall register count
1 parent 4254dee commit 9e28c58

File tree

2 files changed

+68
-44
lines changed

2 files changed

+68
-44
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXDepressurizer.cpp

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -439,15 +439,16 @@ bool GenXDepressurizer::runOnFunctionGroup(FunctionGroup &FG) {
439439
.getTM<GenXTargetMachine>()
440440
.getGenXSubtarget();
441441
vc::KernelMetadata KM(FG.getHead());
442-
// Minimal general register size is 32 bytes and GRF can consist of at least
443-
// 32 registers. Thresholds should be set according to the actual GRF size.
444-
unsigned RegSizeFactor = ST->getGRFByteSize() / 32;
445-
int GRFSize = vc::getGRFSize(BC, ST, KM);
446-
unsigned RegNumFactor = (GRFSize > 0 ? GRFSize : 128) / 32;
447442
// Historically the general register pressure threshold was set to 2560 for
448-
// 128*32 byte GRF case and the flag tolerance threshold was set to 1.5x of it.
449-
GRFThreshold = 640 * RegSizeFactor * RegNumFactor;
450-
FlagGRFTolerance = GRFThreshold * 3 / 2;
443+
// 128*32 byte GRF case, which means that only 48 registers are left for
444+
// allocation. The flag tolerance threshold was set to 3840, which means 120
445+
// registers. In case of different GRF size these thresholds should be
446+
// calculated accordingly.
447+
unsigned RegSize = ST->getGRFByteSize();
448+
int GRFSize = vc::getGRFSize(BC, ST, KM);
449+
unsigned NumRegs = (GRFSize > 0 ? GRFSize : 128);
450+
GRFThreshold = NumRegs > 48 ? (NumRegs - 48) * RegSize : 0;
451+
FlagGRFTolerance = 120 * RegSize;
451452
// Process functions in the function group in reverse order, so we know the
452453
// max pressure in a subroutine when we see a call to it.
453454
for (auto fgi = FG.rbegin(), fge = FG.rend(); fgi != fge; ++fgi) {

IGC/VectorCompiler/test/Depressurizer/grf_size.ll

Lines changed: 59 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,11 @@
66
;
77
;============================ end_copyright_notice =============================
88
;
9-
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPG -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XeHPG
10-
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPC -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XeHPC
11-
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Xe2 -vc-grf-size=64 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-Xe2-64
12-
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Xe2 -vc-grf-size=128 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-Xe2-128
13-
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=Xe2 -vc-grf-size=256 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-Xe2-256
9+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPG -vc-grf-size=32 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XeHPG-32
10+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPG -vc-grf-size=64 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XeHPG-64
11+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPG -vc-grf-size=128 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XeHPG-128
12+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPG -vc-grf-size=256 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XeHPG-256
13+
; RUN: %opt %use_old_pass_manager% -GenXModule -GenXLiveRangesWrapper -GenXDepressurizerWrapper -march=genx64 -mtriple=spir64-unknown-unknown -mcpu=XeHPC -vc-auto-large-grf -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XeHPC
1414

1515
; CHECK-LABEL: @test1
1616
define dllexport void @test1(<16 x half> %arg) #0 {
@@ -23,11 +23,11 @@ loop:
2323
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
2424
%res = phi <16 x float> [ zeroinitializer, %entry ], [ %res.next, %loop ]
2525
%i.next = add i32 %i, 1
26-
; CHECK-XeHPG-NOT: fpext
26+
; CHECK-XeHPG-32: fpext
27+
; CHECK-XeHPG-64-NOT: fpext
28+
; CHECK-XeHPG-128-NOT: fpext
29+
; CHECK-XeHPG-256-NOT: fpext
2730
; CHECK-XeHPC-NOT: fpext
28-
; CHECK-Xe2-64-NOT: fpext
29-
; CHECK-Xe2-128-NOT: fpext
30-
; CHECK-Xe2-256-NOT: fpext
3131
%res.next = fadd <16 x float> %res, %fp
3232
%cmp = icmp ult i32 %i.next, 100
3333
br i1 %cmp, label %loop, label %end
@@ -37,7 +37,7 @@ end:
3737
}
3838

3939
; CHECK-LABEL: @test2
40-
define dllexport <1024 x i32> @test2(<1024 x i32> %pressure, <16 x half> %arg) #0 {
40+
define dllexport <512 x i8> @test2(<512 x i8> %pressure, <16 x half> %arg) #0 {
4141
entry:
4242
%fp = fpext <16 x half> %arg to <16 x float>
4343
; CHECK: br label %loop
@@ -47,21 +47,21 @@ loop:
4747
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
4848
%res = phi <16 x float> [ zeroinitializer, %entry ], [ %res.next, %loop ]
4949
%i.next = add i32 %i, 1
50-
; CHECK-XeHPG: fpext
50+
; CHECK-XeHPG-32: fpext
51+
; CHECK-XeHPG-64: fpext
52+
; CHECK-XeHPG-128-NOT: fpext
53+
; CHECK-XeHPG-256-NOT: fpext
5154
; CHECK-XeHPC-NOT: fpext
52-
; CHECK-Xe2-64: fpext
53-
; CHECK-Xe2-128-NOT: fpext
54-
; CHECK-Xe2-256-NOT: fpext
5555
%res.next = fadd <16 x float> %res, %fp
5656
%cmp = icmp ult i32 %i.next, 100
5757
br i1 %cmp, label %loop, label %end
5858

5959
end:
60-
ret <1024 x i32> %pressure
60+
ret <512 x i8> %pressure
6161
}
6262

6363
; CHECK-LABEL: @test3
64-
define dllexport <2048 x i32> @test3(<2048 x i32> %pressure, <16 x half> %arg) #0 {
64+
define dllexport <2560 x i8> @test3(<2560 x i8> %pressure, <16 x half> %arg) #0 {
6565
entry:
6666
%fp = fpext <16 x half> %arg to <16 x float>
6767
; CHECK: br label %loop
@@ -71,21 +71,21 @@ loop:
7171
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
7272
%res = phi <16 x float> [ zeroinitializer, %entry ], [ %res.next, %loop ]
7373
%i.next = add i32 %i, 1
74-
; CHECK-XeHPG: fpext
75-
; CHECK-XeHPC: fpext
76-
; CHECK-Xe2-64: fpext
77-
; CHECK-Xe2-128: fpext
78-
; CHECK-Xe2-256-NOT: fpext
74+
; CHECK-XeHPG-32: fpext
75+
; CHECK-XeHPG-64: fpext
76+
; CHECK-XeHPG-128: fpext
77+
; CHECK-XeHPG-256-NOT: fpext
78+
; CHECK-XeHPC-NOT: fpext
7979
%res.next = fadd <16 x float> %res, %fp
8080
%cmp = icmp ult i32 %i.next, 100
8181
br i1 %cmp, label %loop, label %end
8282

8383
end:
84-
ret <2048 x i32> %pressure
84+
ret <2560 x i8> %pressure
8585
}
8686

8787
; CHECK-LABEL: @test4
88-
define dllexport <4096 x i32> @test4(<4096 x i32> %pressure, <16 x half> %arg) #0 {
88+
define dllexport <5120 x i8> @test4(<5120 x i8> %pressure, <16 x half> %arg) #0 {
8989
entry:
9090
%fp = fpext <16 x half> %arg to <16 x float>
9191
; CHECK: br label %loop
@@ -95,23 +95,47 @@ loop:
9595
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
9696
%res = phi <16 x float> [ zeroinitializer, %entry ], [ %res.next, %loop ]
9797
%i.next = add i32 %i, 1
98-
; CHECK-XeHPG: fpext
98+
; CHECK-XeHPG-32: fpext
99+
; CHECK-XeHPG-64: fpext
100+
; CHECK-XeHPG-128: fpext
101+
; CHECK-XeHPG-256-NOT: fpext
99102
; CHECK-XeHPC: fpext
100-
; CHECK-Xe2-64: fpext
101-
; CHECK-Xe2-128: fpext
102-
; CHECK-Xe2-256: fpext
103103
%res.next = fadd <16 x float> %res, %fp
104104
%cmp = icmp ult i32 %i.next, 100
105105
br i1 %cmp, label %loop, label %end
106106

107107
end:
108-
ret <4096 x i32> %pressure
108+
ret <5120 x i8> %pressure
109+
}
110+
111+
; CHECK-LABEL: @test5
112+
define dllexport <6656 x i8> @test5(<6656 x i8> %pressure, <16 x half> %arg) #0 {
113+
entry:
114+
%fp = fpext <16 x half> %arg to <16 x float>
115+
; CHECK: br label %loop
116+
br label %loop
117+
118+
loop:
119+
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
120+
%res = phi <16 x float> [ zeroinitializer, %entry ], [ %res.next, %loop ]
121+
%i.next = add i32 %i, 1
122+
; CHECK-XeHPG-32: fpext
123+
; CHECK-XeHPG-64: fpext
124+
; CHECK-XeHPG-128: fpext
125+
; CHECK-XeHPG-256: fpext
126+
; CHECK-XeHPC: fpext
127+
%res.next = fadd <16 x float> %res, %fp
128+
%cmp = icmp ult i32 %i.next, 100
129+
br i1 %cmp, label %loop, label %end
130+
131+
end:
132+
ret <6656 x i8> %pressure
109133
}
110134

111135
; COM: Register pressure is beyond threshold for all platforms
112136
; COM: Check that instructions smaller than one register are not moved into the loop
113-
; CHECK-LABEL: @test5
114-
define dllexport <4096 x i32> @test5(<4096 x i32> %pressure, <4 x half> %arg.4, <8 x half> %arg.8) #0 {
137+
; CHECK-LABEL: @test6
138+
define dllexport <6656 x i8> @test6(<6656 x i8> %pressure, <4 x half> %arg.4, <8 x half> %arg.8) #0 {
115139
entry:
116140
%fp.4 = fpext <4 x half> %arg.4 to <4 x float>
117141
%fp.8 = fpext <8 x half> %arg.8 to <8 x float>
@@ -122,20 +146,19 @@ loop:
122146
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
123147
%res.4 = phi <4 x float> [ zeroinitializer, %entry ], [ %res.next.4, %loop ]
124148
%res.8 = phi <8 x float> [ zeroinitializer, %entry ], [ %res.next.8, %loop ]
125-
; CHECK-XeHPG-NOT: fpext <4 x half>
126-
; CHECK-XeHPG: fpext <8 x half>
149+
; CHECK-XeHPG-32: fpext
150+
; CHECK-XeHPG-64: fpext
151+
; CHECK-XeHPG-128: fpext
152+
; CHECK-XeHPG-256: fpext
127153
; CHECK-XeHPC-NOT: fpext
128-
; CHECK-Xe2-64-NOT: fpext
129-
; CHECK-Xe2-128-NOT: fpext
130-
; CHECK-Xe2-256-NOT: fpext
131154
%res.next.4 = fadd <4 x float> %res.4, %fp.4
132155
%res.next.8 = fadd <8 x float> %res.8, %fp.8
133156
%i.next = add i32 %i, 1
134157
%cmp = icmp ult i32 %i.next, 100
135158
br i1 %cmp, label %loop, label %end
136159

137160
end:
138-
ret <4096 x i32> %pressure
161+
ret <6656 x i8> %pressure
139162
}
140163

141164
attributes #0 = { "CMGenxMain" }

0 commit comments

Comments
 (0)