Skip to content

Commit 3c1273d

Browse files
author
Tim Corringham
committed
[AMDGPU] Add amdgpu specific loop threshold metadata
Add new loop metadata amdgpu.loop.unroll.threshold to allow the initial AMDGPU specific unroll threshold value to be specified on a loop by loop basis. The intention is to be able to allow more nuanced hints, e.g. specifying a low threshold value to indicate that a loop may be unrolled if cheap enough rather than using the all or nothing llvm.loop.unroll.disable metadata. Differential Revision: https://reviews.llvm.org/D84779
1 parent af3c51e commit 3c1273d

File tree

2 files changed

+133
-0
lines changed

2 files changed

+133
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,26 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
117117
const unsigned MaxAlloca = (256 - 16) * 4;
118118
unsigned ThresholdPrivate = UnrollThresholdPrivate;
119119
unsigned ThresholdLocal = UnrollThresholdLocal;
120+
121+
// If this loop has the amdgpu.loop.unroll.threshold metadata we will use the
122+
// provided threshold value as the default for Threshold
123+
if (MDNode *LoopUnrollThreshold =
124+
findOptionMDForLoop(L, "amdgpu.loop.unroll.threshold")) {
125+
if (LoopUnrollThreshold->getNumOperands() == 2) {
126+
ConstantInt *MetaThresholdValue = mdconst::extract_or_null<ConstantInt>(
127+
LoopUnrollThreshold->getOperand(1));
128+
if (MetaThresholdValue) {
129+
// We will also use the supplied value for PartialThreshold for now.
130+
// We may introduce additional metadata if it becomes necessary in the
131+
// future.
132+
UP.Threshold = MetaThresholdValue->getSExtValue();
133+
UP.PartialThreshold = UP.Threshold;
134+
ThresholdPrivate = std::min(ThresholdPrivate, UP.Threshold);
135+
ThresholdLocal = std::min(ThresholdLocal, UP.Threshold);
136+
}
137+
}
138+
}
139+
120140
unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
121141
for (const BasicBlock *BB : L->getBlocks()) {
122142
const DataLayout &DL = BB->getModule()->getDataLayout();
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
; RUN: opt < %s -S -mtriple=amdgcn-- -loop-unroll | FileCheck %s
2+
3+
; Check the handling of amdgpu.loop.unroll.threshold metadata which can be used to
4+
; set the default threshold for a loop. This metadata overrides both the AMDGPU
5+
; default, and any value specified by the amdgpu-unroll-threshold function attribute
6+
; (which sets a threshold for all loops in the function).
7+
8+
; Check that the loop in unroll_default is not fully unrolled using the default
9+
; unroll threshold
10+
; CHECK-LABEL: @unroll_default
11+
; CHECK: entry:
12+
; CHECK: br i1 %cmp
13+
; CHECK: ret void
14+
15+
@in = internal unnamed_addr global i32* null, align 8
16+
@out = internal unnamed_addr global i32* null, align 8
17+
18+
; Baseline case: the function has no attribute group and the loop's back-edge
; branch carries no !llvm.loop metadata. The single-block loop loads an i64
; through @in and stores it through @out, branching back while %inc < 100.
define void @unroll_default() {
19+
entry:
20+
br label %do.body
21+
22+
do.body: ; preds = %do.body, %entry
23+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
24+
%v1 = load i64, i64* bitcast (i32** @in to i64*), align 8
25+
store i64 %v1, i64* bitcast (i32** @out to i64*), align 8
26+
%inc = add nsw i32 %i.0, 1
27+
%cmp = icmp slt i32 %inc, 100
28+
br i1 %cmp, label %do.body, label %do.end
29+
30+
do.end: ; preds = %do.body
31+
ret void
32+
}
33+
34+
; Check that the same loop in unroll_full is fully unrolled when the default
35+
; unroll threshold is increased by use of the amdgpu.loop.unroll.threshold metadata
36+
; CHECK-LABEL: @unroll_full
37+
; CHECK: entry:
38+
; CHECK-NOT: br i1 %cmp
39+
; CHECK: ret void
40+
41+
; Same loop body as @unroll_default, but the back-edge branch is tagged with
; !llvm.loop !1, whose operand !2 is amdgpu.loop.unroll.threshold = 1000.
; The function itself has no attribute group.
define void @unroll_full() {
42+
entry:
43+
br label %do.body
44+
45+
do.body: ; preds = %do.body, %entry
46+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
47+
%v1 = load i64, i64* bitcast (i32** @in to i64*), align 8
48+
store i64 %v1, i64* bitcast (i32** @out to i64*), align 8
49+
%inc = add nsw i32 %i.0, 1
50+
%cmp = icmp slt i32 %inc, 100
51+
br i1 %cmp, label %do.body, label %do.end, !llvm.loop !1
52+
53+
do.end: ; preds = %do.body
54+
ret void
55+
}
56+
57+
; Check that the same loop in override_no_unroll is not unrolled when a high default
58+
; unroll threshold specified using the amdgpu-unroll-threshold function attribute
59+
; is overridden by a low threshold using the amdgpu.loop.unroll.threshold metadata
60+
61+
; CHECK-LABEL: @override_no_unroll
62+
; CHECK: entry:
63+
; CHECK: br i1 %cmp
64+
; CHECK: ret void
65+
66+
; Same loop body again. Attribute group #0 sets the function-level
; "amdgpu-unroll-threshold" to 1000, while the back-edge branch carries
; !llvm.loop !3, whose operand !4 is amdgpu.loop.unroll.threshold = 100.
define void @override_no_unroll() #0 {
67+
entry:
68+
br label %do.body
69+
70+
do.body: ; preds = %do.body, %entry
71+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
72+
%v1 = load i64, i64* bitcast (i32** @in to i64*), align 8
73+
store i64 %v1, i64* bitcast (i32** @out to i64*), align 8
74+
%inc = add nsw i32 %i.0, 1
75+
%cmp = icmp slt i32 %inc, 100
76+
br i1 %cmp, label %do.body, label %do.end, !llvm.loop !3
77+
78+
do.end: ; preds = %do.body
79+
ret void
80+
}
81+
82+
; Check that the same loop in override_unroll is fully unrolled when a low default
83+
; unroll threshold specified using the amdgpu-unroll-threshold function attribute
84+
; is overridden by a high threshold using the amdgpu.loop.unroll.threshold metadata
85+
86+
; CHECK-LABEL: @override_unroll
87+
; CHECK: entry:
88+
; CHECK-NOT: br i1 %cmp
89+
; CHECK: ret void
90+
91+
; Same loop body again. Attribute group #1 sets the function-level
; "amdgpu-unroll-threshold" to 100, while the back-edge branch carries
; !llvm.loop !1, whose operand !2 is amdgpu.loop.unroll.threshold = 1000.
define void @override_unroll() #1 {
92+
entry:
93+
br label %do.body
94+
95+
do.body: ; preds = %do.body, %entry
96+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ]
97+
%v1 = load i64, i64* bitcast (i32** @in to i64*), align 8
98+
store i64 %v1, i64* bitcast (i32** @out to i64*), align 8
99+
%inc = add nsw i32 %i.0, 1
100+
%cmp = icmp slt i32 %inc, 100
101+
br i1 %cmp, label %do.body, label %do.end, !llvm.loop !1
102+
103+
do.end: ; preds = %do.body
104+
ret void
105+
}
106+
107+
attributes #0 = { "amdgpu-unroll-threshold"="1000" }
108+
attributes #1 = { "amdgpu-unroll-threshold"="100" }
109+
110+
!1 = !{!1, !2}
111+
!2 = !{!"amdgpu.loop.unroll.threshold", i32 1000}
112+
!3 = !{!3, !4}
113+
!4 = !{!"amdgpu.loop.unroll.threshold", i32 100}

0 commit comments

Comments
 (0)