Skip to content

Commit 04bd5b5

Browse files
vangthao95 authored and kerbowa committed
[AMDGPU] Fix not rescheduling without clustering
Regions that should be rescheduled without memory op clustering are sometimes skipped. RegionIdx is not incremented when iterating over regions that are flagged to be skipped, which leaves the index incorrect for every subsequent region. Thanks to Vang Thao for discovering this bug!

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D85498
1 parent f1d5257 commit 04bd5b5

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,8 +567,10 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
567567
SavedMutations.swap(Mutations);
568568

569569
for (auto Region : Regions) {
570-
if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
570+
if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) {
571+
++RegionIdx;
571572
continue;
573+
}
572574

573575
RegionBegin = Region.first;
574576
RegionEnd = Region.second;

llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
; Interleave loads and stores to fit into 9 VGPR limit.
44
; This requires to avoid load/store clustering.
55

6+
; Reschedule the second scheduling region without clustering while
7+
; the first region is skipped.
8+
69
; GCN: global_load_dwordx4
710
; GCN: global_store_dwordx4
811
; GCN: global_load_dwordx4
@@ -12,10 +15,13 @@
1215
; GCN: NumVgprs: {{[0-9]$}}
1316
; GCN: ScratchSize: 0{{$}}
1417

15-
define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1) #1 {
18+
define amdgpu_kernel void @load_store_max_9vgprs(<4 x i32> addrspace(1)* nocapture noalias readonly %arg, <4 x i32> addrspace(1)* nocapture noalias %arg1, i1 %cnd) #1 {
1619
bb:
1720
%id = call i32 @llvm.amdgcn.workitem.id.x()
1821
%base = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i32 %id
22+
br i1 %cnd, label %bb1, label %bb2
23+
24+
bb1:
1925
%tmp = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 1
2026
%tmp2 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp, align 4
2127
%tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %base, i32 3
@@ -27,6 +33,9 @@ bb:
2733
store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp7, align 4
2834
%tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 5
2935
store <4 x i32> %tmp6, <4 x i32> addrspace(1)* %tmp8, align 4
36+
br label %bb2
37+
38+
bb2:
3039
ret void
3140
}
3241

0 commit comments

Comments
 (0)