Skip to content

Commit 7f97ac9

Browse files
committed
Revert "[AMDGPU] Omit unnecessary waitcnt before barriers"
This reverts commit 8d0c34f.
1 parent bcdb11e commit 7f97ac9

File tree

6 files changed

+17
-134
lines changed

6 files changed

+17
-134
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 7 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -737,12 +737,6 @@ def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
737737
"Hardware automatically inserts waitcnt before barrier"
738738
>;
739739

740-
def FeatureBackOffBarrier : SubtargetFeature <"back-off-barrier",
741-
"BackOffBarrier",
742-
"true",
743-
"Hardware supports backing off s_barrier if an exception occurs"
744-
>;
745-
746740
def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
747741
"HasTrigReducedRange",
748742
"true",
@@ -1031,8 +1025,7 @@ def FeatureISAVersion9_0_A : FeatureSet<
10311025
FeatureMadMacF32Insts,
10321026
FeatureSupportsSRAMECC,
10331027
FeaturePackedTID,
1034-
FullRate64Ops,
1035-
FeatureBackOffBarrier]>;
1028+
FullRate64Ops]>;
10361029

10371030
def FeatureISAVersion9_0_C : FeatureSet<
10381031
[FeatureGFX9,
@@ -1066,8 +1059,7 @@ def FeatureISAVersion9_4_0 : FeatureSet<
10661059
FeatureSupportsSRAMECC,
10671060
FeaturePackedTID,
10681061
FeatureArchitectedFlatScratch,
1069-
FullRate64Ops,
1070-
FeatureBackOffBarrier]>;
1062+
FullRate64Ops]>;
10711063

10721064
// TODO: Organize more features into groups.
10731065
def FeatureGroup {
@@ -1102,8 +1094,7 @@ def FeatureISAVersion10_1_0 : FeatureSet<
11021094
FeatureMadMacF32Insts,
11031095
FeatureDsSrc2Insts,
11041096
FeatureLdsMisalignedBug,
1105-
FeatureSupportsXNACK,
1106-
FeatureBackOffBarrier])>;
1097+
FeatureSupportsXNACK])>;
11071098

11081099
def FeatureISAVersion10_1_1 : FeatureSet<
11091100
!listconcat(FeatureGroup.GFX10_1_Bugs,
@@ -1125,8 +1116,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
11251116
FeatureMadMacF32Insts,
11261117
FeatureDsSrc2Insts,
11271118
FeatureLdsMisalignedBug,
1128-
FeatureSupportsXNACK,
1129-
FeatureBackOffBarrier])>;
1119+
FeatureSupportsXNACK])>;
11301120

11311121
def FeatureISAVersion10_1_2 : FeatureSet<
11321122
!listconcat(FeatureGroup.GFX10_1_Bugs,
@@ -1148,8 +1138,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
11481138
FeatureMadMacF32Insts,
11491139
FeatureDsSrc2Insts,
11501140
FeatureLdsMisalignedBug,
1151-
FeatureSupportsXNACK,
1152-
FeatureBackOffBarrier])>;
1141+
FeatureSupportsXNACK])>;
11531142

11541143
def FeatureISAVersion10_1_3 : FeatureSet<
11551144
!listconcat(FeatureGroup.GFX10_1_Bugs,
@@ -1167,8 +1156,7 @@ def FeatureISAVersion10_1_3 : FeatureSet<
11671156
FeatureMadMacF32Insts,
11681157
FeatureDsSrc2Insts,
11691158
FeatureLdsMisalignedBug,
1170-
FeatureSupportsXNACK,
1171-
FeatureBackOffBarrier])>;
1159+
FeatureSupportsXNACK])>;
11721160

11731161
def FeatureISAVersion10_3_0 : FeatureSet<
11741162
[FeatureGFX10,
@@ -1185,8 +1173,7 @@ def FeatureISAVersion10_3_0 : FeatureSet<
11851173
FeatureNSAEncoding,
11861174
FeatureNSAMaxSize13,
11871175
FeatureWavefrontSize32,
1188-
FeatureShaderCyclesRegister,
1189-
FeatureBackOffBarrier]>;
1176+
FeatureShaderCyclesRegister]>;
11901177

11911178
//===----------------------------------------------------------------------===//
11921179

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
7272
// Dynamically set bits that enable features.
7373
bool FlatForGlobal = false;
7474
bool AutoWaitcntBeforeBarrier = false;
75-
bool BackOffBarrier = false;
7675
bool UnalignedScratchAccess = false;
7776
bool UnalignedAccessMode = false;
7877
bool HasApertureRegs = false;
@@ -494,12 +493,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
494493
return AutoWaitcntBeforeBarrier;
495494
}
496495

497-
/// \returns true if the target supports backing off of s_barrier instructions
498-
/// when an exception is raised.
499-
bool supportsBackOffBarrier() const {
500-
return BackOffBarrier;
501-
}
502-
503496
bool hasUnalignedBufferAccess() const {
504497
return UnalignedBufferAccess;
505498
}

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1135,12 +1135,12 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
11351135
}
11361136
}
11371137

1138-
// The subtarget may have an implicit S_WAITCNT 0 before barriers. If it does
1139-
// not, we need to ensure the subtarget is capable of backing off barrier
1140-
// instructions in case there are any outstanding memory operations that may
1141-
// cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here.
1138+
// Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0
1139+
// occurs before the instruction. Doing it here prevents any additional
1140+
// S_WAITCNTs from being emitted if the instruction was marked as
1141+
// requiring a WAITCNT beforehand.
11421142
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
1143-
!ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
1143+
!ST->hasAutoWaitcntBeforeBarrier()) {
11441144
Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
11451145
}
11461146

llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll

Lines changed: 0 additions & 97 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ body: |
3535
; GFX10: S_WAITCNT 0
3636
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0
3737
; GFX10: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
38-
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 1
38+
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0
3939
; GFX10: S_BARRIER
4040
; GFX10: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
4141
; GFX10: S_WAITCNT 112
@@ -112,7 +112,7 @@ body: |
112112
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0
113113
; GFX10: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
114114
; GFX10: S_WAITCNT 0
115-
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 1
115+
; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0
116116
; GFX10: S_BARRIER
117117
; GFX10: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
118118
; GFX10: S_WAITCNT 112

llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx802 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s
22
; RUN: llc -march=amdgcn -mcpu=gfx900 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9_10,GFX8_9 %s
3-
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-back-off-barrier -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX9_10 %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX9_10 %s
44

55
; GCN-LABEL: barrier_vmcnt_global:
66
; GFX8: flat_load_dword
@@ -42,7 +42,7 @@ bb:
4242
%tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4
4343
store i32 0, i32 addrspace(1)* %tmp5, align 4
4444
fence syncscope("singlethread") release
45-
tail call void @llvm.amdgcn.s.barrier()
45+
tail call void @llvm.amdgcn.s.barrier() #3
4646
fence syncscope("singlethread") acquire
4747
%tmp6 = add nuw nsw i64 %tmp2, 4294967296
4848
%tmp7 = lshr exact i64 %tmp6, 32
@@ -116,7 +116,7 @@ bb:
116116
%tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4
117117
store i32 0, i32* %tmp5, align 4
118118
fence syncscope("singlethread") release
119-
tail call void @llvm.amdgcn.s.barrier()
119+
tail call void @llvm.amdgcn.s.barrier() #3
120120
fence syncscope("singlethread") acquire
121121
%tmp6 = add nuw nsw i64 %tmp2, 4294967296
122122
%tmp7 = lshr exact i64 %tmp6, 32

0 commit comments

Comments
 (0)