Skip to content

Commit 86842e1

Browse files
jwanggit86 and Jun Wang authored
[AMDGPU] New clang option for emitting a waitcnt instruction after each memory instruction (#79236)
This patch introduces a new command-line option for clang, namely, -mamdgpu-precise-memory-op (the precise-memory target feature in the backend). When this option is specified, a waitcnt instruction is generated after each memory load or store instruction. The counter values are always 0, but which counters are involved depends on the memory instruction. --------- Co-authored-by: Jun Wang <[email protected]>
1 parent f27f369 commit 86842e1

File tree

7 files changed

+1686
-0
lines changed

7 files changed

+1686
-0
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4912,6 +4912,9 @@ defm tgsplit : SimpleMFlag<"tgsplit", "Enable", "Disable",
49124912
defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64",
49134913
"Specify wavefront size 64", "Specify wavefront size 32",
49144914
" mode (AMDGPU only)">;
4915+
defm amdgpu_precise_memory_op
4916+
: SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable",
4917+
" precise memory mode (AMDGPU only)">;
49154918

49164919
defm unsafe_fp_atomics : BoolMOption<"unsafe-fp-atomics",
49174920
TargetOpts<"AllowAMDGPUUnsafeFPAtomics">, DefaultFalse,

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
670670
options::OPT_mno_wavefrontsize64, false))
671671
Features.push_back("+wavefrontsize64");
672672

673+
if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op,
674+
options::OPT_mno_amdgpu_precise_memory_op, false))
675+
Features.push_back("+precise-memory");
676+
673677
handleTargetFeaturesGroup(D, Triple, Args, Features,
674678
options::OPT_m_amdgpu_Features_Group);
675679
}

clang/test/Driver/amdgpu-features.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,9 @@
3232

3333
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-cumode %s 2>&1 | FileCheck --check-prefix=NO-CUMODE %s
3434
// NO-CUMODE: "-target-feature" "-cumode"
35+
36+
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mamdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=PREC-MEM %s
37+
// PREC-MEM: "-target-feature" "+precise-memory"
38+
39+
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s
40+
// NO-PREC-MEM-NOT: {{".*precise-memory"}}

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,10 @@ def FeatureCuMode : SubtargetFeature<"cumode",
168168
"Enable CU wavefront execution mode"
169169
>;
170170

171+
def FeaturePreciseMemory
172+
: SubtargetFeature<"precise-memory", "EnablePreciseMemory",
173+
"true", "Enable precise memory mode">;
174+
171175
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
172176
"SGPRInitBug",
173177
"true",

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
8787
bool EnableTgSplit = false;
8888
bool EnableCuMode = false;
8989
bool TrapHandler = false;
90+
bool EnablePreciseMemory = false;
9091

9192
// Used as options.
9293
bool EnableLoadStoreOpt = false;
@@ -599,6 +600,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
599600
return EnableCuMode;
600601
}
601602

603+
/// \returns the EnablePreciseMemory flag, which the "precise-memory"
/// subtarget feature sets to true.
bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; }
604+
602605
bool hasFlatAddressSpace() const {
603606
return FlatAddressSpace;
604607
}

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2305,6 +2305,14 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
23052305
}
23062306
#endif
23072307

2308+
if (ST->isPreciseMemoryEnabled() && Inst.mayLoadOrStore()) {
2309+
AMDGPU::Waitcnt Wait = WCG->getAllZeroWaitcnt(
2310+
Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst));
2311+
ScoreBrackets.simplifyWaitcnt(Wait);
2312+
Modified |= generateWaitcnt(Wait, std::next(Inst.getIterator()), Block,
2313+
ScoreBrackets, /*OldWaitcntInstr=*/nullptr);
2314+
}
2315+
23082316
LLVM_DEBUG({
23092317
Inst.print(dbgs());
23102318
ScoreBrackets.dump();

0 commit comments

Comments
 (0)