Skip to content

Commit f0a0168

Browse files
jwanggit86Jun Wang
authored and committed
[AMDGPU] New clang option for emitting a waitcnt instruction after each memory instruction (llvm#79236)
This patch introduces a new command-line option for clang, namely -mamdgpu-precise-memory-op (exposed as the precise-memory target feature in the backend). When this option is specified, a waitcnt instruction is generated after each memory load/store instruction. The counter values are always 0, but which counters are involved depends on the memory instruction. --------- Co-authored-by: Jun Wang <[email protected]> Change-Id: Ieeac771ce0facbdff3c6149945bbabc95d7cb48c
1 parent 8aaa491 commit f0a0168

File tree

7 files changed

+1692
-0
lines changed

7 files changed

+1692
-0
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4865,6 +4865,9 @@ defm tgsplit : SimpleMFlag<"tgsplit", "Enable", "Disable",
48654865
defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64",
48664866
"Specify wavefront size 64", "Specify wavefront size 32",
48674867
" mode (AMDGPU only)">;
4868+
// Driver flag pair -mamdgpu-precise-memory-op / -mno-amdgpu-precise-memory-op.
// The help text is assembled from the "Enable"/"Disable" prefix plus the
// shared suffix " precise memory mode (AMDGPU only)".
defm amdgpu_precise_memory_op
4869+
: SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable",
4870+
" precise memory mode (AMDGPU only)">;
48684871

48694872
defm unsafe_fp_atomics : BoolOption<"m", "unsafe-fp-atomics",
48704873
TargetOpts<"AllowAMDGPUUnsafeFPAtomics">, DefaultFalse,

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
668668
Features.push_back("-sramecc");
669669
}
670670

671+
if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op,
672+
options::OPT_mno_amdgpu_precise_memory_op, false))
673+
Features.push_back("+precise-memory");
674+
671675
handleTargetFeaturesGroup(D, Triple, Args, Features,
672676
options::OPT_m_amdgpu_Features_Group);
673677
}

clang/test/Driver/amdgpu-features.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,9 @@
3232

3333
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-cumode %s 2>&1 | FileCheck --check-prefix=NO-CUMODE %s
3434
// NO-CUMODE: "-target-feature" "-cumode"
35+
36+
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mamdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=PREC-MEM %s
37+
// PREC-MEM: "-target-feature" "+precise-memory"
38+
39+
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s
40+
// NO-PREC-MEM-NOT: {{".*precise-memory"}}

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,10 @@ def FeatureCuMode : SubtargetFeature<"cumode",
167167
"Enable CU wavefront execution mode"
168168
>;
169169

170+
// Subtarget feature "precise-memory": when present, the subtarget field
// EnablePreciseMemory is set to "true".
def FeaturePreciseMemory
171+
: SubtargetFeature<"precise-memory", "EnablePreciseMemory",
172+
"true", "Enable precise memory mode">;
173+
170174
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
171175
"SGPRInitBug",
172176
"true",

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
8787
bool EnableTgSplit = false;
8888
bool EnableCuMode = false;
8989
bool TrapHandler = false;
90+
bool EnablePreciseMemory = false;
9091

9192
// Used as options.
9293
bool EnableLoadStoreOpt = false;
@@ -596,6 +597,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
596597
return EnableCuMode;
597598
}
598599

600+
/// \returns the value of EnablePreciseMemory, i.e. whether the
/// "precise-memory" subtarget feature is enabled (it defaults to false).
bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; }
601+
599602
bool hasFlatAddressSpace() const {
600603
return FlatAddressSpace;
601604
}

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,6 +2326,14 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
23262326
}
23272327
#endif
23282328

2329+
if (ST->isPreciseMemoryEnabled() && Inst.mayLoadOrStore()) {
2330+
AMDGPU::Waitcnt Wait = WCG->getAllZeroWaitcnt(
2331+
Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst));
2332+
ScoreBrackets.simplifyWaitcnt(Wait);
2333+
Modified |= generateWaitcnt(Wait, std::next(Inst.getIterator()), Block,
2334+
ScoreBrackets, /*OldWaitcntInstr=*/nullptr);
2335+
}
2336+
23292337
LLVM_DEBUG({
23302338
Inst.print(dbgs());
23312339
ScoreBrackets.dump();

0 commit comments

Comments
 (0)