Skip to content

Commit 84610a8

Browse files
committed
AMDGPU: Add AMDGPUSubtarget::getEUsPerCU()
We will use this for more accurate occupancy computations. Note that IsaInfo takes WGP mode vs. CU mode into account on gfx10+.

Differential Revision: https://reviews.llvm.org/D139467
1 parent 20f895c commit 84610a8

File tree

2 files changed

+7
-0
lines changed

2 files changed

+7
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
173173
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
174174
// clang-format on
175175
MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
176+
EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(this);
176177
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
177178
InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
178179
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class AMDGPUSubtarget {
6161
bool HasFminFmaxLegacy = true;
6262
bool EnablePromoteAlloca = false;
6363
bool HasTrigReducedRange = false;
64+
unsigned EUsPerCU = 4;
6465
unsigned MaxWavesPerEU = 10;
6566
unsigned LocalMemorySize = 0;
6667
char WavefrontSizeLog2 = 0;
@@ -209,6 +210,11 @@ class AMDGPUSubtarget {
209210
return LocalMemorySize;
210211
}
211212

213+
/// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the
214+
/// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs.
215+
/// CU mode into account.
216+
unsigned getEUsPerCU() const { return EUsPerCU; }
217+
212218
Align getAlignmentForImplicitArgPtr() const {
213219
return isAmdHsaOS() ? Align(8) : Align(4);
214220
}

0 commit comments

Comments
 (0)