Skip to content

Commit 5b7ec58

Browse files
committed
resolve review comments
1 parent 3c09c1d commit 5b7ec58

File tree

1 file changed

+13
-16
lines changed

1 file changed

+13
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,7 +1149,7 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11491149
if (!CallerAA || !CallerAA->isValidState())
11501150
return false;
11511151

1152-
auto Assumed = this->getAssumed();
1152+
ConstantRange Assumed = this->getAssumed();
11531153
unsigned Min = std::max(Assumed.getLower().getZExtValue(),
11541154
CallerAA->getAssumed().getLower().getZExtValue());
11551155
unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
@@ -1317,37 +1317,34 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
13171317
}
13181318
}
13191319

1320-
static void checkWavesPerEU(Module &M, TargetMachine &TM) {
1320+
/// The final check and update of the attribute 'amdgpu-waves-per-eu' based on
1321+
/// the determined 'amdgpu-flat-work-group-size' attribute. We can't do this
1322+
/// during attributor run because the two attributes grow in opposite direction,
1323+
/// we should not use any intermediate value to calculate waves per eu until we
1324+
/// have a determined flat workgroup size.
1325+
static void updateWavesPerEU(Module &M, TargetMachine &TM) {
13211326
for (Function &F : M) {
13221327
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
13231328

13241329
auto FlatWgrpSizeAttr =
13251330
AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
1326-
auto WavesPerEUAttr = AMDGPU::getIntegerPairAttribute(
1327-
F, "amdgpu-waves-per-eu", /*OnlyFirstRequired=*/true);
13281331

13291332
unsigned MinWavesPerEU = ST.getMinWavesPerEU();
13301333
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU();
13311334

1332-
unsigned MinFlatWgrpSize = 1U;
1333-
unsigned MaxFlatWgrpSize = 1024U;
1335+
unsigned MinFlatWgrpSize = ST.getMinFlatWorkGroupSize();
1336+
unsigned MaxFlatWgrpSize = ST.getMaxFlatWorkGroupSize();
13341337
if (FlatWgrpSizeAttr.has_value()) {
13351338
MinFlatWgrpSize = FlatWgrpSizeAttr->first;
13361339
MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second);
13371340
}
13381341

13391342
// Start with the max range.
13401343
unsigned Min = MinWavesPerEU;
1341-
unsigned Max = MaxWavesPerEU;
1344+
unsigned Max = MinWavesPerEU;
13421345

1343-
// If the attribute exists, set them to the value from the attribute.
1344-
if (WavesPerEUAttr.has_value()) {
1345-
Min = WavesPerEUAttr->first;
1346-
if (WavesPerEUAttr->second.has_value())
1347-
Max = *(WavesPerEUAttr->second);
1348-
}
1349-
1350-
// Compute the range from flat workgroup size.
1346+
// Compute the range from flat workgroup size. `getWavesPerEU` will also
1347+
// account for the 'amdgpu-waves-er-eu' attribute.
13511348
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
13521349
ST.getWavesPerEU(F, std::make_pair(MinFlatWgrpSize, MaxFlatWgrpSize));
13531350

@@ -1450,7 +1447,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14501447
if (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
14511448
LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
14521449
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))
1453-
checkWavesPerEU(M, TM);
1450+
updateWavesPerEU(M, TM);
14541451

14551452
return Changed;
14561453
}

0 commit comments

Comments
 (0)