@@ -1149,7 +1149,7 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1149
1149
if (!CallerAA || !CallerAA->isValidState ())
1150
1150
return false ;
1151
1151
1152
- auto Assumed = this ->getAssumed ();
1152
+ ConstantRange Assumed = this ->getAssumed ();
1153
1153
unsigned Min = std::max (Assumed.getLower ().getZExtValue (),
1154
1154
CallerAA->getAssumed ().getLower ().getZExtValue ());
1155
1155
unsigned Max = std::max (Assumed.getUpper ().getZExtValue (),
@@ -1317,37 +1317,34 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
1317
1317
}
1318
1318
}
1319
1319
1320
- static void checkWavesPerEU (Module &M, TargetMachine &TM) {
1320
+ // / The final check and update of the attribute 'amdgpu-waves-per-eu' based on
1321
+ // / the determined 'amdgpu-flat-work-group-size' attribute. We can't do this
1322
+ // / during attributor run because the two attributes grow in opposite directions,
1323
+ // / we should not use any intermediate value to calculate waves per eu until we
1324
+ // / have a determined flat workgroup size.
1325
+ static void updateWavesPerEU (Module &M, TargetMachine &TM) {
1321
1326
for (Function &F : M) {
1322
1327
const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
1323
1328
1324
1329
auto FlatWgrpSizeAttr =
1325
1330
AMDGPU::getIntegerPairAttribute (F, " amdgpu-flat-work-group-size" );
1326
- auto WavesPerEUAttr = AMDGPU::getIntegerPairAttribute (
1327
- F, " amdgpu-waves-per-eu" , /* OnlyFirstRequired=*/ true );
1328
1331
1329
1332
unsigned MinWavesPerEU = ST.getMinWavesPerEU ();
1330
1333
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU ();
1331
1334
1332
- unsigned MinFlatWgrpSize = 1U ;
1333
- unsigned MaxFlatWgrpSize = 1024U ;
1335
+ unsigned MinFlatWgrpSize = ST. getMinFlatWorkGroupSize () ;
1336
+ unsigned MaxFlatWgrpSize = ST. getMaxFlatWorkGroupSize () ;
1334
1337
if (FlatWgrpSizeAttr.has_value ()) {
1335
1338
MinFlatWgrpSize = FlatWgrpSizeAttr->first ;
1336
1339
MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second );
1337
1340
}
1338
1341
1339
1342
// Start with the max range.
1340
1343
unsigned Min = MinWavesPerEU;
1341
- unsigned Max = MaxWavesPerEU ;
1344
+ unsigned Max = MinWavesPerEU ;
1342
1345
1343
- // If the attribute exists, set them to the value from the attribute.
1344
- if (WavesPerEUAttr.has_value ()) {
1345
- Min = WavesPerEUAttr->first ;
1346
- if (WavesPerEUAttr->second .has_value ())
1347
- Max = *(WavesPerEUAttr->second );
1348
- }
1349
-
1350
- // Compute the range from flat workgroup size.
1346
+ // Compute the range from flat workgroup size. `getWavesPerEU` will also
1347
+ // account for the 'amdgpu-waves-per-eu' attribute.
1351
1348
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
1352
1349
ST.getWavesPerEU (F, std::make_pair (MinFlatWgrpSize, MaxFlatWgrpSize));
1353
1350
@@ -1450,7 +1447,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1450
1447
if (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
1451
1448
LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1452
1449
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))
1453
- checkWavesPerEU (M, TM);
1450
+ updateWavesPerEU (M, TM);
1454
1451
1455
1452
return Changed;
1456
1453
}
0 commit comments