@@ -622,12 +622,13 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
622
622
623
623
int64_t PGRM_Rsrc3 = 1 ;
624
624
bool EvaluatableRsrc3 =
625
- CurrentProgramInfo.ComputePGMRSrc3GFX90A ->evaluateAsAbsolute (PGRM_Rsrc3);
625
+ CurrentProgramInfo.ComputePGMRSrc3 ->evaluateAsAbsolute (PGRM_Rsrc3);
626
626
(void )PGRM_Rsrc3;
627
627
(void )EvaluatableRsrc3;
628
- assert (STM.hasGFX90AInsts () || !EvaluatableRsrc3 ||
628
+ assert (STM.getGeneration () >= AMDGPUSubtarget::GFX10 ||
629
+ STM.hasGFX90AInsts () || !EvaluatableRsrc3 ||
629
630
static_cast <uint64_t >(PGRM_Rsrc3) == 0 );
630
- KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A ;
631
+ KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3 ;
631
632
632
633
KernelDescriptor.kernarg_preload = MCConstantExpr::create (
633
634
AMDGPU::hasKernargPreload (STM) ? Info->getNumKernargPreloadedSGPRs () : 0 ,
@@ -822,22 +823,22 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
822
823
false );
823
824
824
825
[[maybe_unused]] int64_t PGMRSrc3;
825
- assert (STM.hasGFX90AInsts () ||
826
- (CurrentProgramInfo. ComputePGMRSrc3GFX90A -> evaluateAsAbsolute (
827
- PGMRSrc3) &&
826
+ assert (STM.getGeneration () >= AMDGPUSubtarget::GFX10 ||
827
+ STM. hasGFX90AInsts () ||
828
+ (CurrentProgramInfo. ComputePGMRSrc3 -> evaluateAsAbsolute ( PGMRSrc3) &&
828
829
static_cast <uint64_t >(PGMRSrc3) == 0 ));
829
830
if (STM.hasGFX90AInsts ()) {
830
831
OutStreamer->emitRawComment (
831
832
" COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
832
833
getMCExprStr (MCKernelDescriptor::bits_get (
833
- CurrentProgramInfo.ComputePGMRSrc3GFX90A ,
834
+ CurrentProgramInfo.ComputePGMRSrc3 ,
834
835
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
835
836
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
836
837
false );
837
838
OutStreamer->emitRawComment (
838
839
" COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
839
840
getMCExprStr (MCKernelDescriptor::bits_get (
840
- CurrentProgramInfo.ComputePGMRSrc3GFX90A ,
841
+ CurrentProgramInfo.ComputePGMRSrc3 ,
841
842
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
842
843
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
843
844
false );
@@ -1229,24 +1230,24 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1229
1230
ProgInfo.LdsSize = STM.isAmdHsaOS () ? 0 : ProgInfo.LDSBlocks ;
1230
1231
ProgInfo.EXCPEnable = 0 ;
1231
1232
1233
+ // SetBits returns the expression ((Dst & ~Mask) | (Value << Shift)): the Mask bits of Dst are cleared, then Value shifted left by Shift is OR'ed in. Value itself is not masked here.
1234
+ auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1235
+ uint32_t Shift) {
1236
+ const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1237
+ const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1238
+ Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx);
1239
+ Dst = MCBinaryExpr::createOr (Dst, MCBinaryExpr::createShl (Value, Shft, Ctx),
1240
+ Ctx);
1241
+ return Dst;
1242
+ };
1243
+
1232
1244
if (STM.hasGFX90AInsts ()) {
1233
- // return ((Dst & ~Mask) | (Value << Shift))
1234
- auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1235
- uint32_t Shift) {
1236
- const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1237
- const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1238
- Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx);
1239
- Dst = MCBinaryExpr::createOr (
1240
- Dst, MCBinaryExpr::createShl (Value, Shft, Ctx), Ctx);
1241
- return Dst;
1242
- };
1243
-
1244
- ProgInfo.ComputePGMRSrc3GFX90A =
1245
- SetBits (ProgInfo.ComputePGMRSrc3GFX90A , ProgInfo.AccumOffset ,
1245
+ ProgInfo.ComputePGMRSrc3 =
1246
+ SetBits (ProgInfo.ComputePGMRSrc3 , ProgInfo.AccumOffset ,
1246
1247
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1247
1248
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
1248
- ProgInfo.ComputePGMRSrc3GFX90A =
1249
- SetBits (ProgInfo.ComputePGMRSrc3GFX90A , CreateExpr (ProgInfo.TgSplit ),
1249
+ ProgInfo.ComputePGMRSrc3 =
1250
+ SetBits (ProgInfo.ComputePGMRSrc3 , CreateExpr (ProgInfo.TgSplit ),
1250
1251
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1251
1252
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
1252
1253
}
@@ -1267,6 +1268,26 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1267
1268
" , final occupancy is " + Twine (Occupancy));
1268
1269
F.getContext ().diagnose (Diag);
1269
1270
}
1271
+
1272
+ if (isGFX11Plus (STM)) {
1273
+ uint32_t CodeSizeInBytes = (uint32_t )std::min (
1274
+ ProgInfo.getFunctionCodeSize (MF, true /* IsLowerBound */ ),
1275
+ (uint64_t )std::numeric_limits<uint32_t >::max ());
1276
+ uint32_t CodeSizeInLines = divideCeil (CodeSizeInBytes, 128 );
1277
+ uint32_t Field, Shift, Width;
1278
+ if (isGFX11 (STM)) {
1279
+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE;
1280
+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT;
1281
+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
1282
+ } else {
1283
+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE;
1284
+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT;
1285
+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
1286
+ }
1287
+ uint64_t InstPrefSize = std::min (CodeSizeInLines, (1u << Width) - 1 );
1288
+ ProgInfo.ComputePGMRSrc3 = SetBits (ProgInfo.ComputePGMRSrc3 ,
1289
+ CreateExpr (InstPrefSize), Field, Shift);
1290
+ }
1270
1291
}
1271
1292
1272
1293
static unsigned getRsrcReg (CallingConv::ID CallConv) {
0 commit comments