@@ -622,12 +622,13 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
622
622
623
623
int64_t PGRM_Rsrc3 = 1 ;
624
624
bool EvaluatableRsrc3 =
625
- CurrentProgramInfo.ComputePGMRSrc3GFX90A ->evaluateAsAbsolute (PGRM_Rsrc3);
625
+ CurrentProgramInfo.ComputePGMRSrc3 ->evaluateAsAbsolute (PGRM_Rsrc3);
626
626
(void )PGRM_Rsrc3;
627
627
(void )EvaluatableRsrc3;
628
- assert (STM.hasGFX90AInsts () || !EvaluatableRsrc3 ||
628
+ assert (STM.getGeneration () >= AMDGPUSubtarget::GFX10 ||
629
+ STM.hasGFX90AInsts () || !EvaluatableRsrc3 ||
629
630
static_cast <uint64_t >(PGRM_Rsrc3) == 0 );
630
- KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A ;
631
+ KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3 ;
631
632
632
633
KernelDescriptor.kernarg_preload = MCConstantExpr::create (
633
634
AMDGPU::hasKernargPreload (STM) ? Info->getNumKernargPreloadedSGPRs () : 0 ,
@@ -748,7 +749,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
748
749
RI.getSymbol (CurrentFnSym->getName (), RIK::RIK_PrivateSegSize,
749
750
OutContext, IsLocal)
750
751
->getVariableValue (),
751
- getFunctionCodeSize (MF), MFI);
752
+ CurrentProgramInfo. getFunctionCodeSize (MF), MFI);
752
753
return false ;
753
754
}
754
755
@@ -757,7 +758,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
757
758
CurrentProgramInfo.NumArchVGPR ,
758
759
STM.hasMAIInsts () ? CurrentProgramInfo.NumAccVGPR : nullptr ,
759
760
CurrentProgramInfo.NumVGPR , CurrentProgramInfo.NumSGPR ,
760
- CurrentProgramInfo.ScratchSize , getFunctionCodeSize (MF), MFI);
761
+ CurrentProgramInfo.ScratchSize ,
762
+ CurrentProgramInfo.getFunctionCodeSize (MF), MFI);
761
763
762
764
OutStreamer->emitRawComment (
763
765
" FloatMode: " + Twine (CurrentProgramInfo.FloatMode ), false );
@@ -821,22 +823,22 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
821
823
false );
822
824
823
825
[[maybe_unused]] int64_t PGMRSrc3;
824
- assert (STM.hasGFX90AInsts () ||
825
- (CurrentProgramInfo. ComputePGMRSrc3GFX90A -> evaluateAsAbsolute (
826
- PGMRSrc3) &&
826
+ assert (STM.getGeneration () >= AMDGPUSubtarget::GFX10 ||
827
+ STM. hasGFX90AInsts () ||
828
+ (CurrentProgramInfo. ComputePGMRSrc3 -> evaluateAsAbsolute ( PGMRSrc3) &&
827
829
static_cast <uint64_t >(PGMRSrc3) == 0 ));
828
830
if (STM.hasGFX90AInsts ()) {
829
831
OutStreamer->emitRawComment (
830
832
" COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
831
833
getMCExprStr (MCKernelDescriptor::bits_get (
832
- CurrentProgramInfo.ComputePGMRSrc3GFX90A ,
834
+ CurrentProgramInfo.ComputePGMRSrc3 ,
833
835
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
834
836
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
835
837
false );
836
838
OutStreamer->emitRawComment (
837
839
" COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
838
840
getMCExprStr (MCKernelDescriptor::bits_get (
839
- CurrentProgramInfo.ComputePGMRSrc3GFX90A ,
841
+ CurrentProgramInfo.ComputePGMRSrc3 ,
840
842
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
841
843
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
842
844
false );
@@ -893,27 +895,6 @@ void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
893
895
}
894
896
}
895
897
896
// Review note: this hunk deletes the AsmPrinter-level code-size helper.
// What the removed body computed: the sum of TII->getInstSizeInBytes(MI) over
// every MachineInstr of every MachineBasicBlock in MF, skipping instructions
// for which MI.isDebugInstr() is true. The result is a byte count for this one
// function only (see the first TODO below).
// The call sites visible elsewhere in this diff now call
// CurrentProgramInfo.getFunctionCodeSize(MF) instead; that replacement's body
// is not shown here, so whether it keeps the same debug-instruction filtering
// should be confirmed against its definition.
- uint64_t AMDGPUAsmPrinter::getFunctionCodeSize (const MachineFunction &MF) const {
897
- const GCNSubtarget &STM = MF.getSubtarget <GCNSubtarget>();
898
- const SIInstrInfo *TII = STM.getInstrInfo ();
899
-
900
- uint64_t CodeSize = 0 ;
901
-
902
- for (const MachineBasicBlock &MBB : MF) {
903
- for (const MachineInstr &MI : MBB) {
904
- // TODO: CodeSize should account for multiple functions.
905
-
906
- // TODO: Should we count size of debug info?
907
- if (MI.isDebugInstr ())
908
- continue ;
909
-
910
- CodeSize += TII->getInstSizeInBytes (MI);
911
- }
912
- }
913
-
914
- return CodeSize;
915
- }
916
-
917
898
// AccumOffset computed for the MCExpr equivalent of:
918
899
// alignTo(std::max(1, NumVGPR), 4) / 4 - 1;
919
900
static const MCExpr *computeAccumOffset (const MCExpr *NumVGPR, MCContext &Ctx) {
@@ -1249,24 +1230,24 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1249
1230
ProgInfo.LdsSize = STM.isAmdHsaOS () ? 0 : ProgInfo.LDSBlocks ;
1250
1231
ProgInfo.EXCPEnable = 0 ;
1251
1232
1233
// Review note: SetBits does not evaluate anything; it builds and returns a new
// MCExpr tree equivalent to ((Dst & ~Mask) | (Value << Shift)), allocating the
// constant and operator nodes through Ctx (captured by reference).
//   Dst   - expression for the current register value.
//   Value - expression for the field value to insert.
//   Mask  - applied to Dst as-is (it is NOT shifted here), so it has to be the
//           field's in-place bit mask.
//   Shift - left-shift applied to Value.
// Value itself is never masked, so nothing here stops an over-wide Value from
// spilling into neighbouring bits; callers must range-limit it first.
// This diff hoists the lambda out of the hasGFX90AInsts() branch so the GFX11+
// INST_PREF_SIZE code later in the same function can reuse it.
+ // return ((Dst & ~Mask) | (Value << Shift))
1234
+ auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1235
+ uint32_t Shift) {
1236
+ const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1237
+ const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1238
+ Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx);
1239
+ Dst = MCBinaryExpr::createOr (Dst, MCBinaryExpr::createShl (Value, Shft, Ctx),
1240
+ Ctx);
1241
+ return Dst;
1242
+ };
1243
+
1252
1244
if (STM.hasGFX90AInsts ()) {
1253
- // return ((Dst & ~Mask) | (Value << Shift))
1254
- auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1255
- uint32_t Shift) {
1256
- const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1257
- const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1258
- Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx);
1259
- Dst = MCBinaryExpr::createOr (
1260
- Dst, MCBinaryExpr::createShl (Value, Shft, Ctx), Ctx);
1261
- return Dst;
1262
- };
1263
-
1264
- ProgInfo.ComputePGMRSrc3GFX90A =
1265
- SetBits (ProgInfo.ComputePGMRSrc3GFX90A , ProgInfo.AccumOffset ,
1245
+ ProgInfo.ComputePGMRSrc3 =
1246
+ SetBits (ProgInfo.ComputePGMRSrc3 , ProgInfo.AccumOffset ,
1266
1247
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1267
1248
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
1268
- ProgInfo.ComputePGMRSrc3GFX90A =
1269
- SetBits (ProgInfo.ComputePGMRSrc3GFX90A , CreateExpr (ProgInfo.TgSplit ),
1249
+ ProgInfo.ComputePGMRSrc3 =
1250
+ SetBits (ProgInfo.ComputePGMRSrc3 , CreateExpr (ProgInfo.TgSplit ),
1270
1251
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1271
1252
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
1272
1253
}
@@ -1287,6 +1268,26 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1287
1268
" , final occupancy is " + Twine (Occupancy));
1288
1269
F.getContext ().diagnose (Diag);
1289
1270
}
1271
+
1272
// Review note: on GFX11+ this block fills the INST_PREF_SIZE field of
// ProgInfo.ComputePGMRSrc3 from the function's code size.
//   1. ProgInfo.getFunctionCodeSize(MF) is saturated to UINT32_MAX before the
//      narrowing cast, so an oversized function clamps rather than wraps.
//   2. The byte count is converted to 128-byte units, rounded up (divideCeil).
//      The variable name suggests these are cache lines - presumably
//      instruction-cache lines; confirm against the hardware documentation.
//   3. Field/Shift/Width are chosen per generation: the GFX11 constants when
//      isGFX11(STM), otherwise the GFX12_PLUS constants.
//   4. The unit count is saturated to (1u << Width) - 1, the largest value that
//      fits in Width bits. This assumes Width < 32; a Width of 32 would make
//      the shift undefined.
//   5. SetBits ORs the clamped value into ComputePGMRSrc3 at Shift. Because of
//      step 4 the value cannot overflow the field, provided Field is the
//      in-place mask matching Width and Shift.
+ if (isGFX11Plus (STM)) {
1273
+ uint32_t CodeSizeInBytes =
1274
+ (uint32_t )std::min (ProgInfo.getFunctionCodeSize (MF),
1275
+ (uint64_t )std::numeric_limits<uint32_t >::max ());
1276
+ uint32_t CodeSizeInLines = divideCeil (CodeSizeInBytes, 128 );
1277
+ uint32_t Field, Shift, Width;
1278
+ if (isGFX11 (STM)) {
1279
+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE;
1280
+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT;
1281
+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
1282
+ } else {
1283
+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE;
1284
+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT;
1285
+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
1286
+ }
1287
+ uint64_t InstPrefSize = std::min (CodeSizeInLines, (1u << Width) - 1 );
1288
+ ProgInfo.ComputePGMRSrc3 = SetBits (ProgInfo.ComputePGMRSrc3 ,
1289
+ CreateExpr (InstPrefSize), Field, Shift);
1290
+ }
1290
1291
}
1291
1292
1292
1293
static unsigned getRsrcReg (CallingConv::ID CallConv) {
0 commit comments