@@ -1194,6 +1194,30 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
1194
1194
getLdsDwGranularity (ST) * sizeof (uint32_t )));
1195
1195
}
1196
1196
1197
/// Derives a (shift, mask) pair from a "clear"-style bit pattern, i.e. a value
/// whose zero bits mark the bits of interest.
///
/// \param Value Bit pattern to analyse; its complement is what gets scanned.
/// \returns A pair {Shift, Mask} where Shift is the index of the lowest zero
/// bit of \p Value and Mask is ~Value shifted right by Shift (so bit 0 of
/// Mask is always set). If \p Value has no zero bits at all, {0, 0} is
/// returned.
static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
  unsigned Mask = ~Value;

  // With an all-ones Value the complement is zero and there is no set bit to
  // search for; without this guard the scan below would never terminate.
  if (Mask == 0)
    return std::make_pair(0u, 0u);

  // Drop trailing zero bits of the complement, counting how many were removed.
  unsigned Shift = 0;
  while ((Mask & 1u) == 0) {
    Mask >>= 1;
    ++Shift;
  }

  return std::make_pair(Shift, Mask);
}
1207
+
1208
+ static const MCExpr *MaskShiftSet (const MCExpr *Val, uint32_t Mask,
1209
+ uint32_t Shift, MCContext &Ctx) {
1210
+ if (Mask) {
1211
+ const MCExpr *MaskExpr = MCConstantExpr::create (Mask, Ctx);
1212
+ Val = MCBinaryExpr::createAnd (Val, MaskExpr, Ctx);
1213
+ }
1214
+ if (Shift) {
1215
+ const MCExpr *ShiftExpr = MCConstantExpr::create (Shift, Ctx);
1216
+ Val = MCBinaryExpr::createShl (Val, ShiftExpr, Ctx);
1217
+ }
1218
+ return Val;
1219
+ }
1220
+
1197
1221
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
1198
1222
// is AMDPAL. It stores each compute/SPI register setting and other PAL
1199
1223
// metadata items into the PALMD::Metadata, combining with any provided by the
@@ -1207,41 +1231,49 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
1207
1231
auto &Ctx = MF.getContext ();
1208
1232
1209
1233
MD->setEntryPoint (CC, MF.getFunction ().getName ());
1210
- MD->setNumUsedVgprs (
1211
- CC, getMCExprValue (CurrentProgramInfo.NumVGPRsForWavesPerEU , Ctx));
1234
+ MD->setNumUsedVgprs (CC, CurrentProgramInfo.NumVGPRsForWavesPerEU , Ctx);
1212
1235
1213
1236
// Only set AGPRs for supported devices
1214
1237
const GCNSubtarget &STM = MF.getSubtarget <GCNSubtarget>();
1215
1238
if (STM.hasMAIInsts ()) {
1216
- MD->setNumUsedAgprs (CC, getMCExprValue ( CurrentProgramInfo.NumAccVGPR , Ctx) );
1239
+ MD->setNumUsedAgprs (CC, CurrentProgramInfo.NumAccVGPR );
1217
1240
}
1218
1241
1219
- MD->setNumUsedSgprs (
1220
- CC, getMCExprValue (CurrentProgramInfo.NumSGPRsForWavesPerEU , Ctx));
1242
+ MD->setNumUsedSgprs (CC, CurrentProgramInfo.NumSGPRsForWavesPerEU , Ctx);
1221
1243
if (MD->getPALMajorVersion () < 3 ) {
1222
- MD->setRsrc1 (CC, CurrentProgramInfo.getPGMRSrc1 (CC, STM) );
1244
+ MD->setRsrc1 (CC, CurrentProgramInfo.getPGMRSrc1 (CC, STM, Ctx), Ctx );
1223
1245
if (AMDGPU::isCompute (CC)) {
1224
- MD->setRsrc2 (CC, CurrentProgramInfo.getComputePGMRSrc2 () );
1246
+ MD->setRsrc2 (CC, CurrentProgramInfo.getComputePGMRSrc2 (Ctx), Ctx );
1225
1247
} else {
1226
- if (getMCExprValue (CurrentProgramInfo.ScratchBlocks , Ctx) > 0 )
1227
- MD->setRsrc2 (CC, S_00B84C_SCRATCH_EN (1 ));
1248
+ const MCExpr *HasScratchBlocks =
1249
+ MCBinaryExpr::createGT (CurrentProgramInfo.ScratchBlocks ,
1250
+ MCConstantExpr::create (0 , Ctx), Ctx);
1251
+ auto [Shift, Mask] = getShiftMask (C_00B84C_SCRATCH_EN);
1252
+ MD->setRsrc2 (CC, MaskShiftSet (HasScratchBlocks, Mask, Shift, Ctx), Ctx);
1228
1253
}
1229
1254
} else {
1230
1255
MD->setHwStage (CC, " .debug_mode" , (bool )CurrentProgramInfo.DebugMode );
1231
- MD->setHwStage (CC, " .scratch_en" ,
1232
- ( bool ) getMCExprValue ( CurrentProgramInfo.ScratchEnable , Ctx) );
1256
+ MD->setHwStage (CC, " .scratch_en" , msgpack::Type::Boolean,
1257
+ CurrentProgramInfo.ScratchEnable );
1233
1258
EmitPALMetadataCommon (MD, CurrentProgramInfo, CC, STM);
1234
1259
}
1235
1260
1236
1261
// ScratchSize is in bytes, 16 aligned.
1237
1262
MD->setScratchSize (
1238
- CC, alignTo (getMCExprValue (CurrentProgramInfo.ScratchSize , Ctx), 16 ));
1263
+ CC,
1264
+ AMDGPUVariadicMCExpr::createAlignTo (CurrentProgramInfo.ScratchSize ,
1265
+ MCConstantExpr::create (16 , Ctx), Ctx),
1266
+ Ctx);
1267
+
1239
1268
if (MF.getFunction ().getCallingConv () == CallingConv::AMDGPU_PS) {
1240
1269
unsigned ExtraLDSSize = STM.getGeneration () >= AMDGPUSubtarget::GFX11
1241
1270
? divideCeil (CurrentProgramInfo.LDSBlocks , 2 )
1242
1271
: CurrentProgramInfo.LDSBlocks ;
1243
1272
if (MD->getPALMajorVersion () < 3 ) {
1244
- MD->setRsrc2 (CC, S_00B02C_EXTRA_LDS_SIZE (ExtraLDSSize));
1273
+ MD->setRsrc2 (
1274
+ CC,
1275
+ MCConstantExpr::create (S_00B02C_EXTRA_LDS_SIZE (ExtraLDSSize), Ctx),
1276
+ Ctx);
1245
1277
MD->setSpiPsInputEna (MFI->getPSInputEnable ());
1246
1278
MD->setSpiPsInputAddr (MFI->getPSInputAddr ());
1247
1279
} else {
@@ -1288,20 +1320,19 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
1288
1320
1289
1321
if (MD->getPALMajorVersion () < 3 ) {
1290
1322
// Set compute registers
1291
- MD->setRsrc1 (CallingConv::AMDGPU_CS,
1292
- CurrentProgramInfo.getPGMRSrc1 (CallingConv::AMDGPU_CS, ST));
1323
+ MD->setRsrc1 (
1324
+ CallingConv::AMDGPU_CS,
1325
+ CurrentProgramInfo.getPGMRSrc1 (CallingConv::AMDGPU_CS, ST, Ctx), Ctx);
1293
1326
MD->setRsrc2 (CallingConv::AMDGPU_CS,
1294
- CurrentProgramInfo.getComputePGMRSrc2 () );
1327
+ CurrentProgramInfo.getComputePGMRSrc2 (Ctx), Ctx );
1295
1328
} else {
1296
1329
EmitPALMetadataCommon (MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
1297
1330
}
1298
1331
1299
1332
// Set optional info
1300
1333
MD->setFunctionLdsSize (FnName, CurrentProgramInfo.LDSSize );
1301
- MD->setFunctionNumUsedVgprs (
1302
- FnName, getMCExprValue (CurrentProgramInfo.NumVGPRsForWavesPerEU , Ctx));
1303
- MD->setFunctionNumUsedSgprs (
1304
- FnName, getMCExprValue (CurrentProgramInfo.NumSGPRsForWavesPerEU , Ctx));
1334
+ MD->setFunctionNumUsedVgprs (FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU );
1335
+ MD->setFunctionNumUsedSgprs (FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU );
1305
1336
}
1306
1337
1307
1338
// This is supposed to be log2(Size)
0 commit comments