@@ -123,7 +123,7 @@ class SIFoldOperandsImpl {
123
123
SmallVectorImpl<FoldCandidate> &FoldList,
124
124
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const ;
125
125
126
- MachineOperand * getImmOrMaterializedImm (MachineOperand &Op) const ;
126
+ std::optional< int64_t > getImmOrMaterializedImm (MachineOperand &Op) const ;
127
127
bool tryConstantFoldOp (MachineInstr *MI) const ;
128
128
bool tryFoldCndMask (MachineInstr &MI) const ;
129
129
bool tryFoldZeroHighBits (MachineInstr &MI) const ;
@@ -1296,21 +1296,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
1296
1296
MI.removeOperand (I);
1297
1297
}
1298
1298
1299
- MachineOperand *
1299
+ std::optional< int64_t >
1300
1300
SIFoldOperandsImpl::getImmOrMaterializedImm (MachineOperand &Op) const {
1301
- // If this has a subregister, it obviously is a register source.
1302
- if (!Op.isReg () || Op.getSubReg () != AMDGPU::NoSubRegister ||
1303
- !Op.getReg ().isVirtual ())
1304
- return &Op;
1301
+ if (Op.isImm ())
1302
+ return Op.getImm ();
1305
1303
1306
- MachineInstr *Def = MRI->getVRegDef (Op.getReg ());
1304
+ if (!Op.isReg () || !Op.getReg ().isVirtual ())
1305
+ return std::nullopt;
1306
+
1307
+ const MachineInstr *Def = MRI->getVRegDef (Op.getReg ());
1307
1308
if (Def && Def->isMoveImmediate ()) {
1308
- MachineOperand &ImmSrc = Def->getOperand (1 );
1309
+ const MachineOperand &ImmSrc = Def->getOperand (1 );
1309
1310
if (ImmSrc.isImm ())
1310
- return & ImmSrc;
1311
+ return TII-> extractSubregFromImm ( ImmSrc. getImm (), Op. getSubReg ()) ;
1311
1312
}
1312
1313
1313
- return &Op ;
1314
+ return std::nullopt ;
1314
1315
}
1315
1316
1316
1317
// Try to simplify operations with a constant that may appear after instruction
@@ -1325,30 +1326,34 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
1325
1326
int Src0Idx = AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src0);
1326
1327
if (Src0Idx == -1 )
1327
1328
return false ;
1328
- MachineOperand *Src0 = getImmOrMaterializedImm (MI->getOperand (Src0Idx));
1329
+
1330
+ MachineOperand *Src0 = &MI->getOperand (Src0Idx);
1331
+ std::optional<int64_t > Src0Imm = getImmOrMaterializedImm (*Src0);
1329
1332
1330
1333
if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
1331
1334
Opc == AMDGPU::S_NOT_B32) &&
1332
- Src0-> isImm () ) {
1333
- MI->getOperand (1 ).ChangeToImmediate (~Src0-> getImm () );
1335
+ Src0Imm ) {
1336
+ MI->getOperand (1 ).ChangeToImmediate (~*Src0Imm );
1334
1337
mutateCopyOp (*MI, TII->get (getMovOpc (Opc == AMDGPU::S_NOT_B32)));
1335
1338
return true ;
1336
1339
}
1337
1340
1338
1341
int Src1Idx = AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src1);
1339
1342
if (Src1Idx == -1 )
1340
1343
return false ;
1341
- MachineOperand *Src1 = getImmOrMaterializedImm (MI->getOperand (Src1Idx));
1342
1344
1343
- if (!Src0->isImm () && !Src1->isImm ())
1345
+ MachineOperand *Src1 = &MI->getOperand (Src1Idx);
1346
+ std::optional<int64_t > Src1Imm = getImmOrMaterializedImm (*Src1);
1347
+
1348
+ if (!Src0Imm && !Src1Imm)
1344
1349
return false ;
1345
1350
1346
1351
// and k0, k1 -> v_mov_b32 (k0 & k1)
1347
1352
// or k0, k1 -> v_mov_b32 (k0 | k1)
1348
1353
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
1349
- if (Src0-> isImm () && Src1-> isImm () ) {
1354
+ if (Src0Imm && Src1Imm ) {
1350
1355
int32_t NewImm;
1351
- if (!evalBinaryInstruction (Opc, NewImm, Src0-> getImm (), Src1-> getImm () ))
1356
+ if (!evalBinaryInstruction (Opc, NewImm, *Src0Imm, *Src1Imm ))
1352
1357
return false ;
1353
1358
1354
1359
bool IsSGPR = TRI->isSGPRReg (*MRI, MI->getOperand (0 ).getReg ());
@@ -1364,12 +1369,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
1364
1369
if (!MI->isCommutable ())
1365
1370
return false ;
1366
1371
1367
- if (Src0-> isImm () && !Src1-> isImm () ) {
1372
+ if (Src0Imm && !Src1Imm ) {
1368
1373
std::swap (Src0, Src1);
1369
1374
std::swap (Src0Idx, Src1Idx);
1375
+ std::swap (Src0Imm, Src1Imm);
1370
1376
}
1371
1377
1372
- int32_t Src1Val = static_cast <int32_t >(Src1-> getImm () );
1378
+ int32_t Src1Val = static_cast <int32_t >(*Src1Imm );
1373
1379
if (Opc == AMDGPU::V_OR_B32_e64 ||
1374
1380
Opc == AMDGPU::V_OR_B32_e32 ||
1375
1381
Opc == AMDGPU::S_OR_B32) {
@@ -1426,9 +1432,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
1426
1432
MachineOperand *Src0 = TII->getNamedOperand (MI, AMDGPU::OpName::src0);
1427
1433
MachineOperand *Src1 = TII->getNamedOperand (MI, AMDGPU::OpName::src1);
1428
1434
if (!Src1->isIdenticalTo (*Src0)) {
1429
- auto *Src0Imm = getImmOrMaterializedImm (*Src0);
1430
- auto *Src1Imm = getImmOrMaterializedImm (*Src1);
1431
- if (!Src1Imm->isIdenticalTo (*Src0Imm))
1435
+ std::optional<int64_t > Src1Imm = getImmOrMaterializedImm (*Src1);
1436
+ if (!Src1Imm)
1437
+ return false ;
1438
+
1439
+ std::optional<int64_t > Src0Imm = getImmOrMaterializedImm (*Src0);
1440
+ if (!Src0Imm || *Src0Imm != *Src1Imm)
1432
1441
return false ;
1433
1442
}
1434
1443
@@ -1461,8 +1470,8 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
1461
1470
MI.getOpcode () != AMDGPU::V_AND_B32_e32)
1462
1471
return false ;
1463
1472
1464
- MachineOperand *Src0 = getImmOrMaterializedImm (MI.getOperand (1 ));
1465
- if (!Src0-> isImm () || Src0-> getImm () != 0xffff )
1473
+ std::optional< int64_t > Src0Imm = getImmOrMaterializedImm (MI.getOperand (1 ));
1474
+ if (!Src0Imm || *Src0Imm != 0xffff )
1466
1475
return false ;
1467
1476
1468
1477
Register Src1 = MI.getOperand (2 ).getReg ();
0 commit comments