@@ -123,7 +123,7 @@ class SIFoldOperandsImpl {
                    SmallVectorImpl<FoldCandidate> &FoldList,
                    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
 
-  MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const;
+  std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
   bool tryConstantFoldOp(MachineInstr *MI) const;
   bool tryFoldCndMask(MachineInstr &MI) const;
   bool tryFoldZeroHighBits(MachineInstr &MI) const;
@@ -1298,21 +1298,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
     MI.removeOperand(I);
 }
 
-MachineOperand *
+std::optional<int64_t>
 SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
-  // If this has a subregister, it obviously is a register source.
-  if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
-      !Op.getReg().isVirtual())
-    return &Op;
+  if (Op.isImm())
+    return Op.getImm();
 
-  MachineInstr *Def = MRI->getVRegDef(Op.getReg());
+  if (!Op.isReg() || !Op.getReg().isVirtual())
+    return std::nullopt;
+
+  const MachineInstr *Def = MRI->getVRegDef(Op.getReg());
   if (Def && Def->isMoveImmediate()) {
-    MachineOperand &ImmSrc = Def->getOperand(1);
+    const MachineOperand &ImmSrc = Def->getOperand(1);
     if (ImmSrc.isImm())
-      return &ImmSrc;
+      return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg());
   }
 
-  return &Op;
+  return std::nullopt;
 }
 
 // Try to simplify operations with a constant that may appear after instruction
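The rewritten helper now answers "what immediate value, if any, does this operand carry?" instead of handing back an operand pointer the caller must re-inspect, and a read through a subregister of a materialized 64-bit immediate can now fold instead of bailing out. Below is a minimal standalone sketch of the extraction that `TII->extractSubregFromImm` is expected to perform, assuming 32-bit subregister halves; the enum and function names are stand-ins, not LLVM's API:

```cpp
#include <cstdint>
#include <optional>

// Stand-ins for AMDGPU's real subregister indices.
enum SubRegIdx { NoSubRegister, Sub0, Sub1 };

// Given a 64-bit immediate materialized by a move, return the bits a read
// through the given subregister would observe (zero-extended here; the real
// helper may differ in extension and in which indices it handles).
std::optional<int64_t> extractSubregFromImmSketch(int64_t Imm, SubRegIdx Sub) {
  switch (Sub) {
  case NoSubRegister:
    return Imm;                              // full-width read
  case Sub0:
    return static_cast<uint32_t>(Imm);       // low 32 bits
  case Sub1:
    return static_cast<uint32_t>(Imm >> 32); // high 32 bits
  }
  return std::nullopt;                       // unhandled subregister
}
```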
@@ -1327,30 +1328,34 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
   if (Src0Idx == -1)
     return false;
-  MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx));
+
+  MachineOperand *Src0 = &MI->getOperand(Src0Idx);
+  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
 
   if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
        Opc == AMDGPU::S_NOT_B32) &&
-      Src0->isImm()) {
-    MI->getOperand(1).ChangeToImmediate(~Src0->getImm());
+      Src0Imm) {
+    MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
     mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
     return true;
   }
 
   int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
   if (Src1Idx == -1)
     return false;
-  MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx));
 
-  if (!Src0->isImm() && !Src1->isImm())
+  MachineOperand *Src1 = &MI->getOperand(Src1Idx);
+  std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
+
+  if (!Src0Imm && !Src1Imm)
     return false;
 
   // and k0, k1 -> v_mov_b32 (k0 & k1)
   // or k0, k1 -> v_mov_b32 (k0 | k1)
   // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
-  if (Src0->isImm() && Src1->isImm()) {
+  if (Src0Imm && Src1Imm) {
     int32_t NewImm;
-    if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
+    if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm))
       return false;
 
     bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());
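The `k0, k1` comments describe the whole fold: when both sources are known constants, the instruction collapses to a move of the folded value. A standalone sketch of that evaluation under stand-in opcodes (`evalBinaryInstruction` itself dispatches on the real AMDGPU opcode list, which is longer):

```cpp
#include <cstdint>
#include <optional>

enum class BinOp { And, Or, Xor }; // stand-ins for the AMDGPU opcodes

// Fold "op k0, k1" to the constant a v_mov_b32/s_mov_b32 could materialize.
std::optional<int32_t> foldBinaryImms(BinOp Op, int64_t K0, int64_t K1) {
  switch (Op) {
  case BinOp::And: return static_cast<int32_t>(K0 & K1);
  case BinOp::Or:  return static_cast<int32_t>(K0 | K1);
  case BinOp::Xor: return static_cast<int32_t>(K0 ^ K1);
  }
  return std::nullopt; // opcode not handled: leave the instruction alone
}
```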
@@ -1366,12 +1371,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
   if (!MI->isCommutable())
     return false;
 
-  if (Src0->isImm() && !Src1->isImm()) {
+  if (Src0Imm && !Src1Imm) {
     std::swap(Src0, Src1);
     std::swap(Src0Idx, Src1Idx);
+    std::swap(Src0Imm, Src1Imm);
   }
 
-  int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
+  int32_t Src1Val = static_cast<int32_t>(*Src1Imm);
   if (Opc == AMDGPU::V_OR_B32_e64 ||
       Opc == AMDGPU::V_OR_B32_e32 ||
       Opc == AMDGPU::S_OR_B32) {
@@ -1428,9 +1434,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
   MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
   MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
   if (!Src1->isIdenticalTo(*Src0)) {
-    auto *Src0Imm = getImmOrMaterializedImm(*Src0);
-    auto *Src1Imm = getImmOrMaterializedImm(*Src1);
-    if (!Src1Imm->isIdenticalTo(*Src0Imm))
+    std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
+    if (!Src1Imm)
+      return false;
+
+    std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
+    if (!Src0Imm || *Src0Imm != *Src1Imm)
       return false;
   }
 
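The cndmask fold's premise: a select whose two inputs are provably equal is a copy, regardless of the condition. Querying Src1 first gives a cheap early-out before the second lookup. A sketch of the equality test the rewritten block performs (the helper name is hypothetical):

```cpp
#include <cstdint>
#include <optional>

// A select is redundant only when both inputs are known values and equal;
// an unknown on either side means equality can't be proven.
bool selectFoldsToCopy(std::optional<int64_t> Src0Imm,
                       std::optional<int64_t> Src1Imm) {
  return Src0Imm && Src1Imm && *Src0Imm == *Src1Imm;
}
```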
@@ -1463,8 +1472,8 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
       MI.getOpcode() != AMDGPU::V_AND_B32_e32)
     return false;
 
-  MachineOperand *Src0 = getImmOrMaterializedImm(MI.getOperand(1));
-  if (!Src0->isImm() || Src0->getImm() != 0xffff)
+  std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
+  if (!Src0Imm || *Src0Imm != 0xffff)
     return false;
 
   Register Src1 = MI.getOperand(2).getReg();
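The 0xffff test feeds the same idea as before the rewrite: `v_and_b32 x, 0xffff` only matters if it could clear the high half, so when the other source provably has zero high bits the AND can fold away. A tiny self-check of that arithmetic:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t FromU16Op = 0x00001234u; // high half known zero: AND is a no-op
  assert((FromU16Op & 0xffffu) == FromU16Op);

  uint32_t Arbitrary = 0xabcd1234u; // high bits set: AND does real work
  assert((Arbitrary & 0xffffu) != Arbitrary);
  return 0;
}
```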