Skip to content

Commit cd10c01

Browse files
authored
AMDGPU: Handle subregister uses in SIFoldOperands constant folding (#127485)
1 parent 1c8add1 commit cd10c01

File tree

2 files changed

+67
-24
lines changed

2 files changed

+67
-24
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ class SIFoldOperandsImpl {
123123
SmallVectorImpl<FoldCandidate> &FoldList,
124124
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
125125

126-
MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const;
126+
std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
127127
bool tryConstantFoldOp(MachineInstr *MI) const;
128128
bool tryFoldCndMask(MachineInstr &MI) const;
129129
bool tryFoldZeroHighBits(MachineInstr &MI) const;
@@ -1296,21 +1296,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
12961296
MI.removeOperand(I);
12971297
}
12981298

1299-
MachineOperand *
1299+
std::optional<int64_t>
13001300
SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
1301-
// If this has a subregister, it obviously is a register source.
1302-
if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
1303-
!Op.getReg().isVirtual())
1304-
return &Op;
1301+
if (Op.isImm())
1302+
return Op.getImm();
13051303

1306-
MachineInstr *Def = MRI->getVRegDef(Op.getReg());
1304+
if (!Op.isReg() || !Op.getReg().isVirtual())
1305+
return std::nullopt;
1306+
1307+
const MachineInstr *Def = MRI->getVRegDef(Op.getReg());
13071308
if (Def && Def->isMoveImmediate()) {
1308-
MachineOperand &ImmSrc = Def->getOperand(1);
1309+
const MachineOperand &ImmSrc = Def->getOperand(1);
13091310
if (ImmSrc.isImm())
1310-
return &ImmSrc;
1311+
return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg());
13111312
}
13121313

1313-
return &Op;
1314+
return std::nullopt;
13141315
}
13151316

13161317
// Try to simplify operations with a constant that may appear after instruction
@@ -1325,30 +1326,34 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
13251326
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
13261327
if (Src0Idx == -1)
13271328
return false;
1328-
MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx));
1329+
1330+
MachineOperand *Src0 = &MI->getOperand(Src0Idx);
1331+
std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
13291332

13301333
if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
13311334
Opc == AMDGPU::S_NOT_B32) &&
1332-
Src0->isImm()) {
1333-
MI->getOperand(1).ChangeToImmediate(~Src0->getImm());
1335+
Src0Imm) {
1336+
MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
13341337
mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
13351338
return true;
13361339
}
13371340

13381341
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
13391342
if (Src1Idx == -1)
13401343
return false;
1341-
MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx));
13421344

1343-
if (!Src0->isImm() && !Src1->isImm())
1345+
MachineOperand *Src1 = &MI->getOperand(Src1Idx);
1346+
std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
1347+
1348+
if (!Src0Imm && !Src1Imm)
13441349
return false;
13451350

13461351
// and k0, k1 -> v_mov_b32 (k0 & k1)
13471352
// or k0, k1 -> v_mov_b32 (k0 | k1)
13481353
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
1349-
if (Src0->isImm() && Src1->isImm()) {
1354+
if (Src0Imm && Src1Imm) {
13501355
int32_t NewImm;
1351-
if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
1356+
if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm))
13521357
return false;
13531358

13541359
bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());
@@ -1364,12 +1369,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
13641369
if (!MI->isCommutable())
13651370
return false;
13661371

1367-
if (Src0->isImm() && !Src1->isImm()) {
1372+
if (Src0Imm && !Src1Imm) {
13681373
std::swap(Src0, Src1);
13691374
std::swap(Src0Idx, Src1Idx);
1375+
std::swap(Src0Imm, Src1Imm);
13701376
}
13711377

1372-
int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
1378+
int32_t Src1Val = static_cast<int32_t>(*Src1Imm);
13731379
if (Opc == AMDGPU::V_OR_B32_e64 ||
13741380
Opc == AMDGPU::V_OR_B32_e32 ||
13751381
Opc == AMDGPU::S_OR_B32) {
@@ -1426,9 +1432,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
14261432
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
14271433
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
14281434
if (!Src1->isIdenticalTo(*Src0)) {
1429-
auto *Src0Imm = getImmOrMaterializedImm(*Src0);
1430-
auto *Src1Imm = getImmOrMaterializedImm(*Src1);
1431-
if (!Src1Imm->isIdenticalTo(*Src0Imm))
1435+
std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
1436+
if (!Src1Imm)
1437+
return false;
1438+
1439+
std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
1440+
if (!Src0Imm || *Src0Imm != *Src1Imm)
14321441
return false;
14331442
}
14341443

@@ -1461,8 +1470,8 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
14611470
MI.getOpcode() != AMDGPU::V_AND_B32_e32)
14621471
return false;
14631472

1464-
MachineOperand *Src0 = getImmOrMaterializedImm(MI.getOperand(1));
1465-
if (!Src0->isImm() || Src0->getImm() != 0xffff)
1473+
std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
1474+
if (!Src0Imm || *Src0Imm != 0xffff)
14661475
return false;
14671476

14681477
Register Src1 = MI.getOperand(2).getReg();

llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -927,3 +927,37 @@ body: |
927927
S_ENDPGM 0, implicit %3
928928
929929
...
930+
931+
---
932+
name: constant_s_xor_b32_uses_subreg
933+
tracksRegLiveness: true
934+
body: |
935+
bb.0:
936+
; GCN-LABEL: name: constant_s_xor_b32_uses_subreg
937+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
938+
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
939+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]]
940+
%0:sreg_64 = S_MOV_B64 32
941+
%1:sreg_64 = S_MOV_B64 15
942+
%2:sgpr_32 = S_XOR_B32 %0.sub0, %1.sub0, implicit-def dead $scc
943+
%3:sgpr_32 = S_XOR_B32 %0.sub1, %1.sub1, implicit-def dead $scc
944+
S_ENDPGM 0, implicit %2, implicit %3
945+
946+
...
947+
948+
---
949+
name: constant_v_or_b32_uses_subreg
950+
tracksRegLiveness: true
951+
body: |
952+
bb.0:
953+
; GCN-LABEL: name: constant_v_or_b32_uses_subreg
954+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 268435455, implicit $exec
955+
; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
956+
; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]]
957+
%0:vreg_64 = V_MOV_B64_PSEUDO 18446744069683019775, implicit $exec
958+
%1:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
959+
%2:vgpr_32 = V_OR_B32_e32 %0.sub0, %1.sub0, implicit $exec
960+
%3:vgpr_32 = V_OR_B32_e32 %0.sub1, %1.sub1, implicit $exec
961+
S_ENDPGM 0, implicit %2, implicit %3
962+
963+
...

0 commit comments

Comments (0)