Skip to content

Commit d978a96

Browse files
committed
AMDGPU: Handle subregister uses in SIFoldOperands constant folding
1 parent 5eada95 commit d978a96

File tree

2 files changed

+67
-24
lines changed

2 files changed

+67
-24
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 33 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ class SIFoldOperandsImpl {
123123
SmallVectorImpl<FoldCandidate> &FoldList,
124124
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
125125

126-
MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const;
126+
std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
127127
bool tryConstantFoldOp(MachineInstr *MI) const;
128128
bool tryFoldCndMask(MachineInstr &MI) const;
129129
bool tryFoldZeroHighBits(MachineInstr &MI) const;
@@ -1298,21 +1298,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
12981298
MI.removeOperand(I);
12991299
}
13001300

1301-
MachineOperand *
1301+
std::optional<int64_t>
13021302
SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
1303-
// If this has a subregister, it obviously is a register source.
1304-
if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
1305-
!Op.getReg().isVirtual())
1306-
return &Op;
1303+
if (Op.isImm())
1304+
return Op.getImm();
13071305

1308-
MachineInstr *Def = MRI->getVRegDef(Op.getReg());
1306+
if (!Op.isReg() || !Op.getReg().isVirtual())
1307+
return std::nullopt;
1308+
1309+
const MachineInstr *Def = MRI->getVRegDef(Op.getReg());
13091310
if (Def && Def->isMoveImmediate()) {
1310-
MachineOperand &ImmSrc = Def->getOperand(1);
1311+
const MachineOperand &ImmSrc = Def->getOperand(1);
13111312
if (ImmSrc.isImm())
1312-
return &ImmSrc;
1313+
return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg());
13131314
}
13141315

1315-
return &Op;
1316+
return std::nullopt;
13161317
}
13171318

13181319
// Try to simplify operations with a constant that may appear after instruction
@@ -1327,30 +1328,34 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
13271328
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
13281329
if (Src0Idx == -1)
13291330
return false;
1330-
MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx));
1331+
1332+
MachineOperand *Src0 = &MI->getOperand(Src0Idx);
1333+
std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
13311334

13321335
if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
13331336
Opc == AMDGPU::S_NOT_B32) &&
1334-
Src0->isImm()) {
1335-
MI->getOperand(1).ChangeToImmediate(~Src0->getImm());
1337+
Src0Imm) {
1338+
MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
13361339
mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
13371340
return true;
13381341
}
13391342

13401343
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
13411344
if (Src1Idx == -1)
13421345
return false;
1343-
MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx));
13441346

1345-
if (!Src0->isImm() && !Src1->isImm())
1347+
MachineOperand *Src1 = &MI->getOperand(Src1Idx);
1348+
std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
1349+
1350+
if (!Src0Imm && !Src1Imm)
13461351
return false;
13471352

13481353
// and k0, k1 -> v_mov_b32 (k0 & k1)
13491354
// or k0, k1 -> v_mov_b32 (k0 | k1)
13501355
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
1351-
if (Src0->isImm() && Src1->isImm()) {
1356+
if (Src0Imm && Src1Imm) {
13521357
int32_t NewImm;
1353-
if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
1358+
if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm))
13541359
return false;
13551360

13561361
bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());
@@ -1366,12 +1371,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
13661371
if (!MI->isCommutable())
13671372
return false;
13681373

1369-
if (Src0->isImm() && !Src1->isImm()) {
1374+
if (Src0Imm && !Src1Imm) {
13701375
std::swap(Src0, Src1);
13711376
std::swap(Src0Idx, Src1Idx);
1377+
std::swap(Src0Imm, Src1Imm);
13721378
}
13731379

1374-
int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
1380+
int32_t Src1Val = static_cast<int32_t>(*Src1Imm);
13751381
if (Opc == AMDGPU::V_OR_B32_e64 ||
13761382
Opc == AMDGPU::V_OR_B32_e32 ||
13771383
Opc == AMDGPU::S_OR_B32) {
@@ -1428,9 +1434,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
14281434
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
14291435
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
14301436
if (!Src1->isIdenticalTo(*Src0)) {
1431-
auto *Src0Imm = getImmOrMaterializedImm(*Src0);
1432-
auto *Src1Imm = getImmOrMaterializedImm(*Src1);
1433-
if (!Src1Imm->isIdenticalTo(*Src0Imm))
1437+
std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1);
1438+
if (!Src1Imm)
1439+
return false;
1440+
1441+
std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0);
1442+
if (!Src0Imm || *Src0Imm != *Src1Imm)
14341443
return false;
14351444
}
14361445

@@ -1463,8 +1472,8 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
14631472
MI.getOpcode() != AMDGPU::V_AND_B32_e32)
14641473
return false;
14651474

1466-
MachineOperand *Src0 = getImmOrMaterializedImm(MI.getOperand(1));
1467-
if (!Src0->isImm() || Src0->getImm() != 0xffff)
1475+
std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1));
1476+
if (!Src0Imm || *Src0Imm != 0xffff)
14681477
return false;
14691478

14701479
Register Src1 = MI.getOperand(2).getReg();

llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -927,3 +927,37 @@ body: |
927927
S_ENDPGM 0, implicit %3
928928
929929
...
930+
931+
---
932+
name: constant_s_xor_b32_uses_subreg
933+
tracksRegLiveness: true
934+
body: |
935+
bb.0:
936+
; GCN-LABEL: name: constant_s_xor_b32_uses_subreg
937+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
938+
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
939+
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]]
940+
%0:sreg_64 = S_MOV_B64 32
941+
%1:sreg_64 = S_MOV_B64 15
942+
%2:sgpr_32 = S_XOR_B32 %0.sub0, %1.sub0, implicit-def dead $scc
943+
%3:sgpr_32 = S_XOR_B32 %0.sub1, %1.sub1, implicit-def dead $scc
944+
S_ENDPGM 0, implicit %2, implicit %3
945+
946+
...
947+
948+
---
949+
name: constant_v_or_b32_uses_subreg
950+
tracksRegLiveness: true
951+
body: |
952+
bb.0:
953+
; GCN-LABEL: name: constant_v_or_b32_uses_subreg
954+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 268435455, implicit $exec
955+
; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
956+
; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]]
957+
%0:vreg_64 = V_MOV_B64_PSEUDO 18446744069683019775, implicit $exec
958+
%1:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
959+
%2:vgpr_32 = V_OR_B32_e32 %0.sub0, %1.sub0, implicit $exec
960+
%3:vgpr_32 = V_OR_B32_e32 %0.sub1, %1.sub1, implicit $exec
961+
S_ENDPGM 0, implicit %2, implicit %3
962+
963+
...

0 commit comments

Comments
 (0)