Skip to content

Commit 8615eeb

Browse files
committed
AMDGPU: Partially merge indirect register write handling
a785209 switched to using pseudos instead of manually tying operands on the regular instruction. The VGPR indexing mode path should have the same problems that change attempted to avoid, so both paths should use the same strategy. Use a single pseudo for the VGPR indexing mode and movreld paths, and expand it based on the subtarget later. These have essentially the same constraints, reading the index from m0. Switch from using an offset to using the subregister index directly, instead of computing an offset and re-adding it back. Also add missing pseudos for existing register class sizes.
1 parent c12a591 commit 8615eeb

File tree

3 files changed

+61
-70
lines changed

3 files changed

+61
-70
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 26 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3465,21 +3465,27 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
34653465
return LoopBB;
34663466
}
34673467

3468-
static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI,
3468+
static unsigned getIndirectRegWritePseudo(const SIRegisterInfo &TRI,
34693469
const TargetRegisterClass *VecRC) {
34703470
switch (TRI.getRegSizeInBits(*VecRC)) {
34713471
case 32: // 4 bytes
3472-
return AMDGPU::V_MOVRELD_B32_V1;
3472+
return AMDGPU::V_INDIRECT_REG_WRITE_B32_V1;
34733473
case 64: // 8 bytes
3474-
return AMDGPU::V_MOVRELD_B32_V2;
3474+
return AMDGPU::V_INDIRECT_REG_WRITE_B32_V2;
3475+
case 96: // 12 bytes
3476+
return AMDGPU::V_INDIRECT_REG_WRITE_B32_V3;
34753477
case 128: // 16 bytes
3476-
return AMDGPU::V_MOVRELD_B32_V4;
3478+
return AMDGPU::V_INDIRECT_REG_WRITE_B32_V4;
3479+
case 160: // 20 bytes
3480+
return AMDGPU::V_INDIRECT_REG_WRITE_B32_V5;
34773481
case 256: // 32 bytes
3478-
return AMDGPU::V_MOVRELD_B32_V8;
3482+
return AMDGPU::V_INDIRECT_REG_WRITE_B32_V8;
34793483
case 512: // 64 bytes
3480-
return AMDGPU::V_MOVRELD_B32_V16;
3484+
return AMDGPU::V_INDIRECT_REG_WRITE_B32_V16;
3485+
case 1024: // 128 bytes
3486+
return AMDGPU::V_INDIRECT_REG_WRITE_B32_V32;
34813487
default:
3482-
llvm_unreachable("unsupported size for MOVRELD pseudos");
3488+
llvm_unreachable("unsupported size for IndirectRegWrite pseudos");
34833489
}
34843490
}
34853491

@@ -3526,24 +3532,14 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
35263532
MachineBasicBlock::iterator I(&MI);
35273533
const DebugLoc &DL = MI.getDebugLoc();
35283534

3529-
if (UseGPRIdxMode) {
3530-
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
3531-
.addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
3532-
.add(*Val)
3533-
.addReg(Dst, RegState::ImplicitDefine)
3534-
.addReg(SrcVec->getReg(), RegState::Implicit)
3535-
.addReg(AMDGPU::M0, RegState::Implicit);
3536-
3535+
const MCInstrDesc &MovRelDesc
3536+
= TII->get(getIndirectRegWritePseudo(TRI, VecRC));
3537+
BuildMI(MBB, I, DL, MovRelDesc, Dst)
3538+
.addReg(SrcVec->getReg())
3539+
.add(*Val)
3540+
.addImm(SubReg);
3541+
if (UseGPRIdxMode)
35373542
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
3538-
} else {
3539-
const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC));
3540-
3541-
BuildMI(MBB, I, DL, MovRelDesc)
3542-
.addReg(Dst, RegState::Define)
3543-
.addReg(SrcVec->getReg())
3544-
.add(*Val)
3545-
.addImm(SubReg - AMDGPU::sub0);
3546-
}
35473543

35483544
MI.eraseFromParent();
35493545
return &MBB;
@@ -3560,26 +3556,15 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
35603556
Offset, UseGPRIdxMode, false);
35613557
MachineBasicBlock *LoopBB = InsPt->getParent();
35623558

3563-
if (UseGPRIdxMode) {
3564-
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
3565-
.addReg(PhiReg, RegState::Undef, SubReg) // vdst
3566-
.add(*Val) // src0
3567-
.addReg(Dst, RegState::ImplicitDefine)
3568-
.addReg(PhiReg, RegState::Implicit)
3569-
.addReg(AMDGPU::M0, RegState::Implicit);
3559+
const MCInstrDesc &MovRelDesc = TII->get(getIndirectRegWritePseudo(TRI, VecRC));
3560+
BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
3561+
.addReg(PhiReg)
3562+
.add(*Val)
3563+
.addImm(AMDGPU::sub0);
3564+
if (UseGPRIdxMode)
35703565
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
3571-
} else {
3572-
const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC));
3573-
3574-
BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
3575-
.addReg(Dst, RegState::Define)
3576-
.addReg(PhiReg)
3577-
.add(*Val)
3578-
.addImm(SubReg - AMDGPU::sub0);
3579-
}
35803566

35813567
MI.eraseFromParent();
3582-
35833568
return LoopBB;
35843569
}
35853570

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,30 +1482,33 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
14821482
MI.eraseFromParent();
14831483
break;
14841484
}
1485-
case AMDGPU::V_MOVRELD_B32_V1:
1486-
case AMDGPU::V_MOVRELD_B32_V2:
1487-
case AMDGPU::V_MOVRELD_B32_V4:
1488-
case AMDGPU::V_MOVRELD_B32_V8:
1489-
case AMDGPU::V_MOVRELD_B32_V16: {
1490-
const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
1485+
case AMDGPU::V_INDIRECT_REG_WRITE_B32_V1:
1486+
case AMDGPU::V_INDIRECT_REG_WRITE_B32_V2:
1487+
case AMDGPU::V_INDIRECT_REG_WRITE_B32_V3:
1488+
case AMDGPU::V_INDIRECT_REG_WRITE_B32_V4:
1489+
case AMDGPU::V_INDIRECT_REG_WRITE_B32_V5:
1490+
case AMDGPU::V_INDIRECT_REG_WRITE_B32_V8:
1491+
case AMDGPU::V_INDIRECT_REG_WRITE_B32_V16:
1492+
case AMDGPU::V_INDIRECT_REG_WRITE_B32_V32: {
1493+
unsigned Opc = ST.useVGPRIndexMode() ?
1494+
AMDGPU::V_MOV_B32_indirect : AMDGPU::V_MOVRELD_B32_e32;
1495+
const MCInstrDesc &OpDesc = get(Opc);
14911496
Register VecReg = MI.getOperand(0).getReg();
14921497
bool IsUndef = MI.getOperand(1).isUndef();
1493-
unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
1498+
unsigned SubReg = MI.getOperand(3).getImm();
14941499
assert(VecReg == MI.getOperand(1).getReg());
14951500

1496-
MachineInstr *MovRel =
1497-
BuildMI(MBB, MI, DL, MovRelDesc)
1498-
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
1499-
.add(MI.getOperand(2))
1500-
.addReg(VecReg, RegState::ImplicitDefine)
1501-
.addReg(VecReg,
1502-
RegState::Implicit | (IsUndef ? RegState::Undef : 0));
1501+
MachineInstrBuilder MIB =
1502+
BuildMI(MBB, MI, DL, OpDesc)
1503+
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
1504+
.add(MI.getOperand(2))
1505+
.addReg(VecReg, RegState::ImplicitDefine)
1506+
.addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
15031507

15041508
const int ImpDefIdx =
1505-
MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
1509+
OpDesc.getNumOperands() + OpDesc.getNumImplicitUses();
15061510
const int ImpUseIdx = ImpDefIdx + 1;
1507-
MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
1508-
1511+
MIB->tieOperands(ImpDefIdx, ImpUseIdx);
15091512
MI.eraseFromParent();
15101513
break;
15111514
}

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -812,25 +812,28 @@ def V_MOV_B32_indirect : VPseudoInstSI<(outs),
812812
let SubtargetPredicate = isGFX8GFX9;
813813
}
814814

815-
// This is a pseudo variant of the v_movreld_b32 instruction in which the
816-
// vector operand appears only twice, once as def and once as use. Using this
817-
// pseudo avoids problems with the Two Address instructions pass.
818-
class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
815+
// This is a pseudo variant of the v_movreld_b32 (or v_mov_b32
816+
// expecting to be executed with gpr indexing mode enabled)
817+
// instruction in which the vector operand appears only twice, once as
818+
// def and once as use. Using this pseudo avoids problems with the Two
819+
// Address instructions pass.
820+
class V_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
819821
(outs rc:$vdst),
820-
(ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
822+
(ins rc:$vsrc, VSrc_b32:$val, i32imm:$subreg)> {
821823
let VOP1 = 1;
822824

823825
let Constraints = "$vsrc = $vdst";
824826
let Uses = [M0, EXEC];
825-
826-
let SubtargetPredicate = HasMovrel;
827827
}
828828

829-
def V_MOVRELD_B32_V1 : V_MOVRELD_B32_pseudo<VGPR_32>;
830-
def V_MOVRELD_B32_V2 : V_MOVRELD_B32_pseudo<VReg_64>;
831-
def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
832-
def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
833-
def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
829+
def V_INDIRECT_REG_WRITE_B32_V1 : V_INDIRECT_REG_WRITE_B32_pseudo<VGPR_32>;
830+
def V_INDIRECT_REG_WRITE_B32_V2 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_64>;
831+
def V_INDIRECT_REG_WRITE_B32_V3 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_96>;
832+
def V_INDIRECT_REG_WRITE_B32_V4 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_128>;
833+
def V_INDIRECT_REG_WRITE_B32_V5 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_160>;
834+
def V_INDIRECT_REG_WRITE_B32_V8 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_256>;
835+
def V_INDIRECT_REG_WRITE_B32_V16 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_512>;
836+
def V_INDIRECT_REG_WRITE_B32_V32 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_1024>;
834837

835838
let OtherPredicates = [isGFX8Plus] in {
836839

0 commit comments

Comments
 (0)