Skip to content

Commit 30cbdd6

Browse files
committed
patch 2
1 parent fc6ad72 commit 30cbdd6

File tree

7 files changed

+2888
-3211
lines changed

7 files changed

+2888
-3211
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 46 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -7227,48 +7227,53 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
72277227
return DeferredList.contains(MI);
72287228
}
72297229

7230-
// legalize operand between 16bit and 32bit registers in v2s copy
7230+
// Legalize size mismatches between 16bit and 32bit registers in v2s copy
72317231
// lowering (change sgpr to vgpr).
72327232
// This is mainly caused by 16bit SALU and 16bit VALU using reg with different
72337233
// size. Need to legalize the size of the operands during the vgpr lowering
72347234
// chain. This can be removed after we have sgpr16 in place
7235-
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
7235+
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI, unsigned OpIdx,
72367236
MachineRegisterInfo &MRI) const {
72377237
if (!ST.useRealTrue16Insts())
72387238
return;
72397239

72407240
unsigned Opcode = MI.getOpcode();
72417241
MachineBasicBlock *MBB = MI.getParent();
7242+
// Legalize operands and check for size mismatch
7243+
if (!OpIdx || OpIdx >= MI.getNumExplicitOperands() ||
7244+
OpIdx >= get(Opcode).getNumOperands())
7245+
return;
72427246

7243-
// legalize operands and check for size mismatch
7244-
for (MachineOperand &Op : MI.explicit_operands()) {
7245-
unsigned OpIdx = Op.getOperandNo();
7246-
if (!OpIdx)
7247-
continue;
7248-
if (Op.isReg() && Op.getReg().isVirtual()) {
7249-
const TargetRegisterClass *DefRC = MRI.getRegClass(Op.getReg());
7250-
if (!RI.isVGPRClass(DefRC))
7251-
continue;
7252-
unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
7253-
const TargetRegisterClass *UseRC = RI.getRegClass(RCID);
7254-
if (RI.getMatchingSuperRegClass(DefRC, UseRC, AMDGPU::lo16)) {
7255-
Op.setSubReg(AMDGPU::lo16);
7256-
} else if (RI.getMatchingSuperRegClass(UseRC, DefRC, AMDGPU::lo16)) {
7257-
const DebugLoc &DL = MI.getDebugLoc();
7258-
Register NewDstReg =
7259-
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7260-
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7261-
BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
7262-
BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
7263-
.addReg(Op.getReg())
7264-
.addImm(AMDGPU::lo16)
7265-
.addReg(Undef)
7266-
.addImm(AMDGPU::hi16);
7267-
Op.setReg(NewDstReg);
7268-
}
7269-
}
7247+
MachineOperand &Op = MI.getOperand(OpIdx);
7248+
if (!Op.isReg() || !Op.getReg().isVirtual())
7249+
return;
7250+
7251+
const TargetRegisterClass *CurrRC = MRI.getRegClass(Op.getReg());
7252+
if (!RI.isVGPRClass(CurrRC))
7253+
return;
7254+
7255+
unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
7256+
const TargetRegisterClass *ExpectedRC = RI.getRegClass(RCID);
7257+
if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7258+
Op.setSubReg(AMDGPU::lo16);
7259+
} else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7260+
const DebugLoc &DL = MI.getDebugLoc();
7261+
Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7262+
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7263+
BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
7264+
BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
7265+
.addReg(Op.getReg())
7266+
.addImm(AMDGPU::lo16)
7267+
.addReg(Undef)
7268+
.addImm(AMDGPU::hi16);
7269+
Op.setReg(NewDstReg);
72707270
}
72717271
}
7272+
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
7273+
MachineRegisterInfo &MRI) const {
7274+
for (unsigned OpIdx = 1; OpIdx < MI.getNumExplicitOperands(); OpIdx++)
7275+
legalizeOperandsVALUt16(MI, OpIdx, MRI);
7276+
}
72727277

72737278
void SIInstrInfo::moveToVALU(SIInstrWorklist &Worklist,
72747279
MachineDominatorTree *MDT) const {
@@ -7789,15 +7794,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77897794
return;
77907795
}
77917796

7792-
// If this is a v2s copy src from 16bit to 32bit,
7793-
// replace vgpr copy to reg_sequence
7797+
// If this is a v2s copy between 16bit and 32bit reg,
7798+
// replace the vgpr copy with reg_sequence/extract_subreg
77947799
// This can be removed after we have sgpr16 in place
77957800
if (ST.useRealTrue16Insts() && Inst.isCopy() &&
77967801
Inst.getOperand(1).getReg().isVirtual() &&
77977802
RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
77987803
const TargetRegisterClass *SrcRegRC = getOpRegClass(Inst, 1);
7799-
if (16 == RI.getRegSizeInBits(*SrcRegRC) &&
7800-
32 == RI.getRegSizeInBits(*NewDstRC)) {
7804+
if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
78017805
Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
78027806
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
78037807
BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
@@ -7810,18 +7814,13 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78107814
.addImm(AMDGPU::hi16);
78117815
Inst.eraseFromParent();
78127816
MRI.replaceRegWith(DstReg, NewDstReg);
7813-
// legalize useMI with mismatched size
7814-
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
7815-
E = MRI.use_end();
7816-
I != E; ++I) {
7817-
MachineInstr &UseMI = *I->getParent();
7818-
unsigned UseMIOpcode = UseMI.getOpcode();
7819-
if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
7820-
(16 ==
7821-
RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
7822-
I->setSubReg(AMDGPU::lo16);
7823-
}
7824-
}
7817+
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
7818+
return;
7819+
} else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
7820+
AMDGPU::lo16)) {
7821+
Inst.getOperand(1).setSubReg(AMDGPU::lo16);
7822+
Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
7823+
MRI.replaceRegWith(DstReg, NewDstReg);
78257824
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
78267825
return;
78277826
}
@@ -7916,23 +7915,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
79167915
assert(NewDstRC);
79177916
NewDstReg = MRI.createVirtualRegister(NewDstRC);
79187917
MRI.replaceRegWith(DstReg, NewDstReg);
7919-
7920-
// Check useMI of NewInstr. If used by a true16 instruction,
7921-
// add a lo16 subreg access if size mismatched
7922-
// This can be remove after we have sgpr16 in place
7923-
if (ST.useRealTrue16Insts() && NewDstRC == &AMDGPU::VGPR_32RegClass) {
7924-
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
7925-
E = MRI.use_end();
7926-
I != E; ++I) {
7927-
MachineInstr &UseMI = *I->getParent();
7928-
unsigned UseMIOpcode = UseMI.getOpcode();
7929-
if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
7930-
(16 ==
7931-
RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
7932-
I->setSubReg(AMDGPU::lo16);
7933-
}
7934-
}
7935-
}
79367918
}
79377919
fixImplicitOperands(*NewInstr);
79387920

@@ -8740,6 +8722,8 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
87408722
++I;
87418723
} while (I != E && I->getParent() == &UseMI);
87428724
} else {
8725+
legalizeOperandsVALUt16(UseMI, OpNo, MRI);
8726+
87438727
++I;
87448728
}
87458729
}

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
13041304
/// Fix operands in Inst to fix 16bit SALU to VALU lowering.
13051305
void legalizeOperandsVALUt16(MachineInstr &Inst,
13061306
MachineRegisterInfo &MRI) const;
1307+
void legalizeOperandsVALUt16(MachineInstr &Inst, unsigned OpIdx,
1308+
MachineRegisterInfo &MRI) const;
13071309

13081310
/// Replace the instructions opcode with the equivalent VALU
13091311
/// opcode. This function will also move the users of MachineInstructions

0 commit comments

Comments
 (0)