Skip to content

Commit 63ad0f3

Browse files
committed
patch 2
1 parent fc6ad72 commit 63ad0f3

File tree

7 files changed

+2889
-3210
lines changed

7 files changed

+2889
-3210
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 47 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -7232,43 +7232,50 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
72327232
// This is mainly caused by 16bit SALU and 16bit VALU using reg with different
72337233
// size. Need to legalize the size of the operands during the vgpr lowering
72347234
// chain. This can be removed after we have sgpr16 in place
7235-
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
7235+
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI, unsigned OpIdx,
72367236
MachineRegisterInfo &MRI) const {
72377237
if (!ST.useRealTrue16Insts())
72387238
return;
72397239

72407240
unsigned Opcode = MI.getOpcode();
72417241
MachineBasicBlock *MBB = MI.getParent();
7242-
72437242
// legalize operands and check for size mismatch
7244-
for (MachineOperand &Op : MI.explicit_operands()) {
7245-
unsigned OpIdx = Op.getOperandNo();
7246-
if (!OpIdx)
7247-
continue;
7248-
if (Op.isReg() && Op.getReg().isVirtual()) {
7249-
const TargetRegisterClass *DefRC = MRI.getRegClass(Op.getReg());
7250-
if (!RI.isVGPRClass(DefRC))
7251-
continue;
7252-
unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
7253-
const TargetRegisterClass *UseRC = RI.getRegClass(RCID);
7254-
if (RI.getMatchingSuperRegClass(DefRC, UseRC, AMDGPU::lo16)) {
7255-
Op.setSubReg(AMDGPU::lo16);
7256-
} else if (RI.getMatchingSuperRegClass(UseRC, DefRC, AMDGPU::lo16)) {
7257-
const DebugLoc &DL = MI.getDebugLoc();
7258-
Register NewDstReg =
7259-
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7260-
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7261-
BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
7262-
BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
7263-
.addReg(Op.getReg())
7264-
.addImm(AMDGPU::lo16)
7265-
.addReg(Undef)
7266-
.addImm(AMDGPU::hi16);
7267-
Op.setReg(NewDstReg);
7268-
}
7269-
}
7243+
if (!OpIdx || OpIdx >= MI.getNumExplicitOperands())
7244+
return;
7245+
7246+
MachineOperand &Op = MI.getOperand(OpIdx);
7247+
if (!Op.isReg() || !Op.getReg().isVirtual())
7248+
return;
7249+
7250+
const TargetRegisterClass *CurrRC = MRI.getRegClass(Op.getReg());
7251+
if (!RI.isVGPRClass(CurrRC))
7252+
return;
7253+
7254+
if (OpIdx >= get(Opcode).getNumOperands())
7255+
return;
7256+
7257+
unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
7258+
const TargetRegisterClass *ExpectedRC = RI.getRegClass(RCID);
7259+
if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7260+
Op.setSubReg(AMDGPU::lo16);
7261+
} else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7262+
const DebugLoc &DL = MI.getDebugLoc();
7263+
Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7264+
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7265+
BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
7266+
BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
7267+
.addReg(Op.getReg())
7268+
.addImm(AMDGPU::lo16)
7269+
.addReg(Undef)
7270+
.addImm(AMDGPU::hi16);
7271+
Op.setReg(NewDstReg);
72707272
}
72717273
}
7274+
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
7275+
MachineRegisterInfo &MRI) const {
7276+
for (unsigned OpIdx = 1; OpIdx < MI.getNumExplicitOperands(); OpIdx++)
7277+
legalizeOperandsVALUt16(MI, OpIdx, MRI);
7278+
}
72727279

72737280
void SIInstrInfo::moveToVALU(SIInstrWorklist &Worklist,
72747281
MachineDominatorTree *MDT) const {
@@ -7789,15 +7796,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77897796
return;
77907797
}
77917798

7792-
// If this is a v2s copy src from 16bit to 32bit,
7793-
// replace vgpr copy to reg_sequence
7799+
// If this is a v2s copy between 16bit and 32bit reg,
7800+
// replace the vgpr copy with reg_sequence/extract_subreg
77947801
// This can be removed after we have sgpr16 in place
77957802
if (ST.useRealTrue16Insts() && Inst.isCopy() &&
77967803
Inst.getOperand(1).getReg().isVirtual() &&
77977804
RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
77987805
const TargetRegisterClass *SrcRegRC = getOpRegClass(Inst, 1);
7799-
if (16 == RI.getRegSizeInBits(*SrcRegRC) &&
7800-
32 == RI.getRegSizeInBits(*NewDstRC)) {
7806+
if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
78017807
Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
78027808
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
78037809
BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
@@ -7810,18 +7816,13 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78107816
.addImm(AMDGPU::hi16);
78117817
Inst.eraseFromParent();
78127818
MRI.replaceRegWith(DstReg, NewDstReg);
7813-
// legalize useMI with mismatched size
7814-
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
7815-
E = MRI.use_end();
7816-
I != E; ++I) {
7817-
MachineInstr &UseMI = *I->getParent();
7818-
unsigned UseMIOpcode = UseMI.getOpcode();
7819-
if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
7820-
(16 ==
7821-
RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
7822-
I->setSubReg(AMDGPU::lo16);
7823-
}
7824-
}
7819+
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
7820+
return;
7821+
} else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
7822+
AMDGPU::lo16)) {
7823+
Inst.getOperand(1).setSubReg(AMDGPU::lo16);
7824+
Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
7825+
MRI.replaceRegWith(DstReg, NewDstReg);
78257826
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
78267827
return;
78277828
}
@@ -7916,23 +7917,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
79167917
assert(NewDstRC);
79177918
NewDstReg = MRI.createVirtualRegister(NewDstRC);
79187919
MRI.replaceRegWith(DstReg, NewDstReg);
7919-
7920-
// Check useMI of NewInstr. If used by a true16 instruction,
7921-
// add a lo16 subreg access if size mismatched
7922-
// This can be remove after we have sgpr16 in place
7923-
if (ST.useRealTrue16Insts() && NewDstRC == &AMDGPU::VGPR_32RegClass) {
7924-
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
7925-
E = MRI.use_end();
7926-
I != E; ++I) {
7927-
MachineInstr &UseMI = *I->getParent();
7928-
unsigned UseMIOpcode = UseMI.getOpcode();
7929-
if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
7930-
(16 ==
7931-
RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
7932-
I->setSubReg(AMDGPU::lo16);
7933-
}
7934-
}
7935-
}
79367920
}
79377921
fixImplicitOperands(*NewInstr);
79387922

@@ -8740,6 +8724,8 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
87408724
++I;
87418725
} while (I != E && I->getParent() == &UseMI);
87428726
} else {
8727+
legalizeOperandsVALUt16(UseMI, OpNo, MRI);
8728+
87438729
++I;
87448730
}
87458731
}

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
13041304
/// Fix operands in Inst to fix 16bit SALU to VALU lowering.
13051305
void legalizeOperandsVALUt16(MachineInstr &Inst,
13061306
MachineRegisterInfo &MRI) const;
1307+
void legalizeOperandsVALUt16(MachineInstr &Inst, unsigned OpIdx,
1308+
MachineRegisterInfo &MRI) const;
13071309

13081310
/// Replace the instructions opcode with the equivalent VALU
13091311
/// opcode. This function will also move the users of MachineInstructions

0 commit comments

Comments
 (0)