Skip to content

[AMDGPU][True16][CodeGen] set true16 mode as default for gfx11 #138912

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1780,7 +1780,9 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureImageInsts,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
FeatureMemoryAtomicFAddF32DenormalSupport]>;
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureRealTrue16Insts
]>;

// There are few workarounds that need to be
// added to all targets. This pessimizes codegen
Expand Down
53 changes: 42 additions & 11 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7221,24 +7221,44 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
return DeferredList.contains(MI);
}

// 16-bit SALU instructions use sgpr32. If a 16-bit SALU instruction gets
// lowered to VALU in true16 mode, sgpr32 is replaced with vgpr32, which is
// illegal in a t16 instruction. Subreg accesses need to be added properly.
// This can be removed after we have sgpr16 in place.
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &Inst,
// Legalize operands between 16-bit and 32-bit registers in v2s copy
// lowering (changing sgpr to vgpr).
// This is mainly caused by 16-bit SALU and 16-bit VALU instructions using
// registers of different sizes. The operand sizes need to be legalized during
// the vgpr lowering chain. This can be removed after we have sgpr16 in place.
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
MachineRegisterInfo &MRI) const {
unsigned Opcode = Inst.getOpcode();
if (!AMDGPU::isTrue16Inst(Opcode) || !ST.useRealTrue16Insts())
if (!ST.useRealTrue16Insts())
return;

for (MachineOperand &Op : Inst.explicit_operands()) {
unsigned Opcode = MI.getOpcode();
MachineBasicBlock *MBB = MI.getParent();

// legalize operands and check for size mismatch
for (MachineOperand &Op : MI.explicit_operands()) {
unsigned OpIdx = Op.getOperandNo();
if (!OpIdx)
continue;
if (Op.isReg() && RI.isVGPR(MRI, Op.getReg())) {
if (Op.isReg() && Op.getReg().isVirtual() && RI.isVGPR(MRI, Op.getReg())) {
unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
const TargetRegisterClass *RC = RI.getRegClass(RCID);
if (RI.getRegSizeInBits(*RC) == 16) {
const TargetRegisterClass *ExpectedRC = RI.getRegClass(RCID);
const TargetRegisterClass *RC = MRI.getRegClass(Op.getReg());
if (32 == RI.getRegSizeInBits(*RC) &&
16 == RI.getRegSizeInBits(*ExpectedRC)) {
Op.setSubReg(AMDGPU::lo16);
} else if (16 == RI.getRegSizeInBits(*RC) &&
32 == RI.getRegSizeInBits(*ExpectedRC)) {
const DebugLoc &DL = MI.getDebugLoc();
Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
.addReg(Op.getReg())
.addImm(AMDGPU::lo16)
.addReg(Undef)
.addImm(AMDGPU::hi16);
Op.setReg(NewDstReg);
}
}
}
Expand Down Expand Up @@ -7783,8 +7803,19 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
.addReg(Undef)
.addImm(AMDGPU::hi16);
Inst.eraseFromParent();

MRI.replaceRegWith(DstReg, NewDstReg);
// legalize useMI with mismatched size
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
E = MRI.use_end();
I != E; ++I) {
MachineInstr &UseMI = *I->getParent();
unsigned UseMIOpcode = UseMI.getOpcode();
if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
(16 ==
RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
I->setSubReg(AMDGPU::lo16);
}
}
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
return;
}
Expand Down
Loading
Loading