Skip to content

Commit 59fc3e5

Browse files
committed
Check for the case where a vgpr16 is put into a vgpr32 operand in v2s lowering
1 parent cd6c4b6 commit 59fc3e5

File tree

3 files changed

+554
-265
lines changed

3 files changed

+554
-265
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7235,24 +7235,44 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
72357235
return DeferredList.contains(MI);
72367236
}
72377237

7238-
// 16bit SALU use sgpr32. If a 16bit SALU get lowered to VALU in true16 mode,
7239-
// sgpr32 is replaced to vgpr32 which is illegal in t16 inst. Need to add
7240-
// subreg access properly. This can be removed after we have sgpr16 in place
7241-
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &Inst,
7238+
// Legalize operands between 16-bit and 32-bit registers in v2s copy
7239+
// lowering (change sgpr to vgpr).
7240+
// This is mainly caused by 16bit SALU and 16bit VALU using reg with different
7241+
// size. Need to legalize the size of the operands during the vgpr lowering
7242+
// chain. This can be removed after we have sgpr16 in place
7243+
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
72427244
MachineRegisterInfo &MRI) const {
7243-
unsigned Opcode = Inst.getOpcode();
7244-
if (!AMDGPU::isTrue16Inst(Opcode) || !ST.useRealTrue16Insts())
7245+
if (!ST.useRealTrue16Insts())
72457246
return;
72467247

7247-
for (MachineOperand &Op : Inst.explicit_operands()) {
7248+
unsigned Opcode = MI.getOpcode();
7249+
MachineBasicBlock *MBB = MI.getParent();
7250+
7251+
// legalize operands and check for size mismatch
7252+
for (MachineOperand &Op : MI.explicit_operands()) {
72487253
unsigned OpIdx = Op.getOperandNo();
72497254
if (!OpIdx)
72507255
continue;
7251-
if (Op.isReg() && RI.isVGPR(MRI, Op.getReg())) {
7256+
if (Op.isReg() && Op.getReg().isVirtual() && RI.isVGPR(MRI, Op.getReg())) {
72527257
unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
7253-
const TargetRegisterClass *RC = RI.getRegClass(RCID);
7254-
if (RI.getRegSizeInBits(*RC) == 16) {
7258+
const TargetRegisterClass *ExpectedRC = RI.getRegClass(RCID);
7259+
const TargetRegisterClass *RC = MRI.getRegClass(Op.getReg());
7260+
if (32 == RI.getRegSizeInBits(*RC) &&
7261+
16 == RI.getRegSizeInBits(*ExpectedRC)) {
72557262
Op.setSubReg(AMDGPU::lo16);
7263+
} else if (16 == RI.getRegSizeInBits(*RC) &&
7264+
32 == RI.getRegSizeInBits(*ExpectedRC)) {
7265+
const DebugLoc &DL = MI.getDebugLoc();
7266+
Register NewDstReg =
7267+
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7268+
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7269+
BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
7270+
BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
7271+
.addReg(Op.getReg())
7272+
.addImm(AMDGPU::lo16)
7273+
.addReg(Undef)
7274+
.addImm(AMDGPU::hi16);
7275+
Op.setReg(NewDstReg);
72567276
}
72577277
}
72587278
}
@@ -7793,8 +7813,19 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77937813
.add(Inst.getOperand(1))
77947814
.add(MachineOperand::CreateImm(AMDGPU::lo16));
77957815
Inst.eraseFromParent();
7796-
77977816
MRI.replaceRegWith(DstReg, NewDstReg);
7817+
// Legalize uses of NewDstReg whose operand size does not match (true16 uses expecting a 16-bit operand get the lo16 subreg).
7818+
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
7819+
E = MRI.use_end();
7820+
I != E; ++I) {
7821+
MachineInstr &UseMI = *I->getParent();
7822+
unsigned UseMIOpcode = UseMI.getOpcode();
7823+
if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
7824+
(16 ==
7825+
RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
7826+
I->setSubReg(AMDGPU::lo16);
7827+
}
7828+
}
77987829
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
77997830
return;
78007831
}

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ body: |
99
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
1010
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1111
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
12-
; GCN-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:vgpr_32 = SUBREG_TO_REG 0, [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16
13-
; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, killed [[SUBREG_TO_REG]].lo16, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
12+
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
13+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16, [[DEF2]], %subreg.hi16
14+
; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
1415
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_t16_e64_]], implicit $exec
1516
%0:vgpr_16 = IMPLICIT_DEF
1617
%1:sreg_32 = IMPLICIT_DEF
@@ -28,8 +29,9 @@ body: |
2829
; GCN-LABEL: name: cvt_hi_f32_f16
2930
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
3031
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
31-
; GCN-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:vgpr_32 = SUBREG_TO_REG 0, [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16
32-
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[SUBREG_TO_REG]]
32+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
33+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
34+
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]]
3335
; GCN-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_t16_e64 0, [[COPY]].hi16, 0, 0, 0, implicit $mode, implicit $exec
3436
%0:vgpr_16 = IMPLICIT_DEF
3537
%1:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
@@ -44,8 +46,9 @@ body: |
4446
; GCN-LABEL: name: s_or_b32
4547
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
4648
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
47-
; GCN-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:vgpr_32 = SUBREG_TO_REG 0, [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16
48-
; GCN-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[SUBREG_TO_REG]], [[SUBREG_TO_REG]], implicit $exec
49+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
50+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
51+
; GCN-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[REG_SEQUENCE]], [[REG_SEQUENCE]], implicit $exec
4952
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_1:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[V_OR_B32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
5053
%0:vgpr_16 = IMPLICIT_DEF
5154
%1:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec

0 commit comments

Comments
 (0)