Skip to content

Commit 83104c0

Browse files
committed
check for vgpr16 putting into vgpr32 case in v2s lowering
1 parent 1fdf02a commit 83104c0

File tree

3 files changed

+596
-259
lines changed

3 files changed

+596
-259
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7221,24 +7221,44 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
72217221
return DeferredList.contains(MI);
72227222
}
72237223

7224-
// 16bit SALU use sgpr32. If a 16bit SALU get lowered to VALU in true16 mode,
7225-
// sgpr32 is replaced to vgpr32 which is illegal in t16 inst. Need to add
7226-
// subreg access properly. This can be removed after we have sgpr16 in place
7227-
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &Inst,
7224+
// Legalize operands between 16-bit and 32-bit registers in v2s copy
7225+
// lowering (change sgpr to vgpr).
7226+
// This is mainly caused by 16bit SALU and 16bit VALU using reg with different
7227+
// size. Need to legalize the size of the operands during the vgpr lowering
7228+
// chain. This can be removed after we have sgpr16 in place
7229+
void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
72287230
MachineRegisterInfo &MRI) const {
7229-
unsigned Opcode = Inst.getOpcode();
7230-
if (!AMDGPU::isTrue16Inst(Opcode) || !ST.useRealTrue16Insts())
7231+
if (!ST.useRealTrue16Insts())
72317232
return;
72327233

7233-
for (MachineOperand &Op : Inst.explicit_operands()) {
7234+
unsigned Opcode = MI.getOpcode();
7235+
MachineBasicBlock *MBB = MI.getParent();
7236+
7237+
// legalize operands and check for size mismatch
7238+
for (MachineOperand &Op : MI.explicit_operands()) {
72347239
unsigned OpIdx = Op.getOperandNo();
72357240
if (!OpIdx)
72367241
continue;
7237-
if (Op.isReg() && RI.isVGPR(MRI, Op.getReg())) {
7242+
if (Op.isReg() && Op.getReg().isVirtual() && RI.isVGPR(MRI, Op.getReg())) {
72387243
unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
7239-
const TargetRegisterClass *RC = RI.getRegClass(RCID);
7240-
if (RI.getRegSizeInBits(*RC) == 16) {
7244+
const TargetRegisterClass *ExpectedRC = RI.getRegClass(RCID);
7245+
const TargetRegisterClass *RC = MRI.getRegClass(Op.getReg());
7246+
if (32 == RI.getRegSizeInBits(*RC) &&
7247+
16 == RI.getRegSizeInBits(*ExpectedRC)) {
72417248
Op.setSubReg(AMDGPU::lo16);
7249+
} else if (16 == RI.getRegSizeInBits(*RC) &&
7250+
32 == RI.getRegSizeInBits(*ExpectedRC)) {
7251+
const DebugLoc &DL = MI.getDebugLoc();
7252+
Register NewDstReg =
7253+
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7254+
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7255+
BuildMI(*MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
7256+
BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDstReg)
7257+
.addReg(Op.getReg())
7258+
.addImm(AMDGPU::lo16)
7259+
.addReg(Undef)
7260+
.addImm(AMDGPU::hi16);
7261+
Op.setReg(NewDstReg);
72427262
}
72437263
}
72447264
}
@@ -7783,8 +7803,19 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77837803
.addReg(Undef)
77847804
.addImm(AMDGPU::hi16);
77857805
Inst.eraseFromParent();
7786-
77877806
MRI.replaceRegWith(DstReg, NewDstReg);
7807+
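// Legalize users of NewDstReg whose operand size is mismatched: a true16
// instruction expecting a 16-bit operand must read the lo16 subregister.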
7808+
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
7809+
E = MRI.use_end();
7810+
I != E; ++I) {
7811+
MachineInstr &UseMI = *I->getParent();
7812+
unsigned UseMIOpcode = UseMI.getOpcode();
7813+
if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
7814+
(16 ==
7815+
RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
7816+
I->setSubReg(AMDGPU::lo16);
7817+
}
7818+
}
77887819
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
77897820
return;
77907821
}

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,57 @@ body: |
5757
%4:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %3:sreg_32, 0, 0, 0, implicit $mode, implicit $exec
5858
...
5959

60+
---
61+
name: salu16_usedby_salu32
62+
body: |
63+
bb.0:
64+
; GCN-LABEL: name: salu16_usedby_salu32
65+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
66+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
67+
; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[DEF]].lo16, 0, 0, 0, implicit $mode, implicit $exec
68+
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
69+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_TRUNC_F16_t16_e64_]], %subreg.lo16, [[DEF2]], %subreg.hi16
70+
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[REG_SEQUENCE]], [[DEF]], implicit $exec
71+
%0:vgpr_32 = IMPLICIT_DEF
72+
%1:sreg_32 = COPY %0:vgpr_32
73+
%2:sreg_32 = S_TRUNC_F16 %1:sreg_32, implicit $mode
74+
%3:sreg_32 = S_XOR_B32 %2:sreg_32, %1:sreg_32, implicit-def $scc
75+
...
76+
77+
---
78+
name: salu32_usedby_salu16
79+
body: |
80+
bb.0:
81+
; GCN-LABEL: name: salu32_usedby_salu16
82+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
83+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
84+
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[DEF]], [[DEF]], implicit $exec
85+
; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[V_XOR_B32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
86+
%0:vgpr_32 = IMPLICIT_DEF
87+
%1:sreg_32 = COPY %0:vgpr_32
88+
%2:sreg_32 = S_XOR_B32 %1:sreg_32, %1:sreg_32, implicit-def $scc
89+
%3:sreg_32 = S_TRUNC_F16 %2:sreg_32, implicit $mode
90+
...
91+
92+
---
93+
name: S_FMAC_F16
94+
body: |
95+
bb.0:
96+
; GCN-LABEL: name: S_FMAC_F16
97+
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
98+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_lo16 = IMPLICIT_DEF
99+
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
100+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF2]], %subreg.hi16
101+
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
102+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF3]], %subreg.hi16
103+
; GCN-NEXT: [[V_FMAC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_FMAC_F16_t16_e64 0, [[REG_SEQUENCE1]].lo16, 0, [[REG_SEQUENCE1]].lo16, 0, [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
104+
%0:vgpr_16 = IMPLICIT_DEF
105+
%1:sgpr_lo16 = COPY %0:vgpr_16
106+
%2:sreg_32 = COPY %0:vgpr_16
107+
%3:sreg_32 = COPY %1:sgpr_lo16
108+
%4:sreg_32 = S_FMAC_F16 %3:sreg_32, %3:sreg_32, %2:sreg_32, implicit $mode
109+
...
110+
60111
---
61112
name: vgpr16_to_spgr32
62113
body: |

0 commit comments

Comments
 (0)