Skip to content

Commit 2945f24

Browse files
committed
fix moveToVALU in true16
1 parent 55d3a55 commit 2945f24

File tree

11 files changed

+1890
-1050
lines changed

11 files changed

+1890
-1050
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7742,6 +7742,29 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77427742
Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
77437743
return;
77447744
}
7745+
7746+
// In true16 mode, if this is a v2s copy from a 16-bit VGPR source to a
7747+
// 32-bit destination, replace the copy with a SUBREG_TO_REG (lo16).
7748+
if (ST.useRealTrue16Insts() && Inst.isCopy() &&
7749+
Inst.getOperand(1).getReg().isVirtual() &&
7750+
RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
7751+
const TargetRegisterClass *SrcRegRC = getOpRegClass(Inst, 1);
7752+
if (16 == RI.getRegSizeInBits(*SrcRegRC) &&
7753+
32 == RI.getRegSizeInBits(*NewDstRC)) {
7754+
Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
7755+
BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
7756+
get(TargetOpcode::SUBREG_TO_REG), NewDstReg)
7757+
.add(MachineOperand::CreateImm(0))
7758+
.add(Inst.getOperand(1))
7759+
.add(MachineOperand::CreateImm(AMDGPU::lo16));
7760+
Inst.eraseFromParent();
7761+
7762+
MRI.replaceRegWith(DstReg, NewDstReg);
7763+
addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
7764+
return;
7765+
}
7766+
}
7767+
77457768
Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
77467769
MRI.replaceRegWith(DstReg, NewDstReg);
77477770
legalizeOperands(Inst, MDT);
@@ -7835,6 +7858,22 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78357858
assert(NewDstRC);
78367859
NewDstReg = MRI.createVirtualRegister(NewDstRC);
78377860
MRI.replaceRegWith(DstReg, NewDstReg);
7861+
7862+
// Check the uses of the new destination register. If a use is in a true16
7863+
// instruction whose operand is 16 bits wide, access it via the lo16 subreg.
7864+
if (ST.useRealTrue16Insts() && NewDstRC == &AMDGPU::VGPR_32RegClass) {
7865+
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
7866+
E = MRI.use_end();
7867+
I != E; ++I) {
7868+
MachineInstr &UseMI = *I->getParent();
7869+
unsigned UseMIOpcode = UseMI.getOpcode();
7870+
if (AMDGPU::isTrue16Inst(UseMIOpcode) &&
7871+
(16 ==
7872+
RI.getRegSizeInBits(*getOpRegClass(UseMI, I.getOperandNo())))) {
7873+
I->setSubReg(AMDGPU::lo16);
7874+
}
7875+
}
7876+
}
78387877
}
78397878
fixImplicitOperands(*NewInstr);
78407879
// Legalize the operands
Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,35 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
3-
# XFAIL: *
4-
# FIXME-TRUE16 reenable after fix-sgpr-copies is updated for true16 flow
53

64
---
7-
name: cmp_f16
5+
name: cvt_hi_f32_f16
86
body: |
9-
bb.0.entry:
10-
; GCN-LABEL: name: cmp_f16
7+
bb.0:
8+
; GCN-LABEL: name: cvt_hi_f32_f16
119
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
12-
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1310
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
14-
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_U16_t16_e64_]]
15-
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY killed [[COPY]]
16-
; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, [[COPY1]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
17-
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_t16_e64_]], implicit $exec
11+
; GCN-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:vgpr_32 = SUBREG_TO_REG 0, [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16
12+
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[SUBREG_TO_REG]]
13+
; GCN-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_t16_e64 0, [[COPY]].hi16, 0, 0, 0, implicit $mode, implicit $exec
1814
%0:vgpr_16 = IMPLICIT_DEF
19-
%1:sreg_32 = IMPLICIT_DEF
20-
%2:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
21-
%3:sreg_32 = COPY %2:vgpr_16
22-
nofpexcept S_CMP_LT_F16 killed %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $mode
23-
%4:sreg_32_xm0_xexec = COPY $scc
24-
%5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %4, implicit $exec
15+
%1:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
16+
%2:sreg_32 = COPY %1:vgpr_16
17+
%3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode
2518
...
2619

2720
---
28-
name: cvt_hi_f32_f16
21+
name: s_xor_b32
2922
body: |
3023
bb.0:
31-
; GCN-LABEL: name: cvt_hi_f32_f16
24+
; GCN-LABEL: name: s_xor_b32
3225
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
3326
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
34-
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_U16_t16_e64_]]
35-
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
36-
; GCN-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_t16_e64 0, [[COPY1]].hi16, 0, 0, 0, implicit $mode, implicit $exec
27+
; GCN-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:vgpr_32 = SUBREG_TO_REG 0, [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16
28+
; GCN-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[SUBREG_TO_REG]], [[SUBREG_TO_REG]], implicit $exec
29+
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_1:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[V_OR_B32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
3730
%0:vgpr_16 = IMPLICIT_DEF
3831
%1:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
3932
%2:sreg_32 = COPY %1:vgpr_16
40-
%3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode
33+
%3:sreg_32 = S_OR_B32 %2:sreg_32, %2:sreg_32, implicit-def $scc
34+
%4:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %3:sreg_32, 0, 0, 0, implicit $mode, implicit $exec
4135
...

0 commit comments

Comments
 (0)