Skip to content

Commit ccf5cae

Browse files
committed
legalize movetoVALU operand in true16
1 parent afa32d3 commit ccf5cae

File tree

2 files changed

+40
-15
lines changed

2 files changed

+40
-15
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7747,6 +7747,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77477747

77487748
// If this is a v2s copy src from vgpr16 to sgpr32,
77497749
// replace the vgpr copy with subreg_to_reg
7750+
// This can be removed after we have sgpr16 in place
77507751
if (ST.useRealTrue16Insts() && Inst.isCopy() &&
77517752
Inst.getOperand(1).getReg().isVirtual() &&
77527753
RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
@@ -7785,11 +7786,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77857786
NewInstr.addImm(0);
77867787
if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0)) {
77877788
MachineOperand Src = Inst.getOperand(1);
7788-
if (AMDGPU::isTrue16Inst(NewOpcode) && ST.useRealTrue16Insts() &&
7789-
Src.isReg() && RI.isVGPR(MRI, Src.getReg()))
7790-
NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
7791-
else
7792-
NewInstr->addOperand(Src);
7789+
NewInstr->addOperand(Src);
77937790
}
77947791

77957792
if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
@@ -7851,6 +7848,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78517848
}
78527849
}
78537850
Inst.eraseFromParent();
7851+
78547852
Register NewDstReg;
78557853
if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
78567854
Register DstReg = NewInstr->getOperand(0).getReg();
@@ -7863,6 +7861,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78637861

78647862
// Check useMI of NewInstr. If used by a true16 instruction,
78657863
// add a lo16 subreg access if size mismatched
7864+
// This can be removed after we have sgpr16 in place
78667865
if (ST.useRealTrue16Insts() && NewDstRC == &AMDGPU::VGPR_32RegClass) {
78677866
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
78687867
E = MRI.use_end();
@@ -7878,6 +7877,25 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78787877
}
78797878
}
78807879
fixImplicitOperands(*NewInstr);
7880+
7881+
// 16bit SALU uses sgpr32. If a 16bit SALU gets lowered to VALU in true16 mode,
7882+
// sgpr32 is replaced with vgpr32, which is illegal in t16 inst. Need to add
7883+
// subreg access properly. This can be removed after we have sgpr16 in place
7884+
if (AMDGPU::isTrue16Inst(NewOpcode) && ST.useRealTrue16Insts()) {
7885+
for (MachineOperand &Op : NewInstr->explicit_operands()) {
7886+
unsigned OpIdx = Op.getOperandNo();
7887+
if (!OpIdx)
7888+
continue;
7889+
if (Op.isReg() && RI.isVGPR(MRI, Op.getReg())) {
7890+
unsigned RCID = get(NewOpcode).operands()[OpIdx].RegClass;
7891+
const TargetRegisterClass *RC = RI.getRegClass(RCID);
7892+
if (RI.getRegSizeInBits(*RC) == 16) {
7893+
Op.setSubReg(AMDGPU::lo16);
7894+
}
7895+
}
7896+
}
7897+
}
7898+
78817899
// Legalize the operands
78827900
legalizeOperands(*NewInstr, MDT);
78837901
if (NewDstReg)

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,26 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2-
# FIXME-TRUE16. reenable after fix-sgpr-copies is fixed for true16 flow
3-
# XUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s
4-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,FAKE16 %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=REAL16 %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=FAKE16 %s
54

65
---
76
name: fmac_f16
87
body: |
98
bb.0:
10-
; GCN-LABEL: name: fmac_f16
11-
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
12-
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
13-
; GCN-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
14-
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
15-
; GCN-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
16-
; GCN-NEXT: [[V_FMAC_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F16_fake16_e64 0, killed [[DEF1]], 0, [[DEF2]], 0, [[V_CVT_F32_U32_e64_]], 0, 0, implicit $mode, implicit $exec
9+
; REAL16-LABEL: name: fmac_f16
10+
; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
11+
; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
12+
; REAL16-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
13+
; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
14+
; REAL16-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
15+
; REAL16-NEXT: [[V_FMAC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FMAC_F16_t16_e64 0, killed [[DEF1]], 0, [[DEF2]], 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
16+
;
17+
; FAKE16-LABEL: name: fmac_f16
18+
; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
19+
; FAKE16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
20+
; FAKE16-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
21+
; FAKE16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
22+
; FAKE16-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
23+
; FAKE16-NEXT: [[V_FMAC_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAC_F16_fake16_e64 0, killed [[DEF1]], 0, [[DEF2]], 0, [[V_CVT_F32_U32_e64_]], 0, 0, implicit $mode, implicit $exec
1724
%0:vgpr_32 = IMPLICIT_DEF
1825
%1:sreg_32 = IMPLICIT_DEF
1926
%2:sreg_32 = IMPLICIT_DEF

0 commit comments

Comments
 (0)