Skip to content

Commit 29ec87c

Browse files
committed
[AMDGPU][True16] Support VOP3 source DPP operands.
1 parent a18e92d commit 29ec87c

15 files changed

+361
-109
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 32 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -314,8 +314,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
314314
return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
315315
}
316316

317-
bool isRegOrInlineImmWithFP16InputMods() const {
318-
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
317+
template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
318+
return isRegOrInline(
319+
IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
319320
}
320321

321322
bool isRegOrInlineImmWithFP32InputMods() const {
@@ -8151,7 +8152,7 @@ ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
81518152

81528153
// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
81538154
// the number of src operands present, then copies that bit into src0_modifiers. For a VGPR_16 vdst, DST_OP_SEL is instead set when the register is a hi half.
8154-
void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8155+
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
81558156
int Opc = Inst.getOpcode();
81568157
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
81578158
if (OpSelIdx == -1)
@@ -8168,23 +8169,34 @@ void cvtVOP3DstOpSelOnly(MCInst &Inst) {
81688169

81698170
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
81708171

8171-
if ((OpSel & (1 << SrcNum)) != 0) {
8172-
int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8173-
uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8174-
Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8172+
int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8173+
if (DstIdx == -1)
8174+
return;
8175+
8176+
const MCOperand &DstOp = Inst.getOperand(DstIdx);
8177+
int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8178+
uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8179+
if (DstOp.isReg() &&
8180+
MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8181+
if (AMDGPU::isHi(DstOp.getReg(), MRI))
8182+
ModVal |= SISrcMods::DST_OP_SEL;
8183+
} else {
8184+
if ((OpSel & (1 << SrcNum)) != 0)
8185+
ModVal |= SISrcMods::DST_OP_SEL;
81758186
}
8187+
Inst.getOperand(ModIdx).setImm(ModVal);
81768188
}
81778189

81788190
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
81798191
const OperandVector &Operands) {
81808192
cvtVOP3P(Inst, Operands);
8181-
cvtVOP3DstOpSelOnly(Inst);
8193+
cvtVOP3DstOpSelOnly(Inst, *getMRI());
81828194
}
81838195

81848196
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
81858197
OptionalImmIndexMap &OptionalIdx) {
81868198
cvtVOP3P(Inst, Operands, OptionalIdx);
8187-
cvtVOP3DstOpSelOnly(Inst);
8199+
cvtVOP3DstOpSelOnly(Inst, *getMRI());
81888200
}
81898201

81908202
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
@@ -8433,8 +8445,17 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
84338445

84348446
uint32_t ModVal = 0;
84358447

8436-
if ((OpSel & (1 << J)) != 0)
8437-
ModVal |= SISrcMods::OP_SEL_0;
8448+
const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8449+
if (SrcOp.isReg() && getMRI()
8450+
->getRegClass(AMDGPU::VGPR_16RegClassID)
8451+
.contains(SrcOp.getReg())) {
8452+
bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8453+
if (VGPRSuffixIsHi)
8454+
ModVal |= SISrcMods::OP_SEL_0;
8455+
} else {
8456+
if ((OpSel & (1 << J)) != 0)
8457+
ModVal |= SISrcMods::OP_SEL_0;
8458+
}
84388459

84398460
if ((OpSelHi & (1 << J)) != 0)
84408461
ModVal |= SISrcMods::OP_SEL_1;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -913,6 +913,41 @@ static VOPModifiers collectVOPModifiers(const MCInst &MI,
913913
return Modifiers;
914914
}
915915

916+
// Instructions decode the op_sel/suffix bits into the src_modifier
917+
// operands. Copy those bits into the src operands for true16 VGPRs.
918+
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
919+
const unsigned Opc = MI.getOpcode();
920+
const MCRegisterClass &ConversionRC =
921+
MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
922+
constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
923+
{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
924+
SISrcMods::OP_SEL_0},
925+
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
926+
SISrcMods::OP_SEL_0},
927+
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
928+
SISrcMods::OP_SEL_0},
929+
{AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
930+
SISrcMods::DST_OP_SEL}}};
931+
for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
932+
int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
933+
int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
934+
if (OpIdx == -1 || OpModsIdx == -1)
935+
continue;
936+
MCOperand &Op = MI.getOperand(OpIdx);
937+
if (!Op.isReg())
938+
continue;
939+
if (!ConversionRC.contains(Op.getReg()))
940+
continue;
941+
unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
942+
const MCOperand &OpMods = MI.getOperand(OpModsIdx);
943+
unsigned ModVal = OpMods.getImm();
944+
if (ModVal & OpSelMask) { // isHi
945+
unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
946+
Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
947+
}
948+
}
949+
}
950+
916951
// MAC opcodes have special old and src2 operands.
917952
// src2 is tied to dst, while old is not tied (but assumed to be).
918953
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
@@ -991,6 +1026,8 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
9911026
if (isMacDPP(MI))
9921027
convertMacDPPInst(MI);
9931028

1029+
convertTrue16OpSel(MI);
1030+
9941031
int VDstInIdx =
9951032
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
9961033
if (VDstInIdx != -1)

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -203,6 +203,7 @@ class AMDGPUDisassembler : public MCDisassembler {
203203
DecodeStatus convertVOP3PDPPInst(MCInst &MI) const;
204204
DecodeStatus convertVOPCDPPInst(MCInst &MI) const;
205205
void convertMacDPPInst(MCInst &MI) const;
206+
void convertTrue16OpSel(MCInst &MI) const;
206207

207208
enum OpWidthTy {
208209
OPW32,

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -756,14 +756,14 @@ void SIFoldOperands::foldOperand(
756756
int UseOpIdx,
757757
SmallVectorImpl<FoldCandidate> &FoldList,
758758
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
759-
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
759+
const MachineOperand *UseOp = &UseMI->getOperand(UseOpIdx);
760760

761-
if (!isUseSafeToFold(*UseMI, UseOp))
761+
if (!isUseSafeToFold(*UseMI, *UseOp))
762762
return;
763763

764764
// FIXME: Fold operands with subregs.
765-
if (UseOp.isReg() && OpToFold.isReg() &&
766-
(UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister))
765+
if (UseOp->isReg() && OpToFold.isReg() &&
766+
(UseOp->isImplicit() || UseOp->getSubReg() != AMDGPU::NoSubRegister))
767767
return;
768768

769769
// Special case for REG_SEQUENCE: We can't fold literals into
@@ -859,14 +859,26 @@ void SIFoldOperands::foldOperand(
859859
if (MovOp == AMDGPU::COPY)
860860
return;
861861

862-
UseMI->setDesc(TII->get(MovOp));
863862
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
864863
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
865864
while (ImpOpI != ImpOpE) {
866865
MachineInstr::mop_iterator Tmp = ImpOpI;
867866
ImpOpI++;
868867
UseMI->removeOperand(UseMI->getOperandNo(Tmp));
869868
}
869+
UseMI->setDesc(TII->get(MovOp));
870+
871+
if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
872+
const auto &SrcOp = UseMI->getOperand(UseOpIdx);
873+
MachineOperand NewSrcOp(SrcOp);
874+
MachineFunction *MF = UseMI->getParent()->getParent();
875+
UseMI->removeOperand(1);
876+
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // src0_modifiers
877+
UseMI->addOperand(NewSrcOp); // src0
878+
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // op_sel
879+
UseOpIdx = 2;
880+
UseOp = &UseMI->getOperand(UseOpIdx);
881+
}
870882
CopiesToReplace.push_back(UseMI);
871883
} else {
872884
if (UseMI->isCopy() && OpToFold.isReg() &&
@@ -1027,7 +1039,7 @@ void SIFoldOperands::foldOperand(
10271039

10281040
// Don't fold into target independent nodes. Target independent opcodes
10291041
// don't have defined register classes.
1030-
if (UseDesc.isVariadic() || UseOp.isImplicit() ||
1042+
if (UseDesc.isVariadic() || UseOp->isImplicit() ||
10311043
UseDesc.operands()[UseOpIdx].RegClass == -1)
10321044
return;
10331045
}
@@ -1062,17 +1074,17 @@ void SIFoldOperands::foldOperand(
10621074
TRI->getRegClass(FoldDesc.operands()[0].RegClass);
10631075

10641076
// Split 64-bit constants into 32-bits for folding.
1065-
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(*FoldRC) == 64) {
1066-
Register UseReg = UseOp.getReg();
1077+
if (UseOp->getSubReg() && AMDGPU::getRegBitWidth(*FoldRC) == 64) {
1078+
Register UseReg = UseOp->getReg();
10671079
const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
10681080
if (AMDGPU::getRegBitWidth(*UseRC) != 64)
10691081
return;
10701082

10711083
APInt Imm(64, OpToFold.getImm());
1072-
if (UseOp.getSubReg() == AMDGPU::sub0) {
1084+
if (UseOp->getSubReg() == AMDGPU::sub0) {
10731085
Imm = Imm.getLoBits(32);
10741086
} else {
1075-
assert(UseOp.getSubReg() == AMDGPU::sub1);
1087+
assert(UseOp->getSubReg() == AMDGPU::sub1);
10761088
Imm = Imm.getHiBits(32);
10771089
}
10781090

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1148,7 +1148,13 @@ def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
11481148
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
11491149
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
11501150

1151-
def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
1151+
class FP16VCSrcInputModsMatchClass<bit IsFake16>
1152+
: FPVCSrcInputModsMatchClass<16> {
1153+
let Name = !if(IsFake16, "RegOrInlineImmWithFPFake16InputMods",
1154+
"RegOrInlineImmWithFPT16InputMods");
1155+
let PredicateMethod = "isRegOrInlineImmWithFP16InputMods<" #
1156+
!if(IsFake16, "true", "false") # ">";
1157+
}
11521158
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
11531159

11541160
class InputMods <AsmOperandClass matchClass> : Operand <i32> {
@@ -1166,7 +1172,8 @@ def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
11661172
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
11671173
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
11681174

1169-
def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
1175+
class FP16VCSrcInputMods<bit IsFake16>
1176+
: FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>>;
11701177
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
11711178

11721179
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -1653,11 +1660,11 @@ class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
16531660
}
16541661

16551662
// Return type of input modifiers operand for specified input operand for DPP
1656-
class getSrcModVOP3DPP <ValueType VT> {
1663+
class getSrcModVOP3DPP <ValueType VT, bit IsFake16 = 1> {
16571664
Operand ret =
16581665
!if (VT.isFP,
16591666
!if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
1660-
FP16VCSrcInputMods, FP32VCSrcInputMods),
1667+
FP16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
16611668
Int32VCSrcInputMods);
16621669
}
16631670

@@ -2450,6 +2457,10 @@ class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.
24502457
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
24512458
let IsTrue16 = 1;
24522459
let IsRealTrue16 = 1;
2460+
2461+
let HasOpSel = 1;
2462+
let HasModifiers = 1; // All instructions at least have OpSel.
2463+
24532464
// Most DstVT are 16-bit, but not all.
24542465
let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
24552466
let DstRC64 = getVALUDstForVT<DstVT>.ret;
@@ -2461,6 +2472,10 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
24612472
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
24622473
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
24632474
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
2475+
let Src0VOP3DPP = VGPRSrc_16;
2476+
let Src0ModVOP3DPP = getSrcModVOP3DPP<Src0VT, 0 /*IsFake16*/>.ret;
2477+
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
2478+
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;
24642479

24652480
let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
24662481
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1235,6 +1235,12 @@ def VGPRSrc_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
12351235
let EncoderMethod = "getMachineOpValueT16Lo128";
12361236
}
12371237

1238+
// True16 operands.
1239+
def VGPRSrc_16 : RegisterOperand<VGPR_16> {
1240+
let DecoderMethod = "DecodeVGPR_16RegisterClass";
1241+
let EncoderMethod = "getMachineOpValueT16";
1242+
}
1243+
12381244
//===----------------------------------------------------------------------===//
12391245
// ASrc_* Operands with an AccVGPR
12401246
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ body: |
5050
; GFX11-NEXT: {{ $}}
5151
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5252
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
53-
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
53+
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
5454
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
5555
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
5656
;
@@ -88,7 +88,7 @@ body: |
8888
; GFX11: liveins: $sgpr0
8989
; GFX11-NEXT: {{ $}}
9090
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
91-
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
91+
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
9292
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
9393
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
9494
;
@@ -127,7 +127,7 @@ body: |
127127
; GFX11-NEXT: {{ $}}
128128
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
129129
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
130-
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
130+
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
131131
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
132132
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
133133
;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ body: |
5959
; GFX11-NEXT: {{ $}}
6060
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
6161
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
62-
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
62+
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
6363
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
6464
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
6565
;
@@ -97,7 +97,7 @@ body: |
9797
; GFX11: liveins: $sgpr0
9898
; GFX11-NEXT: {{ $}}
9999
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
100-
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
100+
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
101101
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
102102
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
103103
;
@@ -136,7 +136,7 @@ body: |
136136
; GFX11-NEXT: {{ $}}
137137
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
138138
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
139-
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
139+
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
140140
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
141141
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
142142
;

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ body: |
6666
; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
6767
; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
6868
; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
69-
; REAL16-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec
69+
; REAL16-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
7070
;
7171
; FAKE16-LABEL: name: ceil_f16
7272
; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -87,7 +87,7 @@ body: |
8787
; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
8888
; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
8989
; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
90-
; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec
90+
; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
9191
;
9292
; FAKE16-LABEL: name: floor_f16
9393
; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF

0 commit comments

Comments
 (0)