Skip to content

[AMDGPU][True16] Support VOP3 source DPP operands. #80892

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 32 additions & 11 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,9 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
}

bool isRegOrInlineImmWithFP16InputMods() const {
return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
// Returns true when this operand is a register or inline immediate usable as
// an f16 source. The register class checked is chosen at compile time:
// VS_32 when IsFake16 is true, VS_16 otherwise.
template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
  constexpr unsigned SrcRCID =
      IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID;
  return isRegOrInline(SrcRCID, MVT::f16);
}

bool isRegOrInlineImmWithFP32InputMods() const {
Expand Down Expand Up @@ -8151,7 +8152,7 @@ ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {

// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into src0_modifiers.
void cvtVOP3DstOpSelOnly(MCInst &Inst) {
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
int Opc = Inst.getOpcode();
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
if (OpSelIdx == -1)
Expand All @@ -8168,23 +8169,34 @@ void cvtVOP3DstOpSelOnly(MCInst &Inst) {

unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

if ((OpSel & (1 << SrcNum)) != 0) {
int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
if (DstIdx == -1)
return;

const MCOperand &DstOp = Inst.getOperand(DstIdx);
int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
if (DstOp.isReg() &&
MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
if (AMDGPU::isHi(DstOp.getReg(), MRI))
ModVal |= SISrcMods::DST_OP_SEL;
} else {
if ((OpSel & (1 << SrcNum)) != 0)
ModVal |= SISrcMods::DST_OP_SEL;
}
Inst.getOperand(ModIdx).setImm(ModVal);
}

// Converts parsed operands for an instruction with op_sel: runs cvtVOP3P on
// Inst/Operands and then calls cvtVOP3DstOpSelOnly to update src0_modifiers
// with the destination op_sel bit.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
const OperandVector &Operands) {
cvtVOP3P(Inst, Operands);
// NOTE(review): the one-argument and two-argument calls below look like the
// removed and added sides of the same diff line shown together; confirm that
// only the MCRegisterInfo-taking call exists in the merged file.
cvtVOP3DstOpSelOnly(Inst);
cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

// Overload that also forwards the caller's OptionalIdx map to cvtVOP3P before
// calling cvtVOP3DstOpSelOnly on the converted instruction.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx) {
cvtVOP3P(Inst, Operands, OptionalIdx);
// NOTE(review): the one-argument and two-argument calls below look like the
// removed and added sides of the same diff line shown together; confirm that
// only the MCRegisterInfo-taking call exists in the merged file.
cvtVOP3DstOpSelOnly(Inst);
cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
Expand Down Expand Up @@ -8433,8 +8445,17 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,

uint32_t ModVal = 0;

if ((OpSel & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_0;
const MCOperand &SrcOp = Inst.getOperand(OpIdx);
if (SrcOp.isReg() && getMRI()
->getRegClass(AMDGPU::VGPR_16RegClassID)
.contains(SrcOp.getReg())) {
bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
if (VGPRSuffixIsHi)
ModVal |= SISrcMods::OP_SEL_0;
} else {
if ((OpSel & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_0;
}

if ((OpSelHi & (1 << J)) != 0)
ModVal |= SISrcMods::OP_SEL_1;
Expand Down
38 changes: 38 additions & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,41 @@ static VOPModifiers collectVOPModifiers(const MCInst &MI,
return Modifiers;
}

// Decoding leaves the op_sel (hi/lo suffix) bits in the src*_modifiers
// operands. For true16 VGPR operands (src0, src1, src2 and vdst), propagate a
// set bit into the register operand itself by selecting the hi register.
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
const unsigned Opc = MI.getOpcode();
const MCRegisterClass &ConversionRC =
MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
SISrcMods::OP_SEL_0},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
SISrcMods::OP_SEL_0},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
SISrcMods::OP_SEL_0},
{AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
SISrcMods::DST_OP_SEL}}};
for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need the reference

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AMDGPUDisassembler.cpp:931:8: note: use reference type 'const std::array<std::tuple<int, int, unsigned int>, 4>::value_type &' (aka 'const std::tuple<int, int, unsigned int> &') to prevent copying
  for (const auto [OpName, OpModsName, OpSelMask] : OpAndOpMods) {
       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  &

int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
if (OpIdx == -1 || OpModsIdx == -1)
continue;
MCOperand &Op = MI.getOperand(OpIdx);
if (!Op.isReg())
continue;
if (!ConversionRC.contains(Op.getReg()))
continue;
unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
const MCOperand &OpMods = MI.getOperand(OpModsIdx);
unsigned ModVal = OpMods.getImm();
if (ModVal & OpSelMask) { // isHi
unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
}
}
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
Expand Down Expand Up @@ -968,6 +1003,7 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
convertTrue16OpSel(MI);
auto Mods = collectVOPModifiers(MI);
insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
AMDGPU::OpName::op_sel);
Expand All @@ -991,6 +1027,8 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
if (isMacDPP(MI))
convertMacDPPInst(MI);

convertTrue16OpSel(MI);

int VDstInIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
if (VDstInIdx != -1)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ class AMDGPUDisassembler : public MCDisassembler {
DecodeStatus convertVOP3PDPPInst(MCInst &MI) const;
DecodeStatus convertVOPCDPPInst(MCInst &MI) const;
void convertMacDPPInst(MCInst &MI) const;
void convertTrue16OpSel(MCInst &MI) const;

enum OpWidthTy {
OPW32,
Expand Down
32 changes: 22 additions & 10 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -756,14 +756,14 @@ void SIFoldOperands::foldOperand(
int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
const MachineOperand *UseOp = &UseMI->getOperand(UseOpIdx);

if (!isUseSafeToFold(*UseMI, UseOp))
if (!isUseSafeToFold(*UseMI, *UseOp))
return;

// FIXME: Fold operands with subregs.
if (UseOp.isReg() && OpToFold.isReg() &&
(UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister))
if (UseOp->isReg() && OpToFold.isReg() &&
(UseOp->isImplicit() || UseOp->getSubReg() != AMDGPU::NoSubRegister))
return;

// Special case for REG_SEQUENCE: We can't fold literals into
Expand Down Expand Up @@ -859,14 +859,26 @@ void SIFoldOperands::foldOperand(
if (MovOp == AMDGPU::COPY)
return;

UseMI->setDesc(TII->get(MovOp));
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
while (ImpOpI != ImpOpE) {
MachineInstr::mop_iterator Tmp = ImpOpI;
ImpOpI++;
UseMI->removeOperand(UseMI->getOperandNo(Tmp));
}
UseMI->setDesc(TII->get(MovOp));

if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
const auto &SrcOp = UseMI->getOperand(UseOpIdx);
MachineOperand NewSrcOp(SrcOp);
MachineFunction *MF = UseMI->getParent()->getParent();
UseMI->removeOperand(1);
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // src0_modifiers
UseMI->addOperand(NewSrcOp); // src0
UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // op_sel
UseOpIdx = 2;
UseOp = &UseMI->getOperand(UseOpIdx);
}
CopiesToReplace.push_back(UseMI);
} else {
if (UseMI->isCopy() && OpToFold.isReg() &&
Expand Down Expand Up @@ -1027,7 +1039,7 @@ void SIFoldOperands::foldOperand(

// Don't fold into target independent nodes. Target independent opcodes
// don't have defined register classes.
if (UseDesc.isVariadic() || UseOp.isImplicit() ||
if (UseDesc.isVariadic() || UseOp->isImplicit() ||
UseDesc.operands()[UseOpIdx].RegClass == -1)
return;
}
Expand Down Expand Up @@ -1062,17 +1074,17 @@ void SIFoldOperands::foldOperand(
TRI->getRegClass(FoldDesc.operands()[0].RegClass);

// Split 64-bit constants into 32-bits for folding.
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(*FoldRC) == 64) {
Register UseReg = UseOp.getReg();
if (UseOp->getSubReg() && AMDGPU::getRegBitWidth(*FoldRC) == 64) {
Register UseReg = UseOp->getReg();
const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
if (AMDGPU::getRegBitWidth(*UseRC) != 64)
return;

APInt Imm(64, OpToFold.getImm());
if (UseOp.getSubReg() == AMDGPU::sub0) {
if (UseOp->getSubReg() == AMDGPU::sub0) {
Imm = Imm.getLoBits(32);
} else {
assert(UseOp.getSubReg() == AMDGPU::sub1);
assert(UseOp->getSubReg() == AMDGPU::sub1);
Imm = Imm.getHiBits(32);
}

Expand Down
23 changes: 19 additions & 4 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1148,7 +1148,13 @@ def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
// Asm operand match class for 16-bit FP VCSrc input modifiers, parameterized
// on IsFake16. The bit selects both the match-class Name and which
// instantiation of the isRegOrInlineImmWithFP16InputMods<bool> predicate is
// used: <true> when IsFake16 is set, <false> otherwise.
class FP16VCSrcInputModsMatchClass<bit IsFake16>
: FPVCSrcInputModsMatchClass<16> {
let Name = !if(IsFake16, "RegOrInlineImmWithFPFake16InputMods",
"RegOrInlineImmWithFPT16InputMods");
let PredicateMethod = "isRegOrInlineImmWithFP16InputMods<" #
!if(IsFake16, "true", "false") # ">";
}
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;

class InputMods <AsmOperandClass matchClass> : Operand <i32> {
Expand All @@ -1166,7 +1172,8 @@ def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
// FP input-modifier operand whose match class is
// FP16VCSrcInputModsMatchClass<IsFake16>.
class FP16VCSrcInputMods<bit IsFake16>
: FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>>;
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;

class IntInputModsMatchClass <int opSize> : AsmOperandClass {
Expand Down Expand Up @@ -1653,11 +1660,11 @@ class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
}

// Return type of input modifiers operand for specified input operand for DPP
class getSrcModVOP3DPP <ValueType VT> {
class getSrcModVOP3DPP <ValueType VT, bit IsFake16 = 1> {
Operand ret =
!if (VT.isFP,
!if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
FP16VCSrcInputMods, FP32VCSrcInputMods),
FP16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
Int32VCSrcInputMods);
}

Expand Down Expand Up @@ -2450,6 +2457,10 @@ class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;

let HasOpSel = 1;
let HasModifiers = 1; // All instructions at least have OpSel.

// Most DstVT are 16-bit, but not all.
let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
let DstRC64 = getVALUDstForVT<DstVT>.ret;
Expand All @@ -2461,6 +2472,10 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src0ModVOP3DPP = getSrcModVOP3DPP<Src0VT, 0 /*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;

let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,12 @@ def VGPRSrc_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
let EncoderMethod = "getMachineOpValueT16Lo128";
}

// True 16 operands.
// VGPRSrc_16 is a register operand over the VGPR_16 class. It is decoded with
// DecodeVGPR_16RegisterClass and encoded with getMachineOpValueT16.
def VGPRSrc_16 : RegisterOperand<VGPR_16> {
let DecoderMethod = "DecodeVGPR_16RegisterClass";
let EncoderMethod = "getMachineOpValueT16";
}

//===----------------------------------------------------------------------===//
// ASrc_* Operands with an AccVGPR
//===----------------------------------------------------------------------===//
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
Expand Down Expand Up @@ -88,7 +88,7 @@ body: |
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
;
Expand Down Expand Up @@ -127,7 +127,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
Expand Down Expand Up @@ -97,7 +97,7 @@ body: |
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
;
Expand Down Expand Up @@ -136,7 +136,7 @@ body: |
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ body: |
; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec
; REAL16-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
;
; FAKE16-LABEL: name: ceil_f16
; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
Expand All @@ -87,7 +87,7 @@ body: |
; REAL16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
; REAL16-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, implicit $mode, implicit $exec
; REAL16-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[V_CVT_F32_U32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
;
; FAKE16-LABEL: name: floor_f16
; FAKE16: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
Expand Down
Loading