Skip to content

[AMDGPU][True16][MC] VINTERP instructions supporting true16/fake16 #113634

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 30 additions & 8 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,19 @@ static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
(AMDGPU::OperandSemantics)OperandSemantics));
}

static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
uint64_t /*Addr*/,
const MCDisassembler *Decoder) {
assert(isUInt<10>(Imm) && "10-bit encoding expected");
assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");

const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

bool IsHi = Imm & (1 << 9);
unsigned RegIdx = Imm & 0xff;
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const MCDisassembler *Decoder) {
Expand Down Expand Up @@ -763,14 +776,23 @@ void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
convertTrue16OpSel(MI);
if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
// The MCInst has this field that is not directly encoded in the
// instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1244,6 +1244,14 @@ def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;

// True 16 Operands
def VRegSrc_16 : RegisterOperand<VGPR_16> {
let DecoderMethod = "decodeOperand_VGPR_16";
let EncoderMethod = "getMachineOpValueT16";
}
def VRegSrc_fake16: SrcReg9<VGPR_32, "OPW16"> {
let EncoderMethod = "getMachineOpValueT16";
}
//===----------------------------------------------------------------------===//
// VGPRSrc_*
//===----------------------------------------------------------------------===//
Expand Down
137 changes: 83 additions & 54 deletions llvm/lib/Target/AMDGPU/VINTERPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,30 @@
//===----------------------------------------------------------------------===//

class VINTERPe <VOPProfile P> : Enc64 {
bits<8> vdst;
bits<11> vdst;
bits<4> src0_modifiers;
bits<9> src0;
bits<11> src0;
bits<3> src1_modifiers;
bits<9> src1;
bits<11> src1;
bits<3> src2_modifiers;
bits<9> src2;
bits<11> src2;
bits<1> clamp;
bits<3> waitexp;

let Inst{31-26} = 0x33; // VOP3P encoding
let Inst{25-24} = 0x1; // VINTERP sub-encoding

let Inst{7-0} = vdst;
let Inst{7-0} = vdst{7-0};
let Inst{10-8} = waitexp;
let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
let Inst{14} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel(3)
// Fields for hi/lo 16-bits of register selection
let Inst{11} = !if(P.HasSrc0, src0_modifiers{2}, 0);
let Inst{12} = !if(P.HasSrc1, src1_modifiers{2}, 0);
let Inst{13} = !if(P.HasSrc2, src2_modifiers{2}, 0);
let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0);
let Inst{15} = clamp;
let Inst{40-32} = src0;
let Inst{49-41} = src1;
let Inst{58-50} = src2;
let Inst{40-32} = src0{8-0};
let Inst{49-41} = src1{8-0};
let Inst{58-50} = src2{8-0};
let Inst{61} = src0_modifiers{0}; // neg(0)
let Inst{62} = src1_modifiers{0}; // neg(1)
let Inst{63} = src2_modifiers{0}; // neg(2)
Expand All @@ -60,9 +61,10 @@ class VINTERP_Pseudo <string OpName, VOPProfile P, list<dag> pattern = []> :
let VINTERP = 1;
}

class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily> :
VOP3_Real <ps, EncodingFamily> {
class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily, string asmName> :
VOP3_Real <ps, EncodingFamily, asmName> {
let VINTERP = 1;
let IsSingle = 1;
}

def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
Expand All @@ -83,22 +85,35 @@ def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$waitexp";
}

class VOP3_VINTERP_F16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
let HasOpSel = 1;
let HasModifiers = 1;
class VOP3_VINTERP_F16_t16 <list<ValueType> ArgVT> : VOPProfile_True16<VOPProfile<ArgVT>> {
let Src0Mod = FPT16VRegInputMods</*Fake16*/0>;
let Src1Mod = FPVRegInputMods;
let Src2Mod = !if(!eq(ArgVT[3].Size, 16), FPT16VRegInputMods</*Fake16*/0>,
FPVRegInputMods);
let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_16:$src0,
Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
Src2Mod:$src2_modifiers,
!if(!eq(ArgVT[3].Size, 16), VRegSrc_16, VRegSrc_32):$src2,
Clamp:$clamp, op_sel0:$op_sel,
WaitEXP:$waitexp);

let Src0Mod = FPVRegInputMods;
let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
}

class VOP3_VINTERP_F16_fake16 <list<ValueType> ArgVT> : VOPProfile_Fake16<VOPProfile<ArgVT>> {
let Src0Mod = FPT16VRegInputMods</*Fake16*/1>;
let Src1Mod = FPVRegInputMods;
let Src2Mod = FPVRegInputMods;
let Src2Mod = !if(!eq(ArgVT[3].Size, 16), FPT16VRegInputMods</*Fake16*/1>,
FPVRegInputMods);

let Outs64 = (outs VGPR_32:$vdst);
let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_fake16:$src0,
Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
Src2Mod:$src2_modifiers,
!if(!eq(ArgVT[3].Size, 16), VRegSrc_fake16, VRegSrc_32):$src2,
Clamp:$clamp, op_sel0:$op_sel,
WaitEXP:$waitexp);

let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
}

//===----------------------------------------------------------------------===//
Expand All @@ -107,20 +122,26 @@ class VOP3_VINTERP_F16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {

let SubtargetPredicate = HasVINTERPEncoding in {

multiclass VINTERP_t16<string OpName, list<ValueType> ArgVT> {
let True16Predicate = UseRealTrue16Insts in {
def _t16 : VINTERP_Pseudo<OpName#"_t16", VOP3_VINTERP_F16_t16<ArgVT>> ;
}
let True16Predicate = UseFakeTrue16Insts in {
def _fake16 : VINTERP_Pseudo<OpName#"_fake16", VOP3_VINTERP_F16_fake16<ArgVT>> ;
}
}

let Uses = [M0, EXEC, MODE] in {
def V_INTERP_P10_F32_inreg : VINTERP_Pseudo <"v_interp_p10_f32", VOP3_VINTERP_F32>;
def V_INTERP_P2_F32_inreg : VINTERP_Pseudo <"v_interp_p2_f32", VOP3_VINTERP_F32>;
def V_INTERP_P10_F16_F32_inreg :
VINTERP_Pseudo <"v_interp_p10_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
def V_INTERP_P2_F16_F32_inreg :
VINTERP_Pseudo <"v_interp_p2_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;

defm V_INTERP_P10_F16_F32_inreg : VINTERP_t16<"v_interp_p10_f16_f32", [f32, f16, f32, f16]>;
defm V_INTERP_P2_F16_F32_inreg : VINTERP_t16<"v_interp_p2_f16_f32", [f16, f16, f32, f32]>;
} // Uses = [M0, EXEC, MODE]

let Uses = [M0, EXEC] in {
def V_INTERP_P10_RTZ_F16_F32_inreg :
VINTERP_Pseudo <"v_interp_p10_rtz_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
def V_INTERP_P2_RTZ_F16_F32_inreg :
VINTERP_Pseudo <"v_interp_p2_rtz_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_t16<"v_interp_p10_rtz_f16_f32", [f32, f16, f32, f16]>;
defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_t16 <"v_interp_p2_rtz_f16_f32", [f16, f16, f32, f32]>;
} // Uses = [M0, EXEC]

} // SubtargetPredicate = HasVINTERPEncoding.
Expand All @@ -137,11 +158,6 @@ class VInterpF32Pat <SDPatternOperator op, Instruction inst> : GCNPat <
7) /* wait_exp */
>;

def VINTERP_OPSEL {
int LOW = 0;
int HIGH = 0xa;
}

class VInterpF16Pat <SDPatternOperator op, Instruction inst,
ValueType dst_type, bit high,
list<ComplexPattern> pat> : GCNPat <
Expand All @@ -167,45 +183,58 @@ multiclass VInterpF16Pat <SDPatternOperator op, Instruction inst,

def : VInterpF32Pat<int_amdgcn_interp_inreg_p10, V_INTERP_P10_F32_inreg>;
def : VInterpF32Pat<int_amdgcn_interp_inreg_p2, V_INTERP_P2_F32_inreg>;

let True16Predicate = UseFakeTrue16Insts in {
defm : VInterpF16Pat<int_amdgcn_interp_inreg_p10_f16,
V_INTERP_P10_F16_F32_inreg, f32,
V_INTERP_P10_F16_F32_inreg_fake16, f32,
[VINTERPModsHi, VINTERPMods, VINTERPModsHi]>;
defm : VInterpF16Pat<int_amdgcn_interp_inreg_p2_f16,
V_INTERP_P2_F16_F32_inreg, f16,
V_INTERP_P2_F16_F32_inreg_fake16, f16,
[VINTERPModsHi, VINTERPMods, VINTERPMods]>;
defm : VInterpF16Pat<int_amdgcn_interp_p10_rtz_f16,
V_INTERP_P10_RTZ_F16_F32_inreg, f32,
V_INTERP_P10_RTZ_F16_F32_inreg_fake16, f32,
[VINTERPModsHi, VINTERPMods, VINTERPModsHi]>;
defm : VInterpF16Pat<int_amdgcn_interp_p2_rtz_f16,
V_INTERP_P2_RTZ_F16_F32_inreg, f16,
V_INTERP_P2_RTZ_F16_F32_inreg_fake16, f16,
[VINTERPModsHi, VINTERPMods, VINTERPMods]>;
}

//===----------------------------------------------------------------------===//
// VINTERP Real Instructions
//===----------------------------------------------------------------------===//

multiclass VINTERP_Real_gfx11 <bits<7> op> {
let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
multiclass VINTERP_Real_gfx11 <bits<7> op, string asmName> {
defvar ps = !cast<VOP3_Pseudo>(NAME);
let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" #
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
def _gfx11 :
VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX11>,
VINTERPe_gfx11<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
VINTERP_Real<ps, SIEncodingFamily.GFX11, asmName>,
VINTERPe_gfx11<op, ps.Pfl>;
}
}

multiclass VINTERP_Real_gfx12 <bits<7> op> {
let AssemblerPredicate = isGFX12Only, DecoderNamespace = "GFX12" in {
multiclass VINTERP_Real_gfx12 <bits<7> op, string asmName> {
defvar ps = !cast<VOP3_Pseudo>(NAME);
let AssemblerPredicate = isGFX12Only, DecoderNamespace = "GFX12" #
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
def _gfx12 :
VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX12>,
VINTERPe_gfx12<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
VINTERP_Real<ps, SIEncodingFamily.GFX12, asmName>,
VINTERPe_gfx12<op, ps.Pfl>;
}
}

multiclass VINTERP_Real_gfx11_gfx12 <bits<7> op> :
VINTERP_Real_gfx11<op>, VINTERP_Real_gfx12<op>;
multiclass VINTERP_Real_gfx11_gfx12 <bits<7> op, string asmName = !cast<VOP3_Pseudo>(NAME).Mnemonic, string opName = NAME> :
VINTERP_Real_gfx11<op, asmName>, VINTERP_Real_gfx12<op, asmName>;

multiclass VINTERP_Real_t16_and_fake16_gfx11_gfx12 <bits<7> op, string asmName = !cast<VOP3_Pseudo>(NAME).Mnemonic, string opName = NAME> {
defm _t16: VINTERP_Real_gfx11_gfx12<op, asmName, opName#"_t16">;
defm _fake16: VINTERP_Real_gfx11_gfx12<op, asmName, opName#"_fake16">;
}


defm V_INTERP_P10_F32_inreg : VINTERP_Real_gfx11_gfx12<0x000>;
defm V_INTERP_P2_F32_inreg : VINTERP_Real_gfx11_gfx12<0x001>;
defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x002>;
defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x003>;
defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x004>;
defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x005>;
defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x002, "v_interp_p10_f16_f32">;
defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x003, "v_interp_p2_f16_f32">;
defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x004, "v_interp_p10_rtz_f16_f32">;
defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x005, "v_interp_p2_rtz_f16_f32">;
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/waitcnt-vinterp.mir
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ body: |
; GFX11-NEXT: $vgpr2 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
; GFX11-NEXT: $vgpr3 = LDS_PARAM_LOAD 0, 2, 0, implicit $m0, implicit $exec
; GFX11-NEXT: $vgpr4 = LDS_PARAM_LOAD 0, 3, 0, implicit $m0, implicit $exec
; GFX11-NEXT: $vgpr5 = V_INTERP_P10_F16_F32_inreg 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
; GFX11-NEXT: $vgpr6 = V_INTERP_P10_F16_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
; GFX11-NEXT: $vgpr7 = V_INTERP_P10_F16_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 1, implicit $m0, implicit $exec, implicit $mode
; GFX11-NEXT: $vgpr8 = V_INTERP_P10_F16_F32_inreg 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $m0, implicit $exec, implicit $mode
; GFX11-NEXT: $vgpr5 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
; GFX11-NEXT: $vgpr6 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
; GFX11-NEXT: $vgpr7 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 1, implicit $m0, implicit $exec, implicit $mode
; GFX11-NEXT: $vgpr8 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $m0, implicit $exec, implicit $mode
$vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
$vgpr2 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
$vgpr3 = LDS_PARAM_LOAD 0, 2, 0, implicit $m0, implicit $exec
$vgpr4 = LDS_PARAM_LOAD 0, 3, 0, implicit $m0, implicit $exec
$vgpr5 = V_INTERP_P10_F16_F32_inreg 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
$vgpr6 = V_INTERP_P10_F16_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
$vgpr7 = V_INTERP_P10_F16_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
$vgpr8 = V_INTERP_P10_F16_F32_inreg 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
$vgpr5 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
$vgpr6 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
$vgpr7 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
$vgpr8 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
...
Loading
Loading