Skip to content

Commit abff8fe

Browse files
authored
[AMDGPU][True16][MC] VINTERP instructions supporting true16/fake16 (#113634)
Update the VINTERP instructions with true16 and fake16 formats. This patch covers the following instructions: v_interp_p10_f16_f32, v_interp_p2_f16_f32, v_interp_p10_rtz_f16_f32 and v_interp_p2_rtz_f16_f32. The dasm test vinterp-fake16.txt is removed and its test lines are merged into vinterp.txt, which handles both the true16 and fake16 cases.
1 parent 0f0e2fe commit abff8fe

File tree

6 files changed

+531
-151
lines changed

6 files changed

+531
-151
lines changed

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,19 @@ static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
363363
(AMDGPU::OperandSemantics)OperandSemantics));
364364
}
365365

366+
// Decodes the encoded operand value Imm and appends the operand built by
// AMDGPUDisassembler::createVGPR16Operand to Inst.
//
// Imm is asserted to fit in 10 bits and to have the
// AMDGPU::EncValues::IS_VGPR bit set. Bit 9 of Imm is passed on as the
// "IsHi" flag and the low 8 bits as the register index. The address
// argument is unused. Returns whatever addOperand returns.
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
367+
uint64_t /*Addr*/,
368+
const MCDisassembler *Decoder) {
369+
assert(isUInt<10>(Imm) && "10-bit encoding expected");
370+
assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
371+
372+
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
373+
374+
// Bit 9 is the hi/lo selector handed to createVGPR16Operand.
bool IsHi = Imm & (1 << 9);
375+
// Only the low 8 bits are used as the register index.
unsigned RegIdx = Imm & 0xff;
376+
return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
377+
}
378+
366379
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
367380
uint64_t Addr,
368381
const MCDisassembler *Decoder) {
@@ -763,14 +776,23 @@ void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
763776
}
764777

765778
void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
766-
if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
767-
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
768-
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
769-
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
770-
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
771-
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
772-
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
773-
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
779+
convertTrue16OpSel(MI);
780+
if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
781+
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
782+
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
783+
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
784+
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
785+
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
786+
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
787+
MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
788+
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
789+
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
790+
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
791+
MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
792+
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
793+
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
794+
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
795+
MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
774796
// The MCInst has this field that is not directly encoded in the
775797
// instruction.
776798
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,14 @@ def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
12441244
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
12451245
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;
12461246

1247+
// True 16 Operands
1248+
// Register operand over the VGPR_16 register class. It is decoded with
// decodeOperand_VGPR_16 and encoded with getMachineOpValueT16.
def VRegSrc_16 : RegisterOperand<VGPR_16> {
1249+
let DecoderMethod = "decodeOperand_VGPR_16";
1250+
let EncoderMethod = "getMachineOpValueT16";
1251+
}
1252+
// Fake16 counterpart of VRegSrc_16: a SrcReg9 operand over VGPR_32 with the
// "OPW16" parameter. Only the encoder is overridden here (to
// getMachineOpValueT16); no DecoderMethod is set in this def.
def VRegSrc_fake16: SrcReg9<VGPR_32, "OPW16"> {
1253+
let EncoderMethod = "getMachineOpValueT16";
1254+
}
12471255
//===----------------------------------------------------------------------===//
12481256
// VGPRSrc_*
12491257
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/VINTERPInstructions.td

Lines changed: 83 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,30 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
class VINTERPe <VOPProfile P> : Enc64 {
14-
bits<8> vdst;
14+
bits<11> vdst;
1515
bits<4> src0_modifiers;
16-
bits<9> src0;
16+
bits<11> src0;
1717
bits<3> src1_modifiers;
18-
bits<9> src1;
18+
bits<11> src1;
1919
bits<3> src2_modifiers;
20-
bits<9> src2;
20+
bits<11> src2;
2121
bits<1> clamp;
2222
bits<3> waitexp;
2323

2424
let Inst{31-26} = 0x33; // VOP3P encoding
2525
let Inst{25-24} = 0x1; // VINTERP sub-encoding
2626

27-
let Inst{7-0} = vdst;
27+
let Inst{7-0} = vdst{7-0};
2828
let Inst{10-8} = waitexp;
29-
let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
30-
let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
31-
let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
32-
let Inst{14} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel(3)
29+
// Fields for hi/lo 16-bits of register selection
30+
let Inst{11} = !if(P.HasSrc0, src0_modifiers{2}, 0);
31+
let Inst{12} = !if(P.HasSrc1, src1_modifiers{2}, 0);
32+
let Inst{13} = !if(P.HasSrc2, src2_modifiers{2}, 0);
33+
let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0);
3334
let Inst{15} = clamp;
34-
let Inst{40-32} = src0;
35-
let Inst{49-41} = src1;
36-
let Inst{58-50} = src2;
35+
let Inst{40-32} = src0{8-0};
36+
let Inst{49-41} = src1{8-0};
37+
let Inst{58-50} = src2{8-0};
3738
let Inst{61} = src0_modifiers{0}; // neg(0)
3839
let Inst{62} = src1_modifiers{0}; // neg(1)
3940
let Inst{63} = src2_modifiers{0}; // neg(2)
@@ -60,9 +61,10 @@ class VINTERP_Pseudo <string OpName, VOPProfile P, list<dag> pattern = []> :
6061
let VINTERP = 1;
6162
}
6263

63-
class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily> :
64-
VOP3_Real <ps, EncodingFamily> {
64+
class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily, string asmName> :
65+
VOP3_Real <ps, EncodingFamily, asmName> {
6566
let VINTERP = 1;
67+
let IsSingle = 1;
6668
}
6769

6870
def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
@@ -83,22 +85,35 @@ def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
8385
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$waitexp";
8486
}
8587

86-
class VOP3_VINTERP_F16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
87-
let HasOpSel = 1;
88-
let HasModifiers = 1;
88+
class VOP3_VINTERP_F16_t16 <list<ValueType> ArgVT> : VOPProfile_True16<VOPProfile<ArgVT>> {
89+
let Src0Mod = FPT16VRegInputMods</*Fake16*/0>;
90+
let Src1Mod = FPVRegInputMods;
91+
let Src2Mod = !if(!eq(ArgVT[3].Size, 16), FPT16VRegInputMods</*Fake16*/0>,
92+
FPVRegInputMods);
93+
let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_16:$src0,
94+
Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
95+
Src2Mod:$src2_modifiers,
96+
!if(!eq(ArgVT[3].Size, 16), VRegSrc_16, VRegSrc_32):$src2,
97+
Clamp:$clamp, op_sel0:$op_sel,
98+
WaitEXP:$waitexp);
8999

90-
let Src0Mod = FPVRegInputMods;
100+
let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
101+
}
102+
103+
class VOP3_VINTERP_F16_fake16 <list<ValueType> ArgVT> : VOPProfile_Fake16<VOPProfile<ArgVT>> {
104+
let Src0Mod = FPT16VRegInputMods</*Fake16*/1>;
91105
let Src1Mod = FPVRegInputMods;
92-
let Src2Mod = FPVRegInputMods;
106+
let Src2Mod = !if(!eq(ArgVT[3].Size, 16), FPT16VRegInputMods</*Fake16*/1>,
107+
FPVRegInputMods);
93108

94-
let Outs64 = (outs VGPR_32:$vdst);
95-
let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
109+
let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_fake16:$src0,
96110
Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
97-
Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
111+
Src2Mod:$src2_modifiers,
112+
!if(!eq(ArgVT[3].Size, 16), VRegSrc_fake16, VRegSrc_32):$src2,
98113
Clamp:$clamp, op_sel0:$op_sel,
99114
WaitEXP:$waitexp);
100115

101-
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
116+
let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
102117
}
103118

104119
//===----------------------------------------------------------------------===//
@@ -107,20 +122,26 @@ class VOP3_VINTERP_F16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
107122

108123
let SubtargetPredicate = HasVINTERPEncoding in {
109124

125+
// Defines the two pseudo variants of one VINTERP f16 instruction:
//   * <NAME>_t16    - uses the VOP3_VINTERP_F16_t16 profile and is guarded by
//                     True16Predicate = UseRealTrue16Insts.
//   * <NAME>_fake16 - uses the VOP3_VINTERP_F16_fake16 profile and is guarded
//                     by True16Predicate = UseFakeTrue16Insts.
// OpName is the base name; "_t16" / "_fake16" is appended to it for the
// respective pseudo. ArgVT is forwarded unchanged to both profiles.
multiclass VINTERP_t16<string OpName, list<ValueType> ArgVT> {
126+
let True16Predicate = UseRealTrue16Insts in {
127+
def _t16 : VINTERP_Pseudo<OpName#"_t16", VOP3_VINTERP_F16_t16<ArgVT>> ;
128+
}
129+
let True16Predicate = UseFakeTrue16Insts in {
130+
def _fake16 : VINTERP_Pseudo<OpName#"_fake16", VOP3_VINTERP_F16_fake16<ArgVT>> ;
131+
}
132+
}
133+
110134
let Uses = [M0, EXEC, MODE] in {
111135
def V_INTERP_P10_F32_inreg : VINTERP_Pseudo <"v_interp_p10_f32", VOP3_VINTERP_F32>;
112136
def V_INTERP_P2_F32_inreg : VINTERP_Pseudo <"v_interp_p2_f32", VOP3_VINTERP_F32>;
113-
def V_INTERP_P10_F16_F32_inreg :
114-
VINTERP_Pseudo <"v_interp_p10_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
115-
def V_INTERP_P2_F16_F32_inreg :
116-
VINTERP_Pseudo <"v_interp_p2_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
137+
138+
defm V_INTERP_P10_F16_F32_inreg : VINTERP_t16<"v_interp_p10_f16_f32", [f32, f16, f32, f16]>;
139+
defm V_INTERP_P2_F16_F32_inreg : VINTERP_t16<"v_interp_p2_f16_f32", [f16, f16, f32, f32]>;
117140
} // Uses = [M0, EXEC, MODE]
118141

119142
let Uses = [M0, EXEC] in {
120-
def V_INTERP_P10_RTZ_F16_F32_inreg :
121-
VINTERP_Pseudo <"v_interp_p10_rtz_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
122-
def V_INTERP_P2_RTZ_F16_F32_inreg :
123-
VINTERP_Pseudo <"v_interp_p2_rtz_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
143+
defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_t16<"v_interp_p10_rtz_f16_f32", [f32, f16, f32, f16]>;
144+
defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_t16 <"v_interp_p2_rtz_f16_f32", [f16, f16, f32, f32]>;
124145
} // Uses = [M0, EXEC]
125146

126147
} // SubtargetPredicate = HasVINTERPEncoding.
@@ -137,11 +158,6 @@ class VInterpF32Pat <SDPatternOperator op, Instruction inst> : GCNPat <
137158
7) /* wait_exp */
138159
>;
139160

140-
def VINTERP_OPSEL {
141-
int LOW = 0;
142-
int HIGH = 0xa;
143-
}
144-
145161
class VInterpF16Pat <SDPatternOperator op, Instruction inst,
146162
ValueType dst_type, bit high,
147163
list<ComplexPattern> pat> : GCNPat <
@@ -167,45 +183,58 @@ multiclass VInterpF16Pat <SDPatternOperator op, Instruction inst,
167183

168184
def : VInterpF32Pat<int_amdgcn_interp_inreg_p10, V_INTERP_P10_F32_inreg>;
169185
def : VInterpF32Pat<int_amdgcn_interp_inreg_p2, V_INTERP_P2_F32_inreg>;
186+
187+
let True16Predicate = UseFakeTrue16Insts in {
170188
defm : VInterpF16Pat<int_amdgcn_interp_inreg_p10_f16,
171-
V_INTERP_P10_F16_F32_inreg, f32,
189+
V_INTERP_P10_F16_F32_inreg_fake16, f32,
172190
[VINTERPModsHi, VINTERPMods, VINTERPModsHi]>;
173191
defm : VInterpF16Pat<int_amdgcn_interp_inreg_p2_f16,
174-
V_INTERP_P2_F16_F32_inreg, f16,
192+
V_INTERP_P2_F16_F32_inreg_fake16, f16,
175193
[VINTERPModsHi, VINTERPMods, VINTERPMods]>;
176194
defm : VInterpF16Pat<int_amdgcn_interp_p10_rtz_f16,
177-
V_INTERP_P10_RTZ_F16_F32_inreg, f32,
195+
V_INTERP_P10_RTZ_F16_F32_inreg_fake16, f32,
178196
[VINTERPModsHi, VINTERPMods, VINTERPModsHi]>;
179197
defm : VInterpF16Pat<int_amdgcn_interp_p2_rtz_f16,
180-
V_INTERP_P2_RTZ_F16_F32_inreg, f16,
198+
V_INTERP_P2_RTZ_F16_F32_inreg_fake16, f16,
181199
[VINTERPModsHi, VINTERPMods, VINTERPMods]>;
200+
}
182201

183202
//===----------------------------------------------------------------------===//
184203
// VINTERP Real Instructions
185204
//===----------------------------------------------------------------------===//
186205

187-
multiclass VINTERP_Real_gfx11 <bits<7> op> {
188-
let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
206+
multiclass VINTERP_Real_gfx11 <bits<7> op, string asmName> {
207+
defvar ps = !cast<VOP3_Pseudo>(NAME);
208+
let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" #
209+
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
189210
def _gfx11 :
190-
VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX11>,
191-
VINTERPe_gfx11<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
211+
VINTERP_Real<ps, SIEncodingFamily.GFX11, asmName>,
212+
VINTERPe_gfx11<op, ps.Pfl>;
192213
}
193214
}
194215

195-
multiclass VINTERP_Real_gfx12 <bits<7> op> {
196-
let AssemblerPredicate = isGFX12Only, DecoderNamespace = "GFX12" in {
216+
multiclass VINTERP_Real_gfx12 <bits<7> op, string asmName> {
217+
defvar ps = !cast<VOP3_Pseudo>(NAME);
218+
let AssemblerPredicate = isGFX12Only, DecoderNamespace = "GFX12" #
219+
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
197220
def _gfx12 :
198-
VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX12>,
199-
VINTERPe_gfx12<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
221+
VINTERP_Real<ps, SIEncodingFamily.GFX12, asmName>,
222+
VINTERPe_gfx12<op, ps.Pfl>;
200223
}
201224
}
202225

203-
multiclass VINTERP_Real_gfx11_gfx12 <bits<7> op> :
204-
VINTERP_Real_gfx11<op>, VINTERP_Real_gfx12<op>;
226+
multiclass VINTERP_Real_gfx11_gfx12 <bits<7> op, string asmName = !cast<VOP3_Pseudo>(NAME).Mnemonic, string opName = NAME> :
227+
VINTERP_Real_gfx11<op, asmName>, VINTERP_Real_gfx12<op, asmName>;
228+
229+
// Instantiates VINTERP_Real_gfx11_gfx12 twice, once under the "_t16" suffix
// and once under the "_fake16" suffix. Both instantiations receive the same
// opcode `op` and assembly name `asmName`; `opName` is passed with the
// matching suffix appended. asmName defaults to the Mnemonic of the
// VOP3_Pseudo named NAME, and opName defaults to NAME.
multiclass VINTERP_Real_t16_and_fake16_gfx11_gfx12 <bits<7> op, string asmName = !cast<VOP3_Pseudo>(NAME).Mnemonic, string opName = NAME> {
230+
defm _t16: VINTERP_Real_gfx11_gfx12<op, asmName, opName#"_t16">;
231+
defm _fake16: VINTERP_Real_gfx11_gfx12<op, asmName, opName#"_fake16">;
232+
}
233+
205234

206235
defm V_INTERP_P10_F32_inreg : VINTERP_Real_gfx11_gfx12<0x000>;
207236
defm V_INTERP_P2_F32_inreg : VINTERP_Real_gfx11_gfx12<0x001>;
208-
defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x002>;
209-
defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x003>;
210-
defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x004>;
211-
defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x005>;
237+
defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x002, "v_interp_p10_f16_f32">;
238+
defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x003, "v_interp_p2_f16_f32">;
239+
defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x004, "v_interp_p10_rtz_f16_f32">;
240+
defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x005, "v_interp_p2_rtz_f16_f32">;

llvm/test/CodeGen/AMDGPU/waitcnt-vinterp.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ body: |
1515
; GFX11-NEXT: $vgpr2 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
1616
; GFX11-NEXT: $vgpr3 = LDS_PARAM_LOAD 0, 2, 0, implicit $m0, implicit $exec
1717
; GFX11-NEXT: $vgpr4 = LDS_PARAM_LOAD 0, 3, 0, implicit $m0, implicit $exec
18-
; GFX11-NEXT: $vgpr5 = V_INTERP_P10_F16_F32_inreg 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
19-
; GFX11-NEXT: $vgpr6 = V_INTERP_P10_F16_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
20-
; GFX11-NEXT: $vgpr7 = V_INTERP_P10_F16_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 1, implicit $m0, implicit $exec, implicit $mode
21-
; GFX11-NEXT: $vgpr8 = V_INTERP_P10_F16_F32_inreg 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $m0, implicit $exec, implicit $mode
18+
; GFX11-NEXT: $vgpr5 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
19+
; GFX11-NEXT: $vgpr6 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
20+
; GFX11-NEXT: $vgpr7 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 1, implicit $m0, implicit $exec, implicit $mode
21+
; GFX11-NEXT: $vgpr8 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $m0, implicit $exec, implicit $mode
2222
$vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
2323
$vgpr2 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
2424
$vgpr3 = LDS_PARAM_LOAD 0, 2, 0, implicit $m0, implicit $exec
2525
$vgpr4 = LDS_PARAM_LOAD 0, 3, 0, implicit $m0, implicit $exec
26-
$vgpr5 = V_INTERP_P10_F16_F32_inreg 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
27-
$vgpr6 = V_INTERP_P10_F16_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
28-
$vgpr7 = V_INTERP_P10_F16_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
29-
$vgpr8 = V_INTERP_P10_F16_F32_inreg 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
26+
$vgpr5 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
27+
$vgpr6 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
28+
$vgpr7 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
29+
$vgpr8 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
3030
...

0 commit comments

Comments
 (0)