11
11
//===----------------------------------------------------------------------===//
12
12
13
13
class VINTERPe <VOPProfile P> : Enc64 {
14
- bits<8 > vdst;
14
+ bits<11 > vdst;
15
15
bits<4> src0_modifiers;
16
- bits<9 > src0;
16
+ bits<11 > src0;
17
17
bits<3> src1_modifiers;
18
- bits<9 > src1;
18
+ bits<11 > src1;
19
19
bits<3> src2_modifiers;
20
- bits<9 > src2;
20
+ bits<11 > src2;
21
21
bits<1> clamp;
22
22
bits<3> waitexp;
23
23
24
24
let Inst{31-26} = 0x33; // VOP3P encoding
25
25
let Inst{25-24} = 0x1; // VINTERP sub-encoding
26
26
27
- let Inst{7-0} = vdst;
27
+ let Inst{7-0} = vdst{7-0} ;
28
28
let Inst{10-8} = waitexp;
29
- let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
30
- let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
31
- let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
32
- let Inst{14} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel(3)
29
+ // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix
30
+ let Inst{11} = !if(P.HasSrc0, src0_modifiers{2}, 0);
31
+ let Inst{12} = !if(P.HasSrc1, src1_modifiers{2}, 0);
32
+ let Inst{13} = !if(P.HasSrc2, src2_modifiers{2}, 0);
33
+ let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0);
33
34
let Inst{15} = clamp;
34
- let Inst{40-32} = src0;
35
- let Inst{49-41} = src1;
36
- let Inst{58-50} = src2;
35
+ let Inst{40-32} = src0{8-0} ;
36
+ let Inst{49-41} = src1{8-0} ;
37
+ let Inst{58-50} = src2{8-0} ;
37
38
let Inst{61} = src0_modifiers{0}; // neg(0)
38
39
let Inst{62} = src1_modifiers{0}; // neg(1)
39
40
let Inst{63} = src2_modifiers{0}; // neg(2)
@@ -60,9 +61,10 @@ class VINTERP_Pseudo <string OpName, VOPProfile P, list<dag> pattern = []> :
60
61
let VINTERP = 1;
61
62
}
62
63
63
- class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily> :
64
- VOP3_Real <ps, EncodingFamily> {
64
+ class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily, string asmName > :
65
+ VOP3_Real <ps, EncodingFamily, asmName > {
65
66
let VINTERP = 1;
67
+ let IsSingle = 1;
66
68
}
67
69
68
70
def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
@@ -83,44 +85,64 @@ def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
83
85
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$waitexp";
84
86
}
85
87
86
- class VOP3_VINTERP_F16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
87
- let HasOpSel = 1;
88
- let HasModifiers = 1;
88
+ class VOP3_VINTERP_F16_t16 <list<ValueType> ArgVT> : VOPProfile_True16<VOPProfile<ArgVT>> {
89
+ let Src0Mod = FPT16VRegInputMods</*Fake16*/0>;
90
+ let Src1Mod = FPVRegInputMods;
91
+ let Src2Mod = !if(!eq(ArgVT[3].Size, 16), FPT16VRegInputMods</*Fake16*/0>,
92
+ FPVRegInputMods);
93
+ let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_16:$src0,
94
+ Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
95
+ Src2Mod:$src2_modifiers,
96
+ !if(!eq(ArgVT[3].Size, 16), VRegSrc_16, VRegSrc_32):$src2,
97
+ Clamp:$clamp, op_sel0:$op_sel,
98
+ WaitEXP:$waitexp);
89
99
90
- let Src0Mod = FPVRegInputMods;
100
+ let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
101
+ }
102
+
103
+ class VOP3_VINTERP_F16_fake16 <list<ValueType> ArgVT> : VOPProfile_Fake16<VOPProfile<ArgVT>> {
104
+ let Src0Mod = FPT16VRegInputMods</*Fake16*/1>;
91
105
let Src1Mod = FPVRegInputMods;
92
- let Src2Mod = FPVRegInputMods;
106
+ let Src2Mod = !if(!eq(ArgVT[3].Size, 16), FPT16VRegInputMods</*Fake16*/1>,
107
+ FPVRegInputMods);
93
108
94
- let Outs64 = (outs VGPR_32:$vdst);
95
- let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
109
+ let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_fake16:$src0,
96
110
Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
97
- Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
111
+ Src2Mod:$src2_modifiers,
112
+ !if(!eq(ArgVT[3].Size, 16), VRegSrc_fake16, VRegSrc_32):$src2,
98
113
Clamp:$clamp, op_sel0:$op_sel,
99
114
WaitEXP:$waitexp);
100
115
101
- let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
102
- }
116
+ let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
117
+ }
118
+
119
+
103
120
104
121
//===----------------------------------------------------------------------===//
105
122
// VINTERP Pseudo Instructions
106
123
//===----------------------------------------------------------------------===//
107
-
108
124
let SubtargetPredicate = HasVINTERPEncoding in {
109
125
126
+ multiclass VINTERP_t16<string OpName, list<ValueType> ArgVT> {
127
+ let True16Predicate = UseRealTrue16Insts in {
128
+ def _t16 : VINTERP_Pseudo<OpName#"_t16", VOP3_VINTERP_F16_t16<ArgVT>> ;
129
+ }
130
+ let True16Predicate = UseFakeTrue16Insts in {
131
+ def _fake16 : VINTERP_Pseudo<OpName#"_fake16", VOP3_VINTERP_F16_fake16<ArgVT>> ;
132
+ }
133
+ }
134
+
110
135
let Uses = [M0, EXEC, MODE] in {
111
136
def V_INTERP_P10_F32_inreg : VINTERP_Pseudo <"v_interp_p10_f32", VOP3_VINTERP_F32>;
112
137
def V_INTERP_P2_F32_inreg : VINTERP_Pseudo <"v_interp_p2_f32", VOP3_VINTERP_F32>;
113
- def V_INTERP_P10_F16_F32_inreg :
114
- VINTERP_Pseudo <"v_interp_p10_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
115
- def V_INTERP_P2_F16_F32_inreg :
116
- VINTERP_Pseudo <"v_interp_p2_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
138
+
139
+ defm V_INTERP_P10_F16_F32_inreg : VINTERP_t16<"v_interp_p10_f16_f32", [f32, f16, f32, f16]>;
140
+ defm V_INTERP_P2_F16_F32_inreg : VINTERP_t16<"v_interp_p2_f16_f32", [f16, f16, f32, f32]>;
117
141
} // Uses = [M0, EXEC, MODE]
118
142
119
143
let Uses = [M0, EXEC] in {
120
- def V_INTERP_P10_RTZ_F16_F32_inreg :
121
- VINTERP_Pseudo <"v_interp_p10_rtz_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
122
- def V_INTERP_P2_RTZ_F16_F32_inreg :
123
- VINTERP_Pseudo <"v_interp_p2_rtz_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
144
+ defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_t16<"v_interp_p10_rtz_f16_f32", [f32, f16, f32, f16]>;
145
+ defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_t16 <"v_interp_p2_rtz_f16_f32", [f16, f16, f32, f32]>;
124
146
} // Uses = [M0, EXEC]
125
147
126
148
} // SubtargetPredicate = HasVINTERPEncoding.
@@ -137,11 +159,6 @@ class VInterpF32Pat <SDPatternOperator op, Instruction inst> : GCNPat <
137
159
7) /* wait_exp */
138
160
>;
139
161
140
- def VINTERP_OPSEL {
141
- int LOW = 0;
142
- int HIGH = 0xa;
143
- }
144
-
145
162
class VInterpF16Pat <SDPatternOperator op, Instruction inst,
146
163
ValueType dst_type, bit high,
147
164
list<ComplexPattern> pat> : GCNPat <
@@ -167,45 +184,60 @@ multiclass VInterpF16Pat <SDPatternOperator op, Instruction inst,
167
184
168
185
def : VInterpF32Pat<int_amdgcn_interp_inreg_p10, V_INTERP_P10_F32_inreg>;
169
186
def : VInterpF32Pat<int_amdgcn_interp_inreg_p2, V_INTERP_P2_F32_inreg>;
187
+
188
+ let True16Predicate = UseFakeTrue16Insts in {
170
189
defm : VInterpF16Pat<int_amdgcn_interp_inreg_p10_f16,
171
- V_INTERP_P10_F16_F32_inreg , f32,
190
+ V_INTERP_P10_F16_F32_inreg_fake16 , f32,
172
191
[VINTERPModsHi, VINTERPMods, VINTERPModsHi]>;
173
192
defm : VInterpF16Pat<int_amdgcn_interp_inreg_p2_f16,
174
- V_INTERP_P2_F16_F32_inreg , f16,
193
+ V_INTERP_P2_F16_F32_inreg_fake16 , f16,
175
194
[VINTERPModsHi, VINTERPMods, VINTERPMods]>;
176
195
defm : VInterpF16Pat<int_amdgcn_interp_p10_rtz_f16,
177
- V_INTERP_P10_RTZ_F16_F32_inreg , f32,
196
+ V_INTERP_P10_RTZ_F16_F32_inreg_fake16 , f32,
178
197
[VINTERPModsHi, VINTERPMods, VINTERPModsHi]>;
179
198
defm : VInterpF16Pat<int_amdgcn_interp_p2_rtz_f16,
180
- V_INTERP_P2_RTZ_F16_F32_inreg , f16,
199
+ V_INTERP_P2_RTZ_F16_F32_inreg_fake16 , f16,
181
200
[VINTERPModsHi, VINTERPMods, VINTERPMods]>;
201
+ }
182
202
183
203
//===----------------------------------------------------------------------===//
184
204
// VINTERP Real Instructions
185
205
//===----------------------------------------------------------------------===//
186
206
187
- multiclass VINTERP_Real_gfx11 <bits<7> op> {
188
- let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
189
- def _gfx11 :
190
- VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX11>,
191
- VINTERPe_gfx11<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
207
+ multiclass VINTERP_Real_gfx11 <bits<7> op, string asmName> {
208
+ defvar ps = !cast<VOP3_Pseudo>(NAME);
209
+ let AssemblerPredicate = isGFX11Only,
210
+ DecoderNamespace = "GFX11" #
211
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
212
+ def _gfx11 :
213
+ VINTERP_Real<ps, SIEncodingFamily.GFX11, asmName>,
214
+ VINTERPe_gfx11<op, ps.Pfl>;
192
215
}
193
216
}
194
217
195
- multiclass VINTERP_Real_gfx12 <bits<7> op> {
196
- let AssemblerPredicate = isGFX12Only, DecoderNamespace = "GFX12" in {
197
- def _gfx12 :
198
- VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX12>,
199
- VINTERPe_gfx12<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
218
+ multiclass VINTERP_Real_gfx12 <bits<7> op, string asmName> {
219
+ defvar ps = !cast<VOP3_Pseudo>(NAME);
220
+ let AssemblerPredicate = isGFX12Only,
221
+ DecoderNamespace = "GFX12" #
222
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
223
+ def _gfx12 :
224
+ VINTERP_Real<ps, SIEncodingFamily.GFX12, asmName>,
225
+ VINTERPe_gfx12<op, ps.Pfl>;
200
226
}
201
227
}
202
228
203
- multiclass VINTERP_Real_gfx11_gfx12 <bits<7> op> :
204
- VINTERP_Real_gfx11<op>, VINTERP_Real_gfx12<op>;
229
+ multiclass VINTERP_Real_gfx11_gfx12 <bits<7> op, string asmName = !cast<VOP3_Pseudo>(NAME).Mnemonic, string opName = NAME> :
230
+ VINTERP_Real_gfx11<op, asmName>, VINTERP_Real_gfx12<op, asmName>;
231
+
232
+ multiclass VINTERP_Real_t16_and_fake16_gfx11_gfx12 <bits<7> op, string asmName = !cast<VOP3_Pseudo>(NAME).Mnemonic, string opName = NAME> {
233
+ defm _t16: VINTERP_Real_gfx11_gfx12<op, asmName, opName#"_t16">;
234
+ defm _fake16: VINTERP_Real_gfx11_gfx12<op, asmName, opName#"_fake16">;
235
+ }
236
+
205
237
206
238
defm V_INTERP_P10_F32_inreg : VINTERP_Real_gfx11_gfx12<0x000>;
207
239
defm V_INTERP_P2_F32_inreg : VINTERP_Real_gfx11_gfx12<0x001>;
208
- defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_gfx11_gfx12 <0x002>;
209
- defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_gfx11_gfx12 <0x003>;
210
- defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12 <0x004>;
211
- defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12 <0x005>;
240
+ defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12 <0x002, "v_interp_p10_f16_f32" >;
241
+ defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12 <0x003, "v_interp_p2_f16_f32" >;
242
+ defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12 <0x004, "v_interp_p10_rtz_f16_f32" >;
243
+ defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12 <0x005, "v_interp_p2_rtz_f16_f32" >;
0 commit comments