Skip to content

Commit ffa9d95

Browse files
committed
added fake16 to vop1 isa
1 parent 5e32698 commit ffa9d95

9 files changed

+505
-433
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,9 +1748,11 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
17481748
(ins Src0Mod:$src0_modifiers, Src0RC:$src0)))
17491749
/* else */,
17501750
// VOP1 without modifiers
1751-
!if (HasClamp,
1752-
(ins Src0RC:$src0, Clamp0:$clamp),
1753-
(ins Src0RC:$src0))
1751+
!if(HasOMod,
1752+
(ins Src0RC:$src0, Clamp0:$clamp, omod0:$omod),
1753+
!if (HasClamp,
1754+
(ins Src0RC:$src0, Clamp0:$clamp),
1755+
(ins Src0RC:$src0)))
17541756
/* endif */ ),
17551757
!if (!eq(NumSrcArgs, 2),
17561758
!if (HasModifiers,
@@ -2533,11 +2535,20 @@ class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
25332535
let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret;
25342536
}
25352537

2536-
class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
2538+
class VOPProfile_Fake16<VOPProfile P, bit UseRealTrueDecoderSpace=0> : VOPProfile<P.ArgVT> {
25372539
let IsTrue16 = 1;
2540+
2541+
// FIXME-TRUE16. Some non-implemented true16 instructions are
2542+
// using fake16 profile. However, this creates a conflict
2543+
// with fake16 decoder namespace.
2544+
// This is a hack to avoid decoderNamespace issue
2545+
// and should be removed after true16 pseudo is fully supported
2546+
let IsRealTrue16 = !if(UseRealTrueDecoderSpace,1,0);
2547+
25382548
// Most DstVT are 16-bit, but not all
25392549
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
25402550
let DstRC64 = getVALUDstForVT<DstVT>.ret;
2551+
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
25412552
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
25422553
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
25432554
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1173,9 +1173,12 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
11731173
let SubtargetPredicate = NotHasTrue16BitInsts in
11741174
defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
11751175

1176-
let SubtargetPredicate = HasTrue16BitInsts in
1176+
let SubtargetPredicate = UseRealTrue16Insts in
11771177
defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64>;
11781178

1179+
let SubtargetPredicate = UseFakeTrue16Insts in
1180+
defm : f16_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
1181+
11791182
//===----------------------------------------------------------------------===//
11801183
// VOP2 Patterns
11811184
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
188188
}
189189

190190
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
191-
VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
191+
VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>, 1/*userealtruespace*/> {
192192

193193
let Ins64 = (ins Src0RC64:$src0, Clamp:$clamp, omod:$omod);
194194
let InsVOP3Base = (ins Src0VOP3DPP:$src0, Clamp:$clamp, omod:$omod);
@@ -202,6 +202,11 @@ def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
202202
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
203203
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
204204
def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;
205+
def VOP1_F16_I16_fake16 : VOPProfile_Fake16 <VOP_F16_I16> {
206+
let HasModifiers = 0;
207+
let HasOMod = 1;
208+
let HasClamp = 1;
209+
}
205210

206211
def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
207212
let HasExtVOP3DPP = 0;
@@ -217,7 +222,10 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
217222
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
218223
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
219224
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
220-
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
225+
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16, 1/*userealtruespace*/> {
226+
let HasOMod = 1;
227+
}
228+
def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
221229
let HasOMod = 1;
222230
}
223231

@@ -294,16 +302,22 @@ defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
294302
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
295303
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
296304
let FPDPRounding = 1, isReMaterializable = 0 in {
305+
// V_CVT_F16_F32 and V_CVT_F32_F16 are a special case because they are
306+
// present in targets without Has16BitInsts. Otherwise they could use
307+
// class VOP1Inst_t16.
297308
let OtherPredicates = [NotHasTrue16BitInsts] in
298-
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
299-
let OtherPredicates = [HasTrue16BitInsts] in
300-
defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
309+
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
310+
let OtherPredicates = [UseRealTrue16Insts] in
311+
defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32, 1/*userealtruespace*/>, any_fpround>;
312+
let OtherPredicates = [UseFakeTrue16Insts] in
313+
defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
301314
} // End FPDPRounding = 1, isReMaterializable = 0
302-
303315
let OtherPredicates = [NotHasTrue16BitInsts] in
304-
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
305-
let OtherPredicates = [HasTrue16BitInsts] in
306-
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
316+
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
317+
let OtherPredicates = [UseRealTrue16Insts] in
318+
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16, 1/*userealtruespace*/>, any_fpextend>;
319+
let OtherPredicates = [UseFakeTrue16Insts] in
320+
defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
307321

308322
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
309323
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
@@ -473,24 +487,15 @@ let SubtargetPredicate = isGFX7Plus in {
473487
} // End isReMaterializable = 1
474488

475489
let FPDPRounding = 1 in {
476-
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
477-
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
478-
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
479-
}
480-
let OtherPredicates = [HasTrue16BitInsts] in {
481-
defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
482-
defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
483-
}
490+
defm V_CVT_F16_U16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_u16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, uint_to_fp>;
491+
defm V_CVT_F16_I16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_i16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, sint_to_fp>;
484492
} // End FPDPRounding = 1
485493
// OMod clears exceptions when set in these two instructions
486-
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
487-
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
488-
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
489-
}
490-
let OtherPredicates = [HasTrue16BitInsts] in {
491-
defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
492-
defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
493-
}
494+
defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
495+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_uint>;
496+
defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
497+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
498+
494499
let TRANS = 1, SchedRW = [WriteTrans32] in {
495500
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
496501
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -501,12 +506,8 @@ defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
501506
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
502507
} // End TRANS = 1, SchedRW = [WriteTrans32]
503508
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
504-
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
505-
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
506-
}
507-
let OtherPredicates = [HasTrue16BitInsts] in {
508-
defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
509-
}
509+
defm V_FREXP_EXP_I16_F16 : VOP1Inst_t16_with_profiles <"v_frexp_exp_i16_f16",
510+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, int_amdgcn_frexp_exp>;
510511
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
511512
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
512513
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
@@ -525,7 +526,7 @@ def : GCNPat<
525526
(V_CVT_F16_F32_e32 $src)
526527
>;
527528
}
528-
let OtherPredicates = [HasTrue16BitInsts] in {
529+
let OtherPredicates = [UseRealTrue16Insts] in {
529530
def : GCNPat<
530531
(f32 (f16_to_fp i16:$src)),
531532
(V_CVT_F32_F16_t16_e32 $src)
@@ -535,6 +536,16 @@ def : GCNPat<
535536
(V_CVT_F16_F32_t16_e32 $src)
536537
>;
537538
}
539+
let OtherPredicates = [UseFakeTrue16Insts] in {
540+
def : GCNPat<
541+
(f32 (f16_to_fp i16:$src)),
542+
(V_CVT_F32_F16_fake16_e32 $src)
543+
>;
544+
def : GCNPat<
545+
(i16 (AMDGPUfp_to_f16 f32:$src)),
546+
(V_CVT_F16_F32_fake16_e32 $src)
547+
>;
548+
}
538549

539550
def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {
540551
let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);
@@ -554,14 +565,10 @@ let SubtargetPredicate = isGFX9Plus in {
554565
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
555566

556567
let mayRaiseFPException = 0 in {
557-
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
558-
defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
559-
defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
560-
}
561-
let OtherPredicates = [HasTrue16BitInsts] in {
562-
defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
563-
defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
564-
}
568+
defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
569+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
570+
defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
571+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
565572
} // End mayRaiseFPException = 0
566573
} // End SubtargetPredicate = isGFX9Plus
567574

@@ -975,9 +982,13 @@ defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_
975982
defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
976983

977984
defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
985+
defm V_CVT_F16_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
978986
defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
987+
defm V_CVT_F16_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
979988
defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
989+
defm V_CVT_U16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
980990
defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
991+
defm V_CVT_I16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
981992
defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
982993
defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
983994
defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
@@ -990,6 +1001,7 @@ defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16"
9901001
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
9911002
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
9921003
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
1004+
defm V_FREXP_EXP_I16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
9931005
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
9941006
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
9951007
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
@@ -1001,10 +1013,14 @@ defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16"
10011013
defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
10021014
defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
10031015
defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
1016+
defm V_CVT_NORM_I16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
10041017
defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
1018+
defm V_CVT_NORM_U16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
10051019

10061020
defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1021+
defm V_CVT_F16_F32_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
10071022
defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1023+
defm V_CVT_F32_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
10081024

10091025
//===----------------------------------------------------------------------===//
10101026
// GFX10.

0 commit comments

Comments
 (0)