@@ -188,7 +188,7 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
188
188
}
189
189
190
190
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
191
- VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
191
+ VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>, 1/*userealtruespace*/ > {
192
192
193
193
let Ins64 = (ins Src0RC64:$src0, Clamp:$clamp, omod:$omod);
194
194
let InsVOP3Base = (ins Src0VOP3DPP:$src0, Clamp:$clamp, omod:$omod);
@@ -202,6 +202,11 @@ def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
202
202
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
203
203
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
204
204
def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;
205
+ def VOP1_F16_I16_fake16 : VOPProfile_Fake16 <VOP_F16_I16> {
206
+ let HasModifiers = 0;
207
+ let HasOMod = 1;
208
+ let HasClamp = 1;
209
+ }
205
210
206
211
def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
207
212
let HasExtVOP3DPP = 0;
@@ -217,7 +222,10 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
217
222
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
218
223
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
219
224
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
220
- def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
225
+ def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16, 1/*userealtruespace*/> {
226
+ let HasOMod = 1;
227
+ }
228
+ def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
221
229
let HasOMod = 1;
222
230
}
223
231
@@ -294,16 +302,22 @@ defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
294
302
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
295
303
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
296
304
let FPDPRounding = 1, isReMaterializable = 0 in {
305
+ // V_CVT_F16_F32 and V_CVT_F32_F16 are a special case because they are
306
+ // present in targets without Has16BitInsts. Otherwise they could use
307
+ // class VOP1Inst_t16.
297
308
let OtherPredicates = [NotHasTrue16BitInsts] in
298
- defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
299
- let OtherPredicates = [HasTrue16BitInsts] in
300
- defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
309
+ defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
310
+ let OtherPredicates = [UseRealTrue16Insts] in
311
+ defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32, 1/*userealtruespace*/>, any_fpround>;
312
+ let OtherPredicates = [UseFakeTrue16Insts] in
313
+ defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
301
314
} // End FPDPRounding = 1, isReMaterializable = 0
302
-
303
315
let OtherPredicates = [NotHasTrue16BitInsts] in
304
- defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
305
- let OtherPredicates = [HasTrue16BitInsts] in
306
- defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
316
+ defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
317
+ let OtherPredicates = [UseRealTrue16Insts] in
318
+ defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16, 1/*userealtruespace*/>, any_fpextend>;
319
+ let OtherPredicates = [UseFakeTrue16Insts] in
320
+ defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
307
321
308
322
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
309
323
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
@@ -473,24 +487,15 @@ let SubtargetPredicate = isGFX7Plus in {
473
487
} // End isReMaterializable = 1
474
488
475
489
let FPDPRounding = 1 in {
476
- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
477
- defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
478
- defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
479
- }
480
- let OtherPredicates = [HasTrue16BitInsts] in {
481
- defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
482
- defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
483
- }
490
+ defm V_CVT_F16_U16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_u16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, uint_to_fp>;
491
+ defm V_CVT_F16_I16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_i16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, sint_to_fp>;
484
492
} // End FPDPRounding = 1
485
493
// OMod clears exceptions when set in these two instructions
486
- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
487
- defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
488
- defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
489
- }
490
- let OtherPredicates = [HasTrue16BitInsts] in {
491
- defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
492
- defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
493
- }
494
+ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
495
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_uint>;
496
+ defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
497
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
498
+
494
499
let TRANS = 1, SchedRW = [WriteTrans32] in {
495
500
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
496
501
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -501,12 +506,8 @@ defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
501
506
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
502
507
} // End TRANS = 1, SchedRW = [WriteTrans32]
503
508
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
504
- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
505
- defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
506
- }
507
- let OtherPredicates = [HasTrue16BitInsts] in {
508
- defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
509
- }
509
+ defm V_FREXP_EXP_I16_F16 : VOP1Inst_t16_with_profiles <"v_frexp_exp_i16_f16",
510
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, int_amdgcn_frexp_exp>;
510
511
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
511
512
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
512
513
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
@@ -525,7 +526,7 @@ def : GCNPat<
525
526
(V_CVT_F16_F32_e32 $src)
526
527
>;
527
528
}
528
- let OtherPredicates = [HasTrue16BitInsts ] in {
529
+ let OtherPredicates = [UseRealTrue16Insts ] in {
529
530
def : GCNPat<
530
531
(f32 (f16_to_fp i16:$src)),
531
532
(V_CVT_F32_F16_t16_e32 $src)
@@ -535,6 +536,16 @@ def : GCNPat<
535
536
(V_CVT_F16_F32_t16_e32 $src)
536
537
>;
537
538
}
539
+ let OtherPredicates = [UseFakeTrue16Insts] in {
540
+ def : GCNPat<
541
+ (f32 (f16_to_fp i16:$src)),
542
+ (V_CVT_F32_F16_fake16_e32 $src)
543
+ >;
544
+ def : GCNPat<
545
+ (i16 (AMDGPUfp_to_f16 f32:$src)),
546
+ (V_CVT_F16_F32_fake16_e32 $src)
547
+ >;
548
+ }
538
549
539
550
def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {
540
551
let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);
@@ -554,14 +565,10 @@ let SubtargetPredicate = isGFX9Plus in {
554
565
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
555
566
556
567
let mayRaiseFPException = 0 in {
557
- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
558
- defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
559
- defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
560
- }
561
- let OtherPredicates = [HasTrue16BitInsts] in {
562
- defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
563
- defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
564
- }
568
+ defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
569
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
570
+ defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
571
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
565
572
} // End mayRaiseFPException = 0
566
573
} // End SubtargetPredicate = isGFX9Plus
567
574
@@ -975,9 +982,13 @@ defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_
975
982
defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
976
983
977
984
defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
985
+ defm V_CVT_F16_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
978
986
defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
987
+ defm V_CVT_F16_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
979
988
defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
989
+ defm V_CVT_U16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
980
990
defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
991
+ defm V_CVT_I16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
981
992
defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
982
993
defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
983
994
defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
@@ -990,6 +1001,7 @@ defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16"
990
1001
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
991
1002
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
992
1003
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
1004
+ defm V_FREXP_EXP_I16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
993
1005
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
994
1006
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
995
1007
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
@@ -1001,10 +1013,14 @@ defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16"
1001
1013
defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
1002
1014
defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
1003
1015
defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
1016
+ defm V_CVT_NORM_I16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
1004
1017
defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
1018
+ defm V_CVT_NORM_U16_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
1005
1019
1006
1020
defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1021
+ defm V_CVT_F16_F32_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1007
1022
defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1023
+ defm V_CVT_F32_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1008
1024
1009
1025
//===----------------------------------------------------------------------===//
1010
1026
// GFX10.
0 commit comments