@@ -187,21 +187,17 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
187
187
let HasClamp = 1;
188
188
}
189
189
190
- class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
191
- VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
192
-
193
- let Ins64 = (ins Src0RC64:$src0, Clamp:$clamp, omod:$omod);
194
- let InsVOP3Base = (ins Src0VOP3DPP:$src0, Clamp:$clamp, omod:$omod);
195
- let AsmVOP3Base = "$vdst, $src0$clamp$omod";
196
-
197
- let HasModifiers = 0;
198
- let HasClamp = 1;
199
- }
200
-
201
190
def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
202
191
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
203
192
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
204
- def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;
// Profile passed as the "_t16" profile argument of VOP1Inst_t16_with_profiles
// for v_cvt_f16_u16 / v_cvt_f16_i16. It wraps the VOP_F16_I16 profile in
// VOPProfile_True16 and overrides only HasClamp.
// NOTE(review): presumably HasClamp = 1 adds the clamp operand - confirm
// against the VOPProfile definition.
193
+ def VOP1_F16_I16_t16 : VOPProfile_True16 <VOP_F16_I16> {
194
+ let HasClamp = 1;
195
+ }
// Profile passed as the "_fake16" profile argument of
// VOP1Inst_t16_with_profiles for v_cvt_f16_u16 / v_cvt_f16_i16. It wraps
// VOP_F16_I16 in VOPProfile_Fake16 and overrides three fields:
// HasModifiers = 0, HasOMod = 1, HasClamp = 1.
// NOTE(review): presumably this means no source modifiers, with omod and
// clamp operands present - confirm against the VOPProfile definition.
196
+ def VOP1_F16_I16_fake16 : VOPProfile_Fake16<VOP_F16_I16> {
197
+ let HasModifiers = 0;
198
+ let HasOMod = 1;
199
+ let HasClamp = 1;
200
+ }
205
201
206
202
def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
207
203
let HasExtVOP3DPP = 0;
@@ -217,10 +213,14 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
217
213
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
218
214
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
219
215
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
220
- def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
// "_t16" counterpart of VOP_I16_F16_SPECIAL_OMOD: the VOP_I16_F16 profile
// wrapped in VOPProfile_True16 with HasOMod forced to 1. The record name was
// previously bound to a VOPProfile_Fake16-based profile.
216
+ def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
217
+ let HasOMod = 1;
218
+ }
// "_fake16" counterpart of VOP_I16_F16_SPECIAL_OMOD: the VOP_I16_F16 profile
// wrapped in VOPProfile_Fake16 with HasOMod forced to 1. Its body is the one
// formerly attached to the VOPProfile_Fake16-based "_t16" record.
219
+ def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
221
220
let HasOMod = 1;
222
221
}
223
222
223
+
224
224
//===----------------------------------------------------------------------===//
225
225
// VOP1 Instructions
226
226
//===----------------------------------------------------------------------===//
@@ -479,24 +479,16 @@ let SubtargetPredicate = isGFX7Plus in {
479
479
} // End isReMaterializable = 1
480
480
481
481
let FPDPRounding = 1 in {
482
- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
483
- defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
484
- defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
485
- }
486
- let OtherPredicates = [HasTrue16BitInsts] in {
487
- defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
488
- defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
489
- }
// Unsigned / signed 16-bit integer to f16 conversions, defined inside the
// enclosing FPDPRounding = 1 scope. Each defm hands VOP1Inst_t16_with_profiles
// three profiles (VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16) plus
// the uint_to_fp / sint_to_fp operator, replacing the former pair of
// predicate-guarded VOP1Inst defms per instruction.
// NOTE(review): VOP1Inst_t16_with_profiles is defined outside this view;
// presumably it emits the base, "_t16" and "_fake16" records - confirm.
482
+ defm V_CVT_F16_U16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_u16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, uint_to_fp>;
483
+ defm V_CVT_F16_I16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_i16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, sint_to_fp>;
484
+
490
485
} // End FPDPRounding = 1
491
486
// OMod clears exceptions when set in these two instructions
492
- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
493
- defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
494
- defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
495
- }
496
- let OtherPredicates = [HasTrue16BitInsts] in {
497
- defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
498
- defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
499
- }
// f16 to unsigned / signed 16-bit integer conversions. Both use the
// SPECIAL_OMOD profile family (VOP_I16_F16_SPECIAL_OMOD and its "_t16" /
// "_fake16" variants) with the fp_to_uint / fp_to_sint operator.
487
+ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
488
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_uint>;
489
+ defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
490
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
491
+
500
492
let TRANS = 1, SchedRW = [WriteTrans32] in {
501
493
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
502
494
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -507,12 +499,8 @@ defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
507
499
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
508
500
} // End TRANS = 1, SchedRW = [WriteTrans32]
509
501
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
510
- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
511
- defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
512
- }
513
- let OtherPredicates = [HasTrue16BitInsts] in {
514
- defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
515
- }
// v_frexp_exp_i16_f16, selected from the int_amdgcn_frexp_exp intrinsic.
// Uses the same SPECIAL_OMOD profile triple as the f16 -> i16 conversions
// and replaces the former predicate-guarded VOP1Inst pair.
502
+ defm V_FREXP_EXP_I16_F16 : VOP1Inst_t16_with_profiles <"v_frexp_exp_i16_f16",
503
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, int_amdgcn_frexp_exp>;
516
504
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
517
505
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
518
506
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
@@ -560,14 +548,10 @@ let SubtargetPredicate = isGFX9Plus in {
560
548
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
561
549
562
550
let mayRaiseFPException = 0 in {
563
- let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
564
- defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
565
- defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
566
- }
567
- let OtherPredicates = [HasTrue16BitInsts] in {
568
- defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
569
- defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
570
- }
// Normalized f16 -> i16 / u16 conversions, defined inside the enclosing
// mayRaiseFPException = 0 and isGFX9Plus scopes. No operator argument is
// passed after the three SPECIAL_OMOD profiles, so whatever default
// VOP1Inst_t16_with_profiles declares for it applies.
551
+ defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
552
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
553
+ defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
554
+ VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
571
555
} // End mayRaiseFPException = 0
572
556
} // End SubtargetPredicate = isGFX9Plus
573
557
@@ -939,6 +923,14 @@ multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
939
923
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
940
924
VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
941
925
// Instantiates VOP1_Real_FULL_with_name_gfx11_gfx12 twice with the same
// opcode and assembly name: once for the record opName#"_t16" and once for
// opName#"_fake16".
//   op      - 9-bit opcode shared by both variants.
//   asmName - assembly mnemonic shared by both variants.
//   opName  - base record name; defaults to NAME, i.e. the name given at the
//             instantiating defm.
// NOTE(review): the "_f16" in this multiclass's name corresponds to the
// "_fake16" suffix it actually appends.
926
+ multiclass VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<bits<9> op, string asmName,
927
+ string opName = NAME> {
928
+ defm opName#"_t16" :
929
+ VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
930
+ defm opName#"_fake16":
931
+ VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>;
932
+ }
933
+
942
934
multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
943
935
VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;
944
936
@@ -979,10 +971,10 @@ defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16"
979
971
defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
980
972
defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
981
973
982
- defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x050, "v_cvt_f16_u16">;
983
- defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x051, "v_cvt_f16_i16">;
984
- defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x052, "v_cvt_u16_f16">;
985
- defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x053, "v_cvt_i16_f16">;
// Real encodings for the 16-bit integer <-> f16 conversions (opcodes
// 0x050-0x053). Each defm uses the base record name, and the multiclass
// appends "_t16" and "_fake16", so both variants share one opcode and
// mnemonic. Previously only the "_t16" record was listed here.
974
+ defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x050, "v_cvt_f16_u16">;
975
+ defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x051, "v_cvt_f16_i16">;
976
+ defm V_CVT_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x052, "v_cvt_u16_f16">;
977
+ defm V_CVT_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x053, "v_cvt_i16_f16">;
986
978
defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
987
979
defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
988
980
defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
@@ -994,7 +986,7 @@ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
994
986
defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
995
987
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
996
988
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
997
- defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x05a, "v_frexp_exp_i16_f16">;
// Real encoding for v_frexp_exp_i16_f16 (opcode 0x05a), covering both the
// "_t16" and "_fake16" records via the combined multiclass.
989
+ defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x05a, "v_frexp_exp_i16_f16">;
998
990
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
999
991
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
1000
992
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
@@ -1005,13 +997,11 @@ defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f1
1005
997
defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
1006
998
defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
1007
999
defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
1008
- defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x063, "v_cvt_norm_i16_f16">;
1009
- defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12 <0x064, "v_cvt_norm_u16_f16">;
// Real encodings for the normalized f16 -> i16 / u16 conversions (opcodes
// 0x063 / 0x064), covering both the "_t16" and "_fake16" records.
1000
+ defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x063, "v_cvt_norm_i16_f16">;
1001
+ defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12 <0x064, "v_cvt_norm_u16_f16">;
1010
1002
1011
- defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1012
- defm V_CVT_F16_F32_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1013
- defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1014
- defm V_CVT_F32_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
// Real encodings for the f32 <-> f16 conversions (opcodes 0x00a / 0x00b).
// Each line replaces an explicit "_t16" + "_fake16" pair of
// VOP1_Real_FULL_t16_gfx11_gfx12 defms that used the same opcode and mnemonic.
1003
+ defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1004
+ defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1015
1005
1016
1006
//===----------------------------------------------------------------------===//
1017
1007
// GFX10.
0 commit comments