Skip to content

Commit 4a2399e

Browse files
committed
[AMDGPU][True16][MC] true16/fake16 for more VOP1 instructions
1 parent 956591b commit 4a2399e

27 files changed

+5190
-2651
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 44 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -187,21 +187,17 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
187187
let HasClamp = 1;
188188
}
189189

190-
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
191-
VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
192-
193-
let Ins64 = (ins Src0RC64:$src0, Clamp:$clamp, omod:$omod);
194-
let InsVOP3Base = (ins Src0VOP3DPP:$src0, Clamp:$clamp, omod:$omod);
195-
let AsmVOP3Base = "$vdst, $src0$clamp$omod";
196-
197-
let HasModifiers = 0;
198-
let HasClamp = 1;
199-
}
200-
201190
def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
202191
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
203192
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
204-
def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;
193+
def VOP1_F16_I16_t16 : VOPProfile_True16 <VOP_F16_I16> {
194+
let HasClamp = 1;
195+
}
196+
def VOP1_F16_I16_fake16 : VOPProfile_Fake16<VOP_F16_I16> {
197+
let HasModifiers = 0;
198+
let HasOMod = 1;
199+
let HasClamp = 1;
200+
}
205201

206202
def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
207203
let HasExtVOP3DPP = 0;
@@ -217,10 +213,14 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
217213
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
218214
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
219215
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
220-
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
216+
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
217+
let HasOMod = 1;
218+
}
219+
def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
221220
let HasOMod = 1;
222221
}
223222

223+
224224
//===----------------------------------------------------------------------===//
225225
// VOP1 Instructions
226226
//===----------------------------------------------------------------------===//
@@ -479,24 +479,16 @@ let SubtargetPredicate = isGFX7Plus in {
479479
} // End isReMaterializable = 1
480480

481481
let FPDPRounding = 1 in {
482-
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
483-
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
484-
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
485-
}
486-
let OtherPredicates = [HasTrue16BitInsts] in {
487-
defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
488-
defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
489-
}
482+
defm V_CVT_F16_U16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_u16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, uint_to_fp>;
483+
defm V_CVT_F16_I16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_i16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, sint_to_fp>;
484+
490485
} // End FPDPRounding = 1
491486
// OMod clears exceptions when set in these two instructions
492-
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
493-
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
494-
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
495-
}
496-
let OtherPredicates = [HasTrue16BitInsts] in {
497-
defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
498-
defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
499-
}
487+
defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
488+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_uint>;
489+
defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
490+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;
491+
500492
let TRANS = 1, SchedRW = [WriteTrans32] in {
501493
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
502494
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
@@ -507,12 +499,8 @@ defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
507499
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
508500
} // End TRANS = 1, SchedRW = [WriteTrans32]
509501
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
510-
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
511-
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
512-
}
513-
let OtherPredicates = [HasTrue16BitInsts] in {
514-
defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
515-
}
502+
defm V_FREXP_EXP_I16_F16 : VOP1Inst_t16_with_profiles <"v_frexp_exp_i16_f16",
503+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, int_amdgcn_frexp_exp>;
516504
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
517505
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
518506
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
@@ -560,14 +548,10 @@ let SubtargetPredicate = isGFX9Plus in {
560548
defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
561549

562550
let mayRaiseFPException = 0 in {
563-
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
564-
defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
565-
defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
566-
}
567-
let OtherPredicates = [HasTrue16BitInsts] in {
568-
defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
569-
defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
570-
}
551+
defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
552+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
553+
defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
554+
VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
571555
} // End mayRaiseFPException = 0
572556
} // End SubtargetPredicate = isGFX9Plus
573557

@@ -939,6 +923,14 @@ multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
939923
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
940924
VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
941925

926+
multiclass VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<bits<9> op, string asmName,
927+
string opName = NAME> {
928+
defm opName#"_t16" :
929+
VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
930+
defm opName#"_fake16":
931+
VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>;
932+
}
933+
942934
multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
943935
VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;
944936

@@ -979,10 +971,10 @@ defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16"
979971
defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
980972
defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
981973

982-
defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
983-
defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
984-
defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
985-
defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
974+
defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
975+
defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
976+
defm V_CVT_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
977+
defm V_CVT_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
986978
defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
987979
defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
988980
defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
@@ -994,7 +986,7 @@ defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
994986
defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
995987
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
996988
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
997-
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
989+
defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
998990
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
999991
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
1000992
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
@@ -1005,13 +997,11 @@ defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f1
1005997
defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
1006998
defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
1007999
defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
1008-
defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
1009-
defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
1000+
defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
1001+
defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
10101002

1011-
defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1012-
defm V_CVT_F16_F32_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1013-
defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1014-
defm V_CVT_F32_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
1003+
defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
1004+
defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_f16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;
10151005

10161006
//===----------------------------------------------------------------------===//
10171007
// GFX10.

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ body: |
77
bb.0:
88
; GCN-LABEL: name: cvt_hi_f32_f16
99
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
10-
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_t16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
10+
; GCN-NEXT: [[V_CVT_F16_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_fake16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
1111
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
12-
; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_t16_e64_]], implicit $exec
12+
; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_fake16_e64_]], implicit $exec
1313
; GCN-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_fake16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
1414
%0:vgpr_32 = IMPLICIT_DEF
15-
%1:vgpr_32 = V_CVT_F16_U16_t16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
15+
%1:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
1616
%2:sreg_32 = COPY %1:vgpr_32
1717
%3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode
1818
...

0 commit comments

Comments
 (0)