Skip to content

Commit 605ea1c

Browse files
committed
[AMDGPU][True16][CodeGen] Support fp conversion in true/fake16 format
1 parent 00a4042 commit 605ea1c

19 files changed

+1358
-851
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5424,9 +5424,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
54245424
case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
54255425
case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
54265426
case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
5427-
case AMDGPU::S_CVT_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
5428-
case AMDGPU::S_CVT_HI_F32_F16: return AMDGPU::V_CVT_F32_F16_t16_e64;
5429-
case AMDGPU::S_CVT_F16_F32: return AMDGPU::V_CVT_F16_F32_t16_e64;
5427+
case AMDGPU::S_CVT_F32_F16:
5428+
case AMDGPU::S_CVT_HI_F32_F16:
5429+
return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5430+
: AMDGPU::V_CVT_F32_F16_fake16_e64;
5431+
case AMDGPU::S_CVT_F16_F32:
5432+
return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5433+
: AMDGPU::V_CVT_F16_F32_fake16_e64;
54305434
case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
54315435
case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
54325436
case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1747,9 +1747,11 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
17471747
(ins Src0Mod:$src0_modifiers, Src0RC:$src0)))
17481748
/* else */,
17491749
// VOP1 without modifiers
1750-
!if (HasClamp,
1751-
(ins Src0RC:$src0, Clamp0:$clamp),
1752-
(ins Src0RC:$src0))
1750+
!if(HasOMod,
1751+
(ins Src0RC:$src0, Clamp0:$clamp, omod0:$omod),
1752+
!if (HasClamp,
1753+
(ins Src0RC:$src0, Clamp0:$clamp),
1754+
(ins Src0RC:$src0)))
17531755
/* endif */ ),
17541756
!if (!eq(NumSrcArgs, 2),
17551757
!if (HasModifiers,
@@ -2537,6 +2539,7 @@ class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
25372539
// Most DstVT are 16-bit, but not all
25382540
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
25392541
let DstRC64 = getVALUDstForVT<DstVT>.ret;
2542+
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
25402543
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
25412544
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
25422545
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 61 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1104,7 +1104,7 @@ def : Pat <
11041104
// VOP1 Patterns
11051105
//===----------------------------------------------------------------------===//
11061106

1107-
multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
1107+
multiclass f16_to_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
11081108
// f16_to_fp patterns
11091109
def : GCNPat <
11101110
(f32 (any_f16_to_fp i32:$src0)),
@@ -1131,25 +1131,42 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
11311131
(cvt_f32_f16_inst_e64 SRCMODS.NEG, $src0)
11321132
>;
11331133

1134+
// fp_to_fp16 patterns
11341135
def : GCNPat <
1135-
(f64 (any_fpextend f16:$src)),
1136-
(V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
1136+
(i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1137+
(cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
11371138
>;
11381139

1139-
// fp_to_fp16 patterns
1140+
// This is only used on targets without half support
1141+
// TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
11401142
def : GCNPat <
1141-
(i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1143+
(i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
11421144
(cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
11431145
>;
1146+
}
1147+
1148+
let SubtargetPredicate = NotHasTrue16BitInsts in
1149+
defm : f16_to_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
1150+
1151+
let SubtargetPredicate = UseFakeTrue16Insts in
1152+
defm : f16_to_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
1153+
1154+
multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64,
1155+
Instruction cvt_f32_f16_inst_e64,
1156+
RegOrImmOperand VSrc> {
1157+
def : GCNPat <
1158+
(f64 (any_fpextend f16:$src)),
1159+
(V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
1160+
>;
11441161

11451162
def : GCNPat <
11461163
(i32 (fp_to_sint f16:$src)),
1147-
(V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32:$src))
1164+
(V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc:$src))
11481165
>;
11491166

11501167
def : GCNPat <
11511168
(i32 (fp_to_uint f16:$src)),
1152-
(V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32:$src))
1169+
(V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc:$src))
11531170
>;
11541171

11551172
def : GCNPat <
@@ -1161,20 +1178,16 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
11611178
(f16 (uint_to_fp i32:$src)),
11621179
(cvt_f16_f32_inst_e64 SRCMODS.NONE, (V_CVT_F32_U32_e32 VSrc_b32:$src))
11631180
>;
1164-
1165-
// This is only used on targets without half support
1166-
// TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
1167-
def : GCNPat <
1168-
(i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1169-
(cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
1170-
>;
11711181
}
11721182

11731183
let SubtargetPredicate = NotHasTrue16BitInsts in
1174-
defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
1184+
defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64, VSrc_b32>;
11751185

1176-
let SubtargetPredicate = HasTrue16BitInsts in
1177-
defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64>;
1186+
let SubtargetPredicate = UseRealTrue16Insts in
1187+
defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64, VSrcT_b16>;
1188+
1189+
let SubtargetPredicate = UseFakeTrue16Insts in
1190+
defm : f16_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64, VSrc_b16>;
11781191

11791192
//===----------------------------------------------------------------------===//
11801193
// VOP2 Patterns
@@ -2784,13 +2797,24 @@ def : GCNPat <
27842797
SSrc_i1:$src))
27852798
>;
27862799

2787-
let SubtargetPredicate = HasTrue16BitInsts in
2800+
let SubtargetPredicate = UseRealTrue16Insts in
27882801
def : GCNPat <
27892802
(f16 (sint_to_fp i1:$src)),
2790-
(V_CVT_F16_F32_t16_e32 (
2791-
V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2803+
(V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0,
2804+
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
27922805
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
2793-
SSrc_i1:$src))
2806+
SSrc_i1:$src),
2807+
/*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
2808+
>;
2809+
2810+
let SubtargetPredicate = UseFakeTrue16Insts in
2811+
def : GCNPat <
2812+
(f16 (sint_to_fp i1:$src)),
2813+
(V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0,
2814+
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2815+
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
2816+
SSrc_i1:$src),
2817+
/*clamp*/ 0, /*omod*/ 0)
27942818
>;
27952819

27962820
let SubtargetPredicate = NotHasTrue16BitInsts in
@@ -2801,13 +2825,25 @@ def : GCNPat <
28012825
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
28022826
SSrc_i1:$src))
28032827
>;
2804-
let SubtargetPredicate = HasTrue16BitInsts in
2828+
2829+
let SubtargetPredicate = UseRealTrue16Insts in
28052830
def : GCNPat <
28062831
(f16 (uint_to_fp i1:$src)),
2807-
(V_CVT_F16_F32_t16_e32 (
2808-
V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2832+
(V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0,
2833+
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
28092834
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
2810-
SSrc_i1:$src))
2835+
SSrc_i1:$src),
2836+
/*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
2837+
>;
2838+
2839+
let SubtargetPredicate = UseFakeTrue16Insts in
2840+
def : GCNPat <
2841+
(f16 (uint_to_fp i1:$src)),
2842+
(V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0,
2843+
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2844+
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
2845+
SSrc_i1:$src),
2846+
/*clamp*/ 0, /*omod*/ 0)
28112847
>;
28122848

28132849
def : GCNPat <

0 commit comments

Comments
 (0)