@@ -1104,7 +1104,7 @@ def : Pat <
1104
1104
// VOP1 Patterns
1105
1105
//===----------------------------------------------------------------------===//
1106
1106
1107
- multiclass f16_fp_Pats <Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
1107
+ multiclass f16_to_fp_Pats <Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
1108
1108
// f16_to_fp patterns
1109
1109
def : GCNPat <
1110
1110
(f32 (any_f16_to_fp i32:$src0)),
@@ -1131,25 +1131,42 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
1131
1131
(cvt_f32_f16_inst_e64 SRCMODS.NEG, $src0)
1132
1132
>;
1133
1133
1134
+ // fp_to_fp16 patterns
1134
1135
// Diff hunk: the removed lines matched (f64 (any_fpextend f16:$src)); the added
// lines match (i32 (AMDGPUfp_to_f16 ...)) instead and pass the VOP3Mods-matched
// source plus its modifiers straight to cvt_f16_f32_inst_e64.
def : GCNPat <
1135
- (f64 (any_fpextend f16:$src )),
1136
- (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src) )
1136
+ (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)) )),
1137
+ (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0 )
1137
1138
>;
1138
1139
1139
- // fp_to_fp16 patterns
1140
+ // This is only used on targets without half support
1141
+ // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
1140
1142
// Diff hunk: the matched node changes from AMDGPUfp_to_f16 to strict_fp_to_f16.
// The output side is untouched: cvt_f16_f32_inst_e64 receives the modifiers and
// source that VOP3Mods matched.
def : GCNPat <
1141
- (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1143
+ (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1142
1144
(cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
1143
1145
>;
1146
+ }
1147
+
1148
// Instantiates f16_to_fp_Pats twice: with the plain _e64 conversions under
// NotHasTrue16BitInsts, and with the _fake16_e64 conversions under
// UseFakeTrue16Insts. This hunk adds no UseRealTrue16Insts instantiation.
+ let SubtargetPredicate = NotHasTrue16BitInsts in
1149
+ defm : f16_to_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
1150
+
1151
+ let SubtargetPredicate = UseFakeTrue16Insts in
1152
+ defm : f16_to_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64>;
1153
+
1154
+ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64,
1155
+ Instruction cvt_f32_f16_inst_e64,
1156
+ RegOrImmOperand VSrc> {
1157
// Matches (f64 (any_fpextend f16:$src)) in two steps: $src goes through
// cvt_f32_f16_inst_e64 with SRCMODS.NONE, and that result is the operand of
// V_CVT_F64_F32_e32.
+ def : GCNPat <
1158
+ (f64 (any_fpextend f16:$src)),
1159
+ (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src))
1160
+ >;
1144
1161
1145
1162
// Matches (i32 (fp_to_sint f16:$src)): $src goes through cvt_f32_f16_inst_e64
// with SRCMODS.NONE and the result feeds V_CVT_I32_F32_e32. The diff replaces
// the fixed VSrc_b32 operand class with VSrc, which the same diff declares as a
// RegOrImmOperand parameter on the f16_fp_Pats header.
def : GCNPat <
1146
1163
(i32 (fp_to_sint f16:$src)),
1147
- (V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32 :$src))
1164
+ (V_CVT_I32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc :$src))
1148
1165
>;
1149
1166
1150
1167
// Matches (i32 (fp_to_uint f16:$src)): $src goes through cvt_f32_f16_inst_e64
// with SRCMODS.NONE and the result feeds V_CVT_U32_F32_e32. The diff replaces
// the fixed VSrc_b32 operand class with VSrc, which the same diff declares as a
// RegOrImmOperand parameter on the f16_fp_Pats header.
def : GCNPat <
1151
1168
(i32 (fp_to_uint f16:$src)),
1152
- (V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc_b32 :$src))
1169
+ (V_CVT_U32_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, VSrc :$src))
1153
1170
>;
1154
1171
1155
1172
def : GCNPat <
@@ -1161,20 +1178,16 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16
1161
1178
(f16 (uint_to_fp i32:$src)),
1162
1179
(cvt_f16_f32_inst_e64 SRCMODS.NONE, (V_CVT_F32_U32_e32 VSrc_b32:$src))
1163
1180
>;
1164
-
1165
- // This is only used on targets without half support
1166
- // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering
1167
- def : GCNPat <
1168
- (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1169
- (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0)
1170
- >;
1171
1181
}
1172
1182
1173
1183
// Instantiations of f16_fp_Pats. After this diff each one passes a third
// argument naming the source operand class, and there are three variants:
//   NotHasTrue16BitInsts -> plain _e64 conversions,  VSrc_b32
//   UseRealTrue16Insts   -> _t16_e64 conversions,    VSrcT_b16
//   UseFakeTrue16Insts   -> _fake16_e64 conversions, VSrc_b16
// The old HasTrue16BitInsts instantiation is removed in favour of the
// UseRealTrue16Insts / UseFakeTrue16Insts pair.
let SubtargetPredicate = NotHasTrue16BitInsts in
1174
- defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64>;
1184
+ defm : f16_fp_Pats<V_CVT_F16_F32_e64, V_CVT_F32_F16_e64, VSrc_b32 >;
1175
1185
1176
- let SubtargetPredicate = HasTrue16BitInsts in
1177
- defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64>;
1186
+ let SubtargetPredicate = UseRealTrue16Insts in
1187
+ defm : f16_fp_Pats<V_CVT_F16_F32_t16_e64, V_CVT_F32_F16_t16_e64, VSrcT_b16>;
1188
+
1189
+ let SubtargetPredicate = UseFakeTrue16Insts in
1190
+ defm : f16_fp_Pats<V_CVT_F16_F32_fake16_e64, V_CVT_F32_F16_fake16_e64, VSrc_b16>;
1178
1191
1179
1192
//===----------------------------------------------------------------------===//
1180
1193
// VOP2 Patterns
@@ -2784,13 +2797,24 @@ def : GCNPat <
2784
2797
SSrc_i1:$src))
2785
2798
>;
2786
2799
2787
// (f16 (sint_to_fp i1:$src)) for the real true16 path. The diff narrows the
// predicate from HasTrue16BitInsts to UseRealTrue16Insts and switches the
// convert from V_CVT_F16_F32_t16_e32 to V_CVT_F16_F32_t16_e64, so the operands
// are now listed explicitly: src0_modifiers, the V_CNDMASK_B32_e64 result
// (built from the constants 0 and CONST.FP32_NEG_ONE, keyed on $src), then
// clamp, omod and op_sel, all 0.
- let SubtargetPredicate = HasTrue16BitInsts in
2800
+ let SubtargetPredicate = UseRealTrue16Insts in
2788
2801
def : GCNPat <
2789
2802
(f16 (sint_to_fp i1:$src)),
2790
- (V_CVT_F16_F32_t16_e32 (
2791
- V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2803
+ (V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0,
2804
+ ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2792
2805
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
2793
- SSrc_i1:$src))
2806
+ SSrc_i1:$src),
2807
+ /*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
2808
+ >;
2809
+
2810
// (f16 (sint_to_fp i1:$src)) under UseFakeTrue16Insts. V_CNDMASK_B32_e64 is
// built from the constants 0 and CONST.FP32_NEG_ONE keyed on $src, and its
// result is the source of V_CVT_F16_F32_fake16_e64. Only src0_modifiers, clamp
// and omod are passed here; unlike the _t16_e64 pattern in this diff, there is
// no op_sel operand.
+ let SubtargetPredicate = UseFakeTrue16Insts in
2811
+ def : GCNPat <
2812
+ (f16 (sint_to_fp i1:$src)),
2813
+ (V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0,
2814
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2815
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
2816
+ SSrc_i1:$src),
2817
+ /*clamp*/ 0, /*omod*/ 0)
2794
2818
>;
2795
2819
2796
2820
let SubtargetPredicate = NotHasTrue16BitInsts in
@@ -2801,13 +2825,25 @@ def : GCNPat <
2801
2825
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
2802
2826
SSrc_i1:$src))
2803
2827
>;
2804
// (f16 (uint_to_fp i1:$src)) for the real true16 path. The diff narrows the
// predicate from HasTrue16BitInsts to UseRealTrue16Insts and switches the
// convert from V_CVT_F16_F32_t16_e32 to V_CVT_F16_F32_t16_e64, so the operands
// are now listed explicitly: src0_modifiers, the V_CNDMASK_B32_e64 result
// (built from the constants 0 and CONST.FP32_ONE, keyed on $src), then clamp,
// omod and op_sel, all 0.
- let SubtargetPredicate = HasTrue16BitInsts in
2828
+
2829
+ let SubtargetPredicate = UseRealTrue16Insts in
2805
2830
def : GCNPat <
2806
2831
(f16 (uint_to_fp i1:$src)),
2807
- (V_CVT_F16_F32_t16_e32 (
2808
- V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2832
+ (V_CVT_F16_F32_t16_e64 /*src0_modifiers*/ 0,
2833
+ ( V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2809
2834
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
2810
- SSrc_i1:$src))
2835
+ SSrc_i1:$src),
2836
+ /*clamp*/ 0, /*omod*/ 0, /*op_sel*/ 0)
2837
+ >;
2838
+
2839
// (f16 (uint_to_fp i1:$src)) under UseFakeTrue16Insts. V_CNDMASK_B32_e64 is
// built from the constants 0 and CONST.FP32_ONE keyed on $src, and its result
// is the source of V_CVT_F16_F32_fake16_e64. Only src0_modifiers, clamp and
// omod are passed here; unlike the _t16_e64 pattern in this diff, there is no
// op_sel operand.
+ let SubtargetPredicate = UseFakeTrue16Insts in
2840
+ def : GCNPat <
2841
+ (f16 (uint_to_fp i1:$src)),
2842
+ (V_CVT_F16_F32_fake16_e64 /*src0_modifiers*/ 0,
2843
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
2844
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
2845
+ SSrc_i1:$src),
2846
+ /*clamp*/ 0, /*omod*/ 0)
2811
2847
>;
2812
2848
2813
2849
def : GCNPat <
0 commit comments