@@ -1035,6 +1035,20 @@ multiclass VOPCClassPat64<string inst_name> {
1035
1035
>;
1036
1036
}
1037
1037
1038
+ multiclass VOPCClassPat64_t16<string inst_name> {
+ // Selection pattern for the `_t16_e64` pseudo of an fp_class compare.
+ // `inst` is the VOP_Pseudo named <inst_name>_t16_e64 and `P` is its Pfl record.
+ // The source dag matches AMDGPUfp_class on a P.Src0VT value (modifiers captured
+ // through VOP3ModsNonCanonicalizing) and an i32 $src1 operand; the result dag
+ // emits `inst` with src1_modifiers = 0, the lo16 subregister of $src1 (taken
+ // with EXTRACT_SUBREG from a VGPR_32), and op_sel = 0.
1039
+ defvar inst = !cast<VOP_Pseudo>(inst_name#"_t16_e64");
1040
+ defvar P = inst.Pfl;
1041
+ def : GCNPat <
1042
+ (i1:$sdst
1043
+ (AMDGPUfp_class
1044
+ (P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
1045
+ i32:$src1)),
1046
+ (inst i32:$src0_modifiers, VSrcT_f16:$src0,
1047
+ 0 /* src1_modifiers */, (f16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
1048
+ 0) /* op_sel */
1049
+ >;
1050
+ }
1051
+
1038
1052
multiclass VOPCClassPat64_fake16<string inst_name> {
1039
1053
defvar inst = !cast<VOP_Pseudo>(inst_name#"_fake16_e64");
1040
1054
defvar P = inst.Pfl;
@@ -1158,6 +1172,7 @@ multiclass VOPC_CLASS_F16 <string opName> {
1158
1172
}
1159
1173
let True16Predicate = UseRealTrue16Insts in {
1160
1174
defm _t16 : VOPC_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, 0>;
1175
+ defm : VOPCClassPat64_t16<NAME>;
1161
1176
}
1162
1177
let True16Predicate = UseFakeTrue16Insts in {
1163
1178
defm _fake16 : VOPC_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, 0>;
@@ -1207,27 +1222,30 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
1207
1222
1208
1223
// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
1209
1224
// complains it cannot replace i1 <-> i64/i32 if node was not morphed in place.
1210
- multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
1225
+ multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt, dag dstInst = (inst $src0, $src1) > {
+ // Emits the GCNPat selections for (AMDGPUsetcc vt:$src0, vt:$src1, cond).
+ // `dstInst` is the output dag spliced into every pattern below; it defaults to
+ // (inst $src0, $src1), which is the expression the removed lines spelled out
+ // inline, so existing three-argument instantiations keep the same output.
+ // Wave64 yields an i64 copied to SReg_64; wave32 yields an i32 copied to
+ // SReg_32, plus an i64 form assembled with REG_SEQUENCE.
1211
1226
let WaveSizePredicate = isWave64 in
1212
1227
def : GCNPat <
1213
1228
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1214
- (i64 (COPY_TO_REGCLASS (inst $src0, $src1) , SReg_64))
1229
+ (i64 (COPY_TO_REGCLASS dstInst , SReg_64))
1215
1230
>;
1216
1231
1217
1232
let WaveSizePredicate = isWave32 in {
1218
1233
def : GCNPat <
1219
1234
(i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1220
- (i32 (COPY_TO_REGCLASS (inst $src0, $src1) , SReg_32))
1235
+ (i32 (COPY_TO_REGCLASS dstInst , SReg_32))
1221
1236
>;
1222
1237
1223
1238
// Support codegen of i64 setcc in wave32 mode.
+ // The compare result fills sub0 and an S_MOV_B32 of 0 supplies sub1.
1224
1239
def : GCNPat <
1225
1240
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1226
- (i64 (REG_SEQUENCE SReg_64, (inst $src0, $src1) , sub0, (S_MOV_B32 (i32 0)), sub1))
1241
+ (i64 (REG_SEQUENCE SReg_64, dstInst , sub0, (S_MOV_B32 (i32 0)), sub1))
1227
1242
>;
1228
1243
}
1229
1244
}
1230
1245
1246
+ multiclass ICMP_Pattern_t16<PatFrags cond, Instruction inst, ValueType vt>
+ // Forwards cond/inst/vt to ICMP_Pattern and overrides dstInst with the
+ // four-operand dag (inst 0, $src0, 0, $src1).
+ // NOTE(review): the two literal 0 operands are presumably the src0/src1
+ // modifier slots of the t16 e64 operand list, and no op_sel operand is passed
+ // here - confirm against the instruction definition.
1247
+ : ICMP_Pattern<cond, inst, vt, (inst 0, $src0, 0, $src1)>;
1248
+
1231
1249
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
1232
1250
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
1233
1251
defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
@@ -1250,6 +1268,19 @@ defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
1250
1268
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
1251
1269
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
1252
1270
1271
+ let True16Predicate = UseRealTrue16Insts in {
+ // i16 setcc selections instantiated through ICMP_Pattern_t16 for the
+ // `_t16_e64` compare opcodes. EQ/NE and the unsigned conditions select the
+ // U16 opcodes; the signed conditions select the I16 opcodes.
1272
+ defm : ICMP_Pattern_t16 <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
1273
+ defm : ICMP_Pattern_t16 <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
1274
+ defm : ICMP_Pattern_t16 <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
1275
+ defm : ICMP_Pattern_t16 <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
1276
+ defm : ICMP_Pattern_t16 <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
1277
+ defm : ICMP_Pattern_t16 <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
1278
+ defm : ICMP_Pattern_t16 <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
1279
+ defm : ICMP_Pattern_t16 <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
1280
+ defm : ICMP_Pattern_t16 <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
1281
+ defm : ICMP_Pattern_t16 <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
1282
+ } // End True16Predicate = UseRealTrue16Insts
1283
+
1253
1284
let True16Predicate = UseFakeTrue16Insts in {
1254
1285
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
1255
1286
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
@@ -1335,6 +1366,24 @@ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
1335
1366
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
1336
1367
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
1337
1368
1369
+ let True16Predicate = UseRealTrue16Insts in {
+ // f16 compare selections for the `_t16_e64` opcodes, instantiated through the
+ // plain FCMP_Pattern multiclass (defined outside this view).
+ // First group: COND_O, COND_UO and the COND_O* conditions.
+ // NOTE(review): COND_ONE here and COND_UNE below both select
+ // V_CMP_NEQ_F16_t16_e64 - confirm this is intended and consistent with the
+ // FCMP_Pattern tables for the other float types.
1370
+ defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
1371
+ defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
1372
+ defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
1373
+ defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
1374
+ defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
1375
+ defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
1376
+ defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
1377
+ defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;
1378
+
+ // Second group: the COND_U* conditions map to the N-prefixed opcodes
+ // (NLG, NEQ, NLE, NLT, NGE, NGT).
1379
+ defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
1380
+ defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
1381
+ defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
1382
+ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
1383
+ defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
1384
+ defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
1385
+ } // End True16Predicate = UseRealTrue16Insts
1386
+
1338
1387
let True16Predicate = UseFakeTrue16Insts in {
1339
1388
defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
1340
1389
defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;
0 commit comments