Skip to content

Commit 33d401f

Browse files
authored
[AMDGPU][True16][CodeGen] true16 codegen for icmp and is_fpclass (#124757)
Add true16 codegen patterns for icmp and is_fpclass.
1 parent 44c0719 commit 33d401f

File tree

8 files changed

+1784
-827
lines changed

8 files changed

+1784
-827
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,9 +1207,8 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
12071207
unsigned FakeS16Opc, unsigned S32Opc,
12081208
unsigned S64Opc) {
12091209
if (Size == 16)
1210-
// FIXME-TRUE16 use TrueS16Opc when realtrue16 is supported for CMP code
12111210
return ST.hasTrue16BitInsts()
1212-
? ST.useRealTrue16Insts() ? FakeS16Opc : FakeS16Opc
1211+
? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
12131212
: S16Opc;
12141213
if (Size == 32)
12151214
return S32Opc;

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,6 +1035,20 @@ multiclass VOPCClassPat64<string inst_name> {
10351035
>;
10361036
}
10371037

1038+
multiclass VOPCClassPat64_t16<string inst_name> {
1039+
defvar inst = !cast<VOP_Pseudo>(inst_name#"_t16_e64");
1040+
defvar P = inst.Pfl;
1041+
def : GCNPat <
1042+
(i1:$sdst
1043+
(AMDGPUfp_class
1044+
(P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
1045+
i32:$src1)),
1046+
(inst i32:$src0_modifiers, VSrcT_f16:$src0,
1047+
0 /* src1_modifiers */, (f16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
1048+
0) /* op_sel */
1049+
>;
1050+
}
1051+
10381052
multiclass VOPCClassPat64_fake16<string inst_name> {
10391053
defvar inst = !cast<VOP_Pseudo>(inst_name#"_fake16_e64");
10401054
defvar P = inst.Pfl;
@@ -1158,6 +1172,7 @@ multiclass VOPC_CLASS_F16 <string opName> {
11581172
}
11591173
let True16Predicate = UseRealTrue16Insts in {
11601174
defm _t16 : VOPC_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, 0>;
1175+
defm : VOPCClassPat64_t16<NAME>;
11611176
}
11621177
let True16Predicate = UseFakeTrue16Insts in {
11631178
defm _fake16 : VOPC_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, 0>;
@@ -1207,27 +1222,30 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
12071222

12081223
// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
12091224
// complains it cannot replace i1 <-> i64/i32 if node was not morphed in place.
1210-
multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
1225+
multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt, dag dstInst = (inst $src0, $src1)> {
12111226
let WaveSizePredicate = isWave64 in
12121227
def : GCNPat <
12131228
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1214-
(i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
1229+
(i64 (COPY_TO_REGCLASS dstInst, SReg_64))
12151230
>;
12161231

12171232
let WaveSizePredicate = isWave32 in {
12181233
def : GCNPat <
12191234
(i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1220-
(i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
1235+
(i32 (COPY_TO_REGCLASS dstInst, SReg_32))
12211236
>;
12221237

12231238
// Support codegen of i64 setcc in wave32 mode.
12241239
def : GCNPat <
12251240
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
1226-
(i64 (REG_SEQUENCE SReg_64, (inst $src0, $src1), sub0, (S_MOV_B32 (i32 0)), sub1))
1241+
(i64 (REG_SEQUENCE SReg_64, dstInst, sub0, (S_MOV_B32 (i32 0)), sub1))
12271242
>;
12281243
}
12291244
}
12301245

1246+
multiclass ICMP_Pattern_t16<PatFrags cond, Instruction inst, ValueType vt>
1247+
: ICMP_Pattern<cond, inst, vt, (inst 0, $src0, 0, $src1)>;
1248+
12311249
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
12321250
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
12331251
defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
@@ -1250,6 +1268,19 @@ defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
12501268
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
12511269
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
12521270

1271+
let True16Predicate = UseRealTrue16Insts in {
1272+
defm : ICMP_Pattern_t16 <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
1273+
defm : ICMP_Pattern_t16 <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
1274+
defm : ICMP_Pattern_t16 <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
1275+
defm : ICMP_Pattern_t16 <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
1276+
defm : ICMP_Pattern_t16 <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
1277+
defm : ICMP_Pattern_t16 <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
1278+
defm : ICMP_Pattern_t16 <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
1279+
defm : ICMP_Pattern_t16 <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
1280+
defm : ICMP_Pattern_t16 <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
1281+
defm : ICMP_Pattern_t16 <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
1282+
} // End True16Predicate = UseRealTrue16Insts
1283+
12531284
let True16Predicate = UseFakeTrue16Insts in {
12541285
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
12551286
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
@@ -1335,6 +1366,24 @@ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
13351366
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
13361367
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
13371368

1369+
let True16Predicate = UseRealTrue16Insts in {
1370+
defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
1371+
defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
1372+
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
1373+
defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
1374+
defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
1375+
defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
1376+
defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
1377+
defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;
1378+
1379+
defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
1380+
defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
1381+
defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
1382+
defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
1383+
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
1384+
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
1385+
} // End True16Predicate = UseRealTrue16Insts
1386+
13381387
let True16Predicate = UseFakeTrue16Insts in {
13391388
defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
13401389
defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2019,6 +2019,10 @@ def : ClassPat<V_CMP_CLASS_F16_e64, f16> {
20192019
let True16Predicate = NotHasTrue16BitInsts;
20202020
}
20212021

2022+
def : ClassPat_t16<V_CMP_CLASS_F16_t16_e64, f16> {
2023+
let True16Predicate = UseRealTrue16Insts;
2024+
}
2025+
20222026
def : ClassPat_t16<V_CMP_CLASS_F16_fake16_e64, f16> {
20232027
let True16Predicate = UseFakeTrue16Insts;
20242028
}

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,9 @@ body: |
1717
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1818
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1919
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
20-
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
2120
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
22-
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
23-
; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
24-
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
21+
; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
22+
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
2523
;
2624
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
2725
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
@@ -55,11 +53,9 @@ body: |
5553
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5654
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
5755
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
58-
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
5956
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
60-
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
61-
; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
62-
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
57+
; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
58+
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
6359
;
6460
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
6561
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,9 @@ body: |
1717
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1818
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1919
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
20-
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
2120
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
22-
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
23-
; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
24-
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
21+
; GFX11-TRUE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
22+
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
2523
;
2624
; GFX11-FAKE16-LABEL: name: fcmp_false_f16
2725
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
@@ -55,11 +53,9 @@ body: |
5553
; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5654
; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
5755
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
58-
; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_]]
5956
; GFX11-TRUE16-NEXT: [[V_CVT_F16_F32_t16_e64_1:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
60-
; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[V_CVT_F16_F32_t16_e64_1]]
61-
; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
62-
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
57+
; GFX11-TRUE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_t16_e64_]], 0, [[V_CVT_F16_F32_t16_e64_1]], 0, 0, implicit $mode, implicit $exec
58+
; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
6359
;
6460
; GFX11-FAKE16-LABEL: name: fcmp_true_f16
6561
; GFX11-FAKE16: liveins: $vgpr0, $vgpr1

0 commit comments

Comments
 (0)