Skip to content

[AMDGPU][True16][Codegen] true16 codegen for FPtoI1 #125120

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2680,15 +2680,20 @@ class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, S
(i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
>;

let OtherPredicates = [NotHasTrue16BitInsts] in {
let True16Predicate = NotHasTrue16BitInsts in {
def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
} // end OtherPredicates = [NotHasTrue16BitInsts]
} // end True16Predicate = NotHasTrue16BitInsts

let True16Predicate = UseRealTrue16Insts in {
def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
} // end True16Predicate = UseRealTrue16BitInsts

let OtherPredicates = [HasTrue16BitInsts] in {
let True16Predicate = UseFakeTrue16Insts in {
def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
} // end OtherPredicates = [HasTrue16BitInsts]
} // end True16Predicate = UseFakeTrue16BitInsts

def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,374 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -mattr=-real-true16 -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX11 %s

---
name: fcmp_false_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_false_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(false), [[TRUNC]](s16), [[TRUNC1]]
; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(false), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_oeq_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_oeq_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_EQ_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_EQ_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(oeq), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_ogt_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_ogt_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_GT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GT_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(ogt), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_oge_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_oge_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_GE_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GE_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_GE_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(oge), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_olt_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_olt_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_LT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(olt), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_ole_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_ole_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_LE_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LE_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(ole), %2, %3
S_ENDPGM 0, implicit %4
...
---
name: fcmp_one_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_one_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_LG_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(one), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_ord_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_ord_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_LG_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LG_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LG_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(one), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_uno_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_uno_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_U_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_U_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_U_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(uno), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_ueq_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_ueq_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_NLG_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLG_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(ueq), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_ugt_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_ugt_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_NLE_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLE_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(ugt), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_uge_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_uge_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_NLT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NLT_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(uge), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_ult_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_ult_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_NGE_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGE_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(ult), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_ule_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_ule_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_NGT_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NGT_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(ule), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_une_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_une_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_CMP_NEQ_F16_fake16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_NEQ_F16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(une), %2, %3
S_ENDPGM 0, implicit %4
...

---
name: fcmp_true_s16_vv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1
; GFX11-LABEL: name: fcmp_true_s16_vv
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; GFX11-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(true), [[TRUNC]](s16), [[TRUNC1]]
; GFX11-NEXT: S_ENDPGM 0, implicit [[FCMP]](s1)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
%3:vgpr(s16) = G_TRUNC %1
%4:vcc(s1) = G_FCMP floatpred(true), %2, %3
S_ENDPGM 0, implicit %4
...

Loading