Skip to content

Commit c663b25

Browse files
committed
[AArch64][GISel] Add FP16 fcmp lowering
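This adds lowering for v4f16 and v8f16 vector floating-point compares. It splits the getActionDefinitionsBuilder rules for G_FCMP from those for G_ICMP, as they are quite different operations, and adds fp16 vector lowering: without full FP16 support, vectors of fp16 elements are scalarized and the resulting s16 compares are widened to s32. Differential Revision: https://reviews.llvm.org/D147947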
1 parent bddd7a6 commit c663b25

File tree

6 files changed

+166
-9
lines changed

6 files changed

+166
-9
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
418418
})
419419
.clampScalar(0, MinFPScalar, s128);
420420

421-
getActionDefinitionsBuilder({G_ICMP, G_FCMP})
421+
getActionDefinitionsBuilder(G_ICMP)
422422
.legalFor({{s32, s32},
423423
{s32, s64},
424424
{s32, p0},
@@ -449,6 +449,43 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
449449
s64)
450450
.clampNumElements(0, v2s32, v4s32);
451451

452+
getActionDefinitionsBuilder(G_FCMP)
453+
// If we don't have full FP16 support, then scalarize the elements of
454+
// vectors containing fp16 types.
455+
.fewerElementsIf(
456+
[=](const LegalityQuery &Query) {
457+
const auto &Ty = Query.Types[0];
458+
return Ty.isVector() && Ty.getElementType() == s16 && !HasFP16;
459+
},
460+
[=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
461+
// If we don't have full FP16 support, then widen s16 to s32 if we
462+
// encounter it.
463+
.widenScalarIf(
464+
[=](const LegalityQuery &Query) {
465+
return Query.Types[0] == s16 && !HasFP16;
466+
},
467+
[=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
468+
.legalFor({{s16, s16},
469+
{s32, s32},
470+
{s32, s64},
471+
{v4s32, v4s32},
472+
{v2s32, v2s32},
473+
{v2s64, v2s64},
474+
{v4s16, v4s16},
475+
{v8s16, v8s16}})
476+
.widenScalarOrEltToNextPow2(1)
477+
.clampScalar(1, s32, s64)
478+
.clampScalar(0, s32, s32)
479+
.minScalarEltSameAsIf(
480+
[=](const LegalityQuery &Query) {
481+
const LLT &Ty = Query.Types[0];
482+
const LLT &SrcTy = Query.Types[1];
483+
return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
484+
Ty.getElementType() != SrcTy.getElementType();
485+
},
486+
0, 1)
487+
.clampNumElements(0, v2s32, v4s32);
488+
452489
// Extensions
453490
auto ExtLegalFunc = [=](const LegalityQuery &Query) {
454491
unsigned DstSize = Query.Types[0].getSizeInBits();

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -961,9 +961,10 @@ static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
961961
const auto Pred =
962962
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
963963
Register LHS = MI.getOperand(2).getReg();
964-
// TODO: Handle v4s16 case.
965964
unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
966-
if (EltSize != 32 && EltSize != 64)
965+
if (EltSize == 16 && !ST.hasFullFP16())
966+
return false;
967+
if (EltSize != 16 && EltSize != 32 && EltSize != 64)
967968
return false;
968969
Register RHS = MI.getOperand(3).getReg();
969970
auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,99 @@ body: |
2323
$w0 = COPY %5(s32)
2424
2525
...
26+
---
27+
name: legalize_v8s16
28+
alignment: 4
29+
legalized: true
30+
body: |
31+
bb.0:
32+
liveins: $q0, $q1
33+
34+
; CHECK-LABEL: name: legalize_v8s16
35+
; CHECK: liveins: $q0, $q1
36+
; CHECK-NEXT: {{ $}}
37+
; CHECK-NEXT: %lhs:_(<8 x s16>) = COPY $q0
38+
; CHECK-NEXT: %rhs:_(<8 x s16>) = COPY $q1
39+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %lhs(<8 x s16>)
40+
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %rhs(<8 x s16>)
41+
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
42+
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16)
43+
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]]
44+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP]](s32)
45+
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
46+
; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16)
47+
; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT2]](s32), [[FPEXT3]]
48+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP1]](s32)
49+
; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
50+
; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16)
51+
; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT4]](s32), [[FPEXT5]]
52+
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP2]](s32)
53+
; CHECK-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
54+
; CHECK-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16)
55+
; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT6]](s32), [[FPEXT7]]
56+
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP3]](s32)
57+
; CHECK-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
58+
; CHECK-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[UV12]](s16)
59+
; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT8]](s32), [[FPEXT9]]
60+
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP4]](s32)
61+
; CHECK-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
62+
; CHECK-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[UV13]](s16)
63+
; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT10]](s32), [[FPEXT11]]
64+
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP5]](s32)
65+
; CHECK-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
66+
; CHECK-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[UV14]](s16)
67+
; CHECK-NEXT: [[FCMP6:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT12]](s32), [[FPEXT13]]
68+
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP6]](s32)
69+
; CHECK-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
70+
; CHECK-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[UV15]](s16)
71+
; CHECK-NEXT: [[FCMP7:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT14]](s32), [[FPEXT15]]
72+
; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP7]](s32)
73+
; CHECK-NEXT: %fcmp:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16)
74+
; CHECK-NEXT: $q0 = COPY %fcmp(<8 x s16>)
75+
; CHECK-NEXT: RET_ReallyLR implicit $q0
76+
%lhs:_(<8 x s16>) = COPY $q0
77+
%rhs:_(<8 x s16>) = COPY $q1
78+
%fcmp:_(<8 x s16>) = G_FCMP floatpred(oeq), %lhs(<8 x s16>), %rhs
79+
$q0 = COPY %fcmp(<8 x s16>)
80+
RET_ReallyLR implicit $q0
81+
82+
...
83+
---
84+
name: legalize_v4s16
85+
alignment: 4
86+
legalized: true
87+
body: |
88+
bb.0:
89+
liveins: $d0, $d1
90+
91+
; CHECK-LABEL: name: legalize_v4s16
92+
; CHECK: liveins: $d0, $d1
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: %lhs:_(<4 x s16>) = COPY $d0
95+
; CHECK-NEXT: %rhs:_(<4 x s16>) = COPY $d1
96+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %lhs(<4 x s16>)
97+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %rhs(<4 x s16>)
98+
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
99+
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
100+
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]]
101+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP]](s32)
102+
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
103+
; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
104+
; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT2]](s32), [[FPEXT3]]
105+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP1]](s32)
106+
; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
107+
; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
108+
; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT4]](s32), [[FPEXT5]]
109+
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP2]](s32)
110+
; CHECK-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
111+
; CHECK-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
112+
; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT6]](s32), [[FPEXT7]]
113+
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP3]](s32)
114+
; CHECK-NEXT: %fcmp:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
115+
; CHECK-NEXT: $d0 = COPY %fcmp(<4 x s16>)
116+
; CHECK-NEXT: RET_ReallyLR implicit $d0
117+
%lhs:_(<4 x s16>) = COPY $d0
118+
%rhs:_(<4 x s16>) = COPY $d1
119+
%fcmp:_(<4 x s16>) = G_FCMP floatpred(oeq), %lhs(<4 x s16>), %rhs
120+
$d0 = COPY %fcmp(<4 x s16>)
121+
RET_ReallyLR implicit $d0

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,6 @@
324324
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
325325
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
326326
# DEBUG-NEXT: G_FCMP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
327-
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
328327
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
329328
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
330329
# DEBUG-NEXT: G_SELECT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices

llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s
2+
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
33
...
44
---
55
name: oeq
@@ -702,27 +702,50 @@ body: |
702702
703703
...
704704
---
705-
name: dont_lower_s16
705+
name: lower_v8s16
706706
alignment: 4
707707
legalized: true
708708
body: |
709709
bb.0:
710710
liveins: $q0, $q1
711711
712-
; CHECK-LABEL: name: dont_lower_s16
712+
; CHECK-LABEL: name: lower_v8s16
713713
; CHECK: liveins: $q0, $q1
714714
; CHECK-NEXT: {{ $}}
715715
; CHECK-NEXT: %lhs:_(<8 x s16>) = COPY $q0
716716
; CHECK-NEXT: %rhs:_(<8 x s16>) = COPY $q1
717-
; CHECK-NEXT: %fcmp:_(<8 x s16>) = G_FCMP floatpred(oeq), %lhs(<8 x s16>), %rhs
718-
; CHECK-NEXT: $q0 = COPY %fcmp(<8 x s16>)
717+
; CHECK-NEXT: [[FCMEQ:%[0-9]+]]:_(<8 x s16>) = G_FCMEQ %lhs, %rhs(<8 x s16>)
718+
; CHECK-NEXT: $q0 = COPY [[FCMEQ]](<8 x s16>)
719719
; CHECK-NEXT: RET_ReallyLR implicit $q0
720720
%lhs:_(<8 x s16>) = COPY $q0
721721
%rhs:_(<8 x s16>) = COPY $q1
722722
%fcmp:_(<8 x s16>) = G_FCMP floatpred(oeq), %lhs(<8 x s16>), %rhs
723723
$q0 = COPY %fcmp(<8 x s16>)
724724
RET_ReallyLR implicit $q0
725725
726+
...
727+
---
728+
name: lower_v4s16
729+
alignment: 4
730+
legalized: true
731+
body: |
732+
bb.0:
733+
liveins: $d0, $d1
734+
735+
; CHECK-LABEL: name: lower_v4s16
736+
; CHECK: liveins: $d0, $d1
737+
; CHECK-NEXT: {{ $}}
738+
; CHECK-NEXT: %lhs:_(<4 x s16>) = COPY $d0
739+
; CHECK-NEXT: %rhs:_(<4 x s16>) = COPY $d1
740+
; CHECK-NEXT: [[FCMEQ:%[0-9]+]]:_(<4 x s16>) = G_FCMEQ %lhs, %rhs(<4 x s16>)
741+
; CHECK-NEXT: $d0 = COPY [[FCMEQ]](<4 x s16>)
742+
; CHECK-NEXT: RET_ReallyLR implicit $d0
743+
%lhs:_(<4 x s16>) = COPY $d0
744+
%rhs:_(<4 x s16>) = COPY $d1
745+
%fcmp:_(<4 x s16>) = G_FCMP floatpred(oeq), %lhs(<4 x s16>), %rhs
746+
$d0 = COPY %fcmp(<4 x s16>)
747+
RET_ReallyLR implicit $d0
748+
726749
...
727750
---
728751
name: is_not_nan

llvm/test/CodeGen/AArch64/vacg.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s
3+
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel | FileCheck %s
34

45

56
define <4 x i32> @gt_v4f32(<4 x float> %a, <4 x float> %b) {

0 commit comments

Comments
 (0)