Skip to content

Commit 42c7bc0

Browse files
authored
[AArch64][ARM] Make neon fp16 generic intrinsics always available. (#87467)
Here, "generic intrinsics" means things like dup, ext, zip and bsl, which can always be executed with integer s16 operations and do not require fullfp16. This makes them always available and brings them in line with GCC (see https://godbolt.org/z/azs8eMv54). The relevant test cases have been moved into their own files so that they can be tested with both armv8-a and armv8.2-a+fp16.
1 parent 8a5a1b7 commit 42c7bc0

File tree

6 files changed

+1100
-676
lines changed

6 files changed

+1100
-676
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1758,24 +1758,21 @@ let TargetGuard = "fullfp16" in {
17581758
// Mul lane
17591759
def VMUL_LANEH : IOpInst<"vmul_lane", "..qI", "hQh", OP_MUL_LN>;
17601760
def VMUL_NH : IOpInst<"vmul_n", "..1", "hQh", OP_MUL_N>;
1761+
}
17611762

1762-
// Data processing intrinsics - section 5
1763-
1764-
// Logical operations
1765-
let isHiddenLInst = 1 in
1766-
def VBSLH : SInst<"vbsl", ".U..", "hQh">;
1767-
1768-
// Transposition operations
1769-
def VZIPH : WInst<"vzip", "2..", "hQh">;
1770-
def VUZPH : WInst<"vuzp", "2..", "hQh">;
1771-
def VTRNH : WInst<"vtrn", "2..", "hQh">;
1772-
1773-
// Vector Extract
1774-
def VEXTH : WInst<"vext", "...I", "hQh">;
1763+
// Data processing intrinsics - section 5. Do not require fullfp16.
17751764

1776-
// Reverse vector elements
1777-
def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>;
1778-
}
1765+
// Logical operations
1766+
let isHiddenLInst = 1 in
1767+
def VBSLH : SInst<"vbsl", ".U..", "hQh">;
1768+
// Transposition operations
1769+
def VZIPH : WInst<"vzip", "2..", "hQh">;
1770+
def VUZPH : WInst<"vuzp", "2..", "hQh">;
1771+
def VTRNH : WInst<"vtrn", "2..", "hQh">;
1772+
// Vector Extract
1773+
def VEXTH : WInst<"vext", "...I", "hQh">;
1774+
// Reverse vector elements
1775+
def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>;
17791776

17801777
// ARMv8.2-A FP16 vector intrinsics for A64 only.
17811778
let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in {
@@ -1857,7 +1854,9 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in {
18571854
def VMINVH : SInst<"vminv", "1.", "hQh">;
18581855
def FMAXNMVH : SInst<"vmaxnmv", "1.", "hQh">;
18591856
def FMINNMVH : SInst<"vminnmv", "1.", "hQh">;
1857+
}
18601858

1859+
let ArchGuard = "defined(__aarch64__)" in {
18611860
// Permutation
18621861
def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>;
18631862
def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>;

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7281,8 +7281,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
72817281
{ NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
72827282
{ NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
72837283
{ NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7284-
{ NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, },
7285-
{ NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, },
72867284
{ NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
72877285
{ NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
72887286
{ NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
@@ -7301,8 +7299,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
73017299
{ NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
73027300
{ NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
73037301
{ NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7304-
{ NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, },
7305-
{ NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, },
73067302
{ NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
73077303
{ NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
73087304
{ NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
@@ -7405,12 +7401,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
74057401
{ NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
74067402
{ NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
74077403
{ NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7408-
{ NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, },
7409-
{ NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, },
7410-
{ NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, },
7411-
{ NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, },
7412-
{ NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, },
7413-
{ NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, },
74147404
// The mangling rules cause us to have one ID for each type for vldap1(q)_lane
74157405
// and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
74167406
// arbitrary one to be handled as the canonical variation.

0 commit comments

Comments
 (0)