Skip to content

Commit b879f99

Browse files
committed
[AArch64][ARM] Alter most of arm_neon.h to be target-based, not preprocessor based.
Similar to D131064, this alters most of the intrinsics in arm_neon.h to be target based, not preprocessor based. The intrinsics that are changed are the ones with obvious target features (fp16, fp16fml, cryptos, i8mm and bf16). The ones that are not yet altered are the ones without target features like rdma (8.1) and complex (8.3). Those will be switched in a followup patch that allows targeting architecture versions. The existing ArchGuard in arm_neon.td is split into ArchGuard that still adds ifdef defines (for example for intrinsics that require __aarch64__), and TargetGuards for intrinsics dependant on target features. From there the TargetGuards are used in two ways: - For intrinsics emitted as functions, __attribute__((target(TargetGuard))) is added to the definition of the function. Along with the existing always_inline intrinsic, this will give a compile time error if the function is used in a context where the target feature is not available. - For intrinsics emitted as macros, the __builtins are emitted into arm_neon.inc using TARGET_BUILTIN as opposed to BUILTIN, which includes the target feature and gives an error if the builtin is found in a function without the required features, similar to arm_sve.h. The second method requires that the intrinsics be separable from the existing _v intrinsics used in other types. For example __builtin_neon_splat_lane_bf16 is used as opposed to __builtin_neon_splat_lane_v. There are some adjustments to the CGBuiltin to account for intrinsics that can be treated similarly, except for their target features. Differential Revision: https://reviews.llvm.org/D132034
1 parent 4547227 commit b879f99

File tree

12 files changed

+785
-428
lines changed

12 files changed

+785
-428
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ namespace clang {
2727
enum {
2828
LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
2929
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
30+
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
3031
#include "clang/Basic/BuiltinsNEON.def"
3132
FirstTSBuiltin
3233
};

clang/include/clang/Basic/arm_neon.td

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
289289
"UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl"> {
290290
let isLaneQ = 1;
291291
}
292-
let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)" in {
292+
let TargetGuard = "bf16" in {
293293
def SPLAT_BF : WInst<"splat_lane", ".(!q)I", "bQb">;
294294
def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb"> {
295295
let isLaneQ = 1;
@@ -1120,14 +1120,14 @@ def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">;
11201120

11211121
////////////////////////////////////////////////////////////////////////////////
11221122
// Crypto
1123-
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_AES)" in {
1123+
let ArchGuard = "__ARM_ARCH >= 8", TargetGuard = "aes" in {
11241124
def AESE : SInst<"vaese", "...", "QUc">;
11251125
def AESD : SInst<"vaesd", "...", "QUc">;
11261126
def AESMC : SInst<"vaesmc", "..", "QUc">;
11271127
def AESIMC : SInst<"vaesimc", "..", "QUc">;
11281128
}
11291129

1130-
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA2)" in {
1130+
let ArchGuard = "__ARM_ARCH >= 8", TargetGuard = "sha2" in {
11311131
def SHA1H : SInst<"vsha1h", "11", "Ui">;
11321132
def SHA1SU1 : SInst<"vsha1su1", "...", "QUi">;
11331133
def SHA256SU0 : SInst<"vsha256su0", "...", "QUi">;
@@ -1141,7 +1141,7 @@ def SHA256H2 : SInst<"vsha256h2", "....", "QUi">;
11411141
def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">;
11421142
}
11431143

1144-
let ArchGuard = "defined(__ARM_FEATURE_SHA3) && defined(__aarch64__)" in {
1144+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "sha3" in {
11451145
def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
11461146
def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
11471147
def RAX1 : SInst<"vrax1", "...", "QUl">;
@@ -1151,15 +1151,14 @@ def XAR : SInst<"vxar", "...I", "QUl">;
11511151
}
11521152
}
11531153

1154-
let ArchGuard = "defined(__ARM_FEATURE_SHA512) && defined(__aarch64__)" in {
1155-
1154+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "sha3" in {
11561155
def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
11571156
def SHA512su1 : SInst<"vsha512su1", "....", "QUl">;
11581157
def SHA512H : SInst<"vsha512h", "....", "QUl">;
11591158
def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
11601159
}
11611160

1162-
let ArchGuard = "defined(__ARM_FEATURE_SM3) && defined(__aarch64__)" in {
1161+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "sm4" in {
11631162
def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
11641163
def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
11651164
def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
@@ -1169,7 +1168,7 @@ def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">;
11691168
def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">;
11701169
}
11711170

1172-
let ArchGuard = "defined(__ARM_FEATURE_SM4) && defined(__aarch64__)" in {
1171+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "sm4" in {
11731172
def SM4E : SInst<"vsm4e", "...", "QUi">;
11741173
def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">;
11751174
}
@@ -1648,7 +1647,7 @@ def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcS
16481647
} // ArchGuard = "defined(__aarch64__)"
16491648

16501649
// ARMv8.2-A FP16 vector intrinsics for A32/A64.
1651-
let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
1650+
let TargetGuard = "fullfp16" in {
16521651

16531652
// ARMv8.2-A FP16 one-operand vector intrinsics.
16541653

@@ -1673,7 +1672,7 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
16731672
def VCVTP_U16 : SInst<"vcvtp_u16", "U.", "hQh">;
16741673

16751674
// Vector rounding
1676-
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
1675+
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)", TargetGuard = "fullfp16" in {
16771676
def FRINTZH : SInst<"vrnd", "..", "hQh">;
16781677
def FRINTNH : SInst<"vrndn", "..", "hQh">;
16791678
def FRINTAH : SInst<"vrnda", "..", "hQh">;
@@ -1722,7 +1721,7 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
17221721
// Max/Min
17231722
def VMAXH : SInst<"vmax", "...", "hQh">;
17241723
def VMINH : SInst<"vmin", "...", "hQh">;
1725-
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
1724+
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN)", TargetGuard = "fullfp16" in {
17261725
def FMAXNMH : SInst<"vmaxnm", "...", "hQh">;
17271726
def FMINNMH : SInst<"vminnm", "...", "hQh">;
17281727
}
@@ -1772,7 +1771,7 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
17721771
}
17731772

17741773
// ARMv8.2-A FP16 vector intrinsics for A64 only.
1775-
let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in {
1774+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in {
17761775

17771776
// Vector rounding
17781777
def FRINTIH : SInst<"vrndi", "..", "hQh">;
@@ -1867,19 +1866,19 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarc
18671866
}
18681867

18691868
// v8.2-A dot product instructions.
1870-
let ArchGuard = "defined(__ARM_FEATURE_DOTPROD)" in {
1869+
let TargetGuard = "dotprod" in {
18711870
def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">;
18721871
def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<<q)I", "iUiQiQUi", OP_DOT_LN>;
18731872
}
1874-
let ArchGuard = "defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)" in {
1873+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "dotprod" in {
18751874
// Variants indexing into a 128-bit vector are A64 only.
18761875
def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ> {
18771876
let isLaneQ = 1;
18781877
}
18791878
}
18801879

18811880
// v8.2-A FP16 fused multiply-add long instructions.
1882-
let ArchGuard = "defined(__ARM_FEATURE_FP16_FML) && defined(__aarch64__)" in {
1881+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp16fml" in {
18831882
def VFMLAL_LOW : SInst<"vfmlal_low", ">>..", "hQh">;
18841883
def VFMLSL_LOW : SInst<"vfmlsl_low", ">>..", "hQh">;
18851884
def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">;
@@ -1904,7 +1903,7 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_FML) && defined(__aarch64__)" in {
19041903
}
19051904
}
19061905

1907-
let ArchGuard = "defined(__ARM_FEATURE_MATMUL_INT8)" in {
1906+
let TargetGuard = "i8mm" in {
19081907
def VMMLA : SInst<"vmmla", "..(<<)(<<)", "QUiQi">;
19091908
def VUSMMLA : SInst<"vusmmla", "..(<<U)(<<)", "Qi">;
19101909

@@ -1921,7 +1920,7 @@ let ArchGuard = "defined(__ARM_FEATURE_MATMUL_INT8)" in {
19211920
}
19221921
}
19231922

1924-
let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)" in {
1923+
let TargetGuard = "bf16" in {
19251924
def VDOT_BF : SInst<"vbfdot", "..BB", "fQf">;
19261925
def VDOT_LANE_BF : SOpInst<"vbfdot_lane", "..B(Bq)I", "fQf", OP_BFDOT_LN>;
19271926
def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ> {
@@ -1965,7 +1964,7 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
19651964
}
19661965

19671966
// v8.3-A Vector complex addition intrinsics
1968-
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
1967+
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)", TargetGuard = "fullfp16" in {
19691968
def VCADD_ROT90_FP16 : SInst<"vcadd_rot90", "...", "h">;
19701969
def VCADD_ROT270_FP16 : SInst<"vcadd_rot270", "...", "h">;
19711970
def VCADDQ_ROT90_FP16 : SInst<"vcaddq_rot90", "QQQ", "h">;
@@ -1989,7 +1988,7 @@ let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in {
19891988
}
19901989

19911990
// V8.2-A BFloat intrinsics
1992-
let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)" in {
1991+
let TargetGuard = "bf16" in {
19931992
def VCREATE_BF : NoTestOpInst<"vcreate", ".(IU>)", "b", OP_CAST> {
19941993
let BigEndianSafe = 1;
19951994
}
@@ -2053,14 +2052,14 @@ let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)" in {
20532052
def SCALAR_CVT_F32_BF16 : SOpInst<"vcvtah_f32", "(1F>)(1!)", "b", OP_CVT_F32_BF16>;
20542053
}
20552054

2056-
let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) && !defined(__aarch64__)" in {
2055+
let ArchGuard = "!defined(__aarch64__)", TargetGuard = "bf16" in {
20572056
def VCVT_BF16_F32_A32_INTERNAL : WInst<"__a32_vcvt_bf16", "BQ", "f">;
20582057
def VCVT_BF16_F32_A32 : SOpInst<"vcvt_bf16", "BQ", "f", OP_VCVT_BF16_F32_A32>;
20592058
def VCVT_LOW_BF16_F32_A32 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A32>;
20602059
def VCVT_HIGH_BF16_F32_A32 : SOpInst<"vcvt_high_bf16", "BBQ", "Qf", OP_VCVT_BF16_F32_HI_A32>;
20612060
}
20622061

2063-
let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in {
2062+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in {
20642063
def VCVT_LOW_BF16_F32_A64_INTERNAL : WInst<"__a64_vcvtq_low_bf16", "BQ", "Hf">;
20652064
def VCVT_LOW_BF16_F32_A64 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A64>;
20662065
def VCVT_HIGH_BF16_F32_A64 : SInst<"vcvt_high_bf16", "BBQ", "Qf">;
@@ -2072,14 +2071,14 @@ let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) && defined(__aarc
20722071
def COPYQ_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..I.I", "Qb", OP_COPY_LN>;
20732072
}
20742073

2075-
let ArchGuard = "defined(__ARM_FEATURE_BF16) && !defined(__aarch64__)" in {
2074+
let ArchGuard = "!defined(__aarch64__)", TargetGuard = "bf16" in {
20762075
let BigEndianSafe = 1 in {
20772076
defm VREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
20782077
"csilUcUsUiUlhfPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQPcQPsQPl", "bQb">;
20792078
}
20802079
}
20812080

2082-
let ArchGuard = "defined(__ARM_FEATURE_BF16) && defined(__aarch64__)" in {
2081+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in {
20832082
let BigEndianSafe = 1 in {
20842083
defm VVREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
20852084
"csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", "bQb">;

clang/include/clang/Basic/arm_neon_incl.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ class Inst <string n, string p, string t, Operation o> {
265265
string Prototype = p;
266266
string Types = t;
267267
string ArchGuard = "";
268+
string TargetGuard = "";
268269

269270
Operation Operation = o;
270271
bit BigEndianSafe = 0;

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ using namespace clang::targets;
2525
const Builtin::Info AArch64TargetInfo::BuiltinInfo[] = {
2626
#define BUILTIN(ID, TYPE, ATTRS) \
2727
{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
28+
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
29+
{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
2830
#include "clang/Basic/BuiltinsNEON.def"
2931

3032
#define BUILTIN(ID, TYPE, ATTRS) \

clang/lib/Basic/Targets/ARM.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,8 @@ bool ARMTargetInfo::hasFeature(StringRef Feature) const {
630630
}
631631

632632
bool ARMTargetInfo::hasBFloat16Type() const {
633-
return HasBFloat16 && !SoftFloat;
633+
// The __bf16 type is generally available so long as we have any fp registers.
634+
return HasBFloat16 || (FPU && !SoftFloat);
634635
}
635636

636637
bool ARMTargetInfo::isValidCPUName(StringRef Name) const {
@@ -973,6 +974,8 @@ const Builtin::Info ARMTargetInfo::BuiltinInfo[] = {
973974
{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
974975
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \
975976
{#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr},
977+
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
978+
{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
976979
#include "clang/Basic/BuiltinsNEON.def"
977980

978981
#define BUILTIN(ID, TYPE, ATTRS) \

0 commit comments

Comments
 (0)