-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ARM64EC] Fix arm_neon.h on ARM64EC. #88572
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Since 97fe519, in ARM64EC mode, we don't define __aarch64__. Fix various preprocessor guards to account for this.
@llvm/pr-subscribers-clang Author: Eli Friedman (efriedma-quic) ChangesSince 97fe519, in ARM64EC mode, we don't define Full diff: https://github.com/llvm/llvm-project/pull/88572.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td
index cb2a09303e8e12..d36b4617bef5d2 100644
--- a/clang/include/clang/Basic/arm_fp16.td
+++ b/clang/include/clang/Basic/arm_fp16.td
@@ -14,7 +14,7 @@
include "arm_neon_incl.td"
// ARMv8.2-A FP16 intrinsics.
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fullfp16" in {
// Negate
def VNEGSH : SInst<"vneg", "11", "Sh">;
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 7edac5afafaa99..6d655c39360d3b 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -605,11 +605,11 @@ def VQDMULL_LANE : SOpInst<"vqdmull_lane", "(>Q)..I", "si", OP_QDMULL_LN>;
def VQDMULH_N : SOpInst<"vqdmulh_n", "..1", "siQsQi", OP_QDMULH_N>;
def VQRDMULH_N : SOpInst<"vqrdmulh_n", "..1", "siQsQi", OP_QRDMULH_N>;
-let ArchGuard = "!defined(__aarch64__)" in {
+let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)" in {
def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>;
def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>;
}
-let ArchGuard = "defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
def A64_VQDMULH_LANE : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">;
def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">;
}
@@ -686,7 +686,7 @@ multiclass REINTERPRET_CROSS_TYPES<string TypesA, string TypesB> {
// E.3.31 Vector reinterpret cast operations
def VREINTERPRET : REINTERPRET_CROSS_SELF<"csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs"> {
- let ArchGuard = "!defined(__aarch64__)";
+ let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)";
let BigEndianSafe = 1;
}
@@ -714,7 +714,7 @@ def VADDP : WInst<"vadd", "...", "PcPsPlQPcQPsQPl">;
////////////////////////////////////////////////////////////////////////////////
// AArch64 Intrinsics
-let ArchGuard = "defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
////////////////////////////////////////////////////////////////////////////////
// Load/Store
@@ -1091,14 +1091,14 @@ let isLaneQ = 1 in {
def VQDMULH_LANEQ : SInst<"vqdmulh_laneq", "..QI", "siQsQi">;
def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">;
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" in {
def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
let isLaneQ = 1;
}
def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN> {
let isLaneQ = 1;
}
-} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a"
+} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a"
// Note: d type implemented by SCALAR_VMULX_LANE
def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>;
@@ -1143,7 +1143,7 @@ def SHA256H2 : SInst<"vsha256h2", "....", "QUi">;
def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">;
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "sha3" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3" in {
def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
def RAX1 : SInst<"vrax1", "...", "QUl">;
@@ -1153,14 +1153,14 @@ def XAR : SInst<"vxar", "...I", "QUl">;
}
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "sha3" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sha3" in {
def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
def SHA512su1 : SInst<"vsha512su1", "....", "QUl">;
def SHA512H : SInst<"vsha512h", "....", "QUl">;
def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "sm4" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4" in {
def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
@@ -1170,7 +1170,7 @@ def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">;
def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">;
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "sm4" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4" in {
def SM4E : SInst<"vsm4e", "...", "QUi">;
def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">;
}
@@ -1193,7 +1193,7 @@ def FCVTAS_S32 : SInst<"vcvta_s32", "S.", "fQf">;
def FCVTAU_S32 : SInst<"vcvta_u32", "U.", "fQf">;
}
-let ArchGuard = "defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
def FCVTNS_S64 : SInst<"vcvtn_s64", "S.", "dQd">;
def FCVTNU_S64 : SInst<"vcvtn_u64", "U.", "dQd">;
def FCVTPS_S64 : SInst<"vcvtp_s64", "S.", "dQd">;
@@ -1217,7 +1217,7 @@ def FRINTZ_S32 : SInst<"vrnd", "..", "fQf">;
def FRINTI_S32 : SInst<"vrndi", "..", "fQf">;
}
-let ArchGuard = "defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in {
+let ArchGuard = "(defined(__aarch64__) || defined(__arm64ec__)) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in {
def FRINTN_S64 : SInst<"vrndn", "..", "dQd">;
def FRINTA_S64 : SInst<"vrnda", "..", "dQd">;
def FRINTP_S64 : SInst<"vrndp", "..", "dQd">;
@@ -1227,7 +1227,7 @@ def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">;
def FRINTI_S64 : SInst<"vrndi", "..", "dQd">;
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.5a" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.5a" in {
def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">;
def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;
@@ -1247,7 +1247,7 @@ def FMAXNM_S32 : SInst<"vmaxnm", "...", "fQf">;
def FMINNM_S32 : SInst<"vminnm", "...", "fQf">;
}
-let ArchGuard = "defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in {
+let ArchGuard = "(defined(__aarch64__) || defined(__arm64ec__)) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in {
def FMAXNM_S64 : SInst<"vmaxnm", "...", "dQd">;
def FMINNM_S64 : SInst<"vminnm", "...", "dQd">;
}
@@ -1289,7 +1289,7 @@ def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">;
// itself during generation so, unlike all other intrinsics, this one should
// include *all* types, not just additional ones.
def VVREINTERPRET : REINTERPRET_CROSS_SELF<"csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk"> {
- let ArchGuard = "defined(__aarch64__)";
+ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)";
let BigEndianSafe = 1;
}
@@ -1401,7 +1401,7 @@ def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">;
// Scalar Integer Saturating Rounding Doubling Multiply Half High
def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">;
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a" in {
////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">;
@@ -1409,7 +1409,7 @@ def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">;
////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
def SCALAR_SQRDMLSH : SInst<"vqrdmlsh", "1111", "SsSi">;
-} // ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.1a"
+} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a"
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Multiply Extended
@@ -1651,7 +1651,7 @@ def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcS
let isLaneQ = 1;
}
-} // ArchGuard = "defined(__aarch64__)"
+} // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)"
// ARMv8.2-A FP16 vector intrinsics for A32/A64.
let TargetGuard = "fullfp16" in {
@@ -1775,7 +1775,7 @@ def VEXTH : WInst<"vext", "...I", "hQh">;
def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>;
// ARMv8.2-A FP16 vector intrinsics for A64 only.
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fullfp16" in {
// Vector rounding
def FRINTIH : SInst<"vrndi", "..", "hQh">;
@@ -1856,7 +1856,7 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in {
def FMINNMVH : SInst<"vminnmv", "1.", "hQh">;
}
-let ArchGuard = "defined(__aarch64__)" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
// Permutation
def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>;
def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>;
@@ -1876,7 +1876,7 @@ let TargetGuard = "dotprod" in {
def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">;
def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<<q)I", "iUiQiQUi", OP_DOT_LN>;
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "dotprod" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "dotprod" in {
// Variants indexing into a 128-bit vector are A64 only.
def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ> {
let isLaneQ = 1;
@@ -1884,7 +1884,7 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "dotprod" in {
}
// v8.2-A FP16 fused multiply-add long instructions.
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp16fml" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "fp16fml" in {
def VFMLAL_LOW : SInst<"vfmlal_low", ">>..", "hQh">;
def VFMLSL_LOW : SInst<"vfmlsl_low", ">>..", "hQh">;
def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">;
@@ -1918,7 +1918,7 @@ let TargetGuard = "i8mm" in {
def VUSDOT_LANE : SOpInst<"vusdot_lane", "..(<<U)(<<q)I", "iQi", OP_USDOT_LN>;
def VSUDOT_LANE : SOpInst<"vsudot_lane", "..(<<)(<<qU)I", "iQi", OP_SUDOT_LN>;
- let ArchGuard = "defined(__aarch64__)" in {
+ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
let isLaneQ = 1 in {
def VUSDOT_LANEQ : SOpInst<"vusdot_laneq", "..(<<U)(<<Q)I", "iQi", OP_USDOT_LNQ>;
def VSUDOT_LANEQ : SOpInst<"vsudot_laneq", "..(<<)(<<QU)I", "iQi", OP_SUDOT_LNQ>;
@@ -1986,7 +1986,7 @@ let TargetGuard = "v8.3a" in {
defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "v8.3a" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.3a" in {
def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">;
def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
@@ -2058,14 +2058,14 @@ let TargetGuard = "bf16" in {
def SCALAR_CVT_F32_BF16 : SOpInst<"vcvtah_f32", "(1F>)(1!)", "b", OP_CVT_F32_BF16>;
}
-let ArchGuard = "!defined(__aarch64__)", TargetGuard = "bf16" in {
+let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16" in {
def VCVT_BF16_F32_A32_INTERNAL : WInst<"__a32_vcvt_bf16", "BQ", "f">;
def VCVT_BF16_F32_A32 : SOpInst<"vcvt_bf16", "BQ", "f", OP_VCVT_BF16_F32_A32>;
def VCVT_LOW_BF16_F32_A32 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A32>;
def VCVT_HIGH_BF16_F32_A32 : SOpInst<"vcvt_high_bf16", "BBQ", "Qf", OP_VCVT_BF16_F32_HI_A32>;
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16" in {
def VCVT_LOW_BF16_F32_A64_INTERNAL : WInst<"__a64_vcvtq_low_bf16", "BQ", "Hf">;
def VCVT_LOW_BF16_F32_A64 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A64>;
def VCVT_HIGH_BF16_F32_A64 : SInst<"vcvt_high_bf16", "BBQ", "Qf">;
@@ -2077,14 +2077,14 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in {
def COPYQ_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..I.I", "Qb", OP_COPY_LN>;
}
-let ArchGuard = "!defined(__aarch64__)", TargetGuard = "bf16" in {
+let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16" in {
let BigEndianSafe = 1 in {
defm VREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
"csilUcUsUiUlhfPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQPcQPsQPl", "bQb">;
}
}
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16" in {
let BigEndianSafe = 1 in {
defm VVREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
"csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", "bQb">;
@@ -2092,7 +2092,7 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "bf16" in {
}
// v8.9a/v9.4a LRCPC3 intrinsics
-let ArchGuard = "defined(__aarch64__)", TargetGuard = "rcpc3" in {
+let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3" in {
def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 04e1acc2705004..56f1fdf9ef574f 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2266,7 +2266,7 @@ static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
InIfdef = false;
}
if (!InIfdef && IsA64) {
- OS << "#ifdef __aarch64__\n";
+ OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
InIfdef = true;
}
@@ -2299,7 +2299,7 @@ static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
InIfdef = false;
}
if (!InIfdef && IsA64) {
- OS << "#ifdef __aarch64__\n";
+ OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
InIfdef = true;
}
@@ -2381,7 +2381,7 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "#include <arm_vector_types.h>\n";
// For now, signedness of polynomial types depends on target
- OS << "#ifdef __aarch64__\n";
+ OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
OS << "typedef uint8_t poly8_t;\n";
OS << "typedef uint16_t poly16_t;\n";
OS << "typedef uint64_t poly64_t;\n";
@@ -2582,7 +2582,7 @@ void NeonEmitter::runVectorTypes(raw_ostream &OS) {
OS << "typedef float float32_t;\n";
OS << "typedef __fp16 float16_t;\n";
- OS << "#ifdef __aarch64__\n";
+ OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
OS << "typedef double float64_t;\n";
OS << "#endif\n\n";
|
dpaoliello
approved these changes
Apr 12, 2024
cjacek
approved these changes
Apr 15, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
clang:frontend
Language frontend issues, e.g. anything involving "Sema"
clang
Clang issues not falling into any other category
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Since 97fe519, in ARM64EC mode, we don't define
__aarch64__
. Fix various preprocessor guards to account for this.