Skip to content

Commit 2b33591

Browse files
authored
[llvm][AArch64] Support -mcpu=apple-m4 (#95478)
1 parent 2693811 commit 2b33591

File tree

5 files changed

+57
-6
lines changed

5 files changed

+57
-6
lines changed

clang/test/Misc/target-invalid-cpu-note.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55

66
// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
77
// AARCH64: error: unknown target CPU 'not-a-cpu'
8-
// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-a725, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, cortex-x925, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}}
8+
// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-a725, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, cortex-x925, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-m4, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}}
99

1010
// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
1111
// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
12-
// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-a725, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, cortex-x925, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}}
12+
// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a520ae, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-a720ae, cortex-a725, cortex-r82, cortex-r82ae, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, cortex-x925, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-n3, neoverse-512tvb, neoverse-v1, neoverse-v2, neoverse-v3, neoverse-v3ae, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-a17, apple-m1, apple-m2, apple-m3, apple-m4, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, ampere1b, oryon-1, cobalt-100, grace{{$}}
1313

1414
// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
1515
// X86: error: unknown target CPU 'not-a-cpu'

llvm/include/llvm/TargetParser/AArch64TargetParser.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,14 @@ inline constexpr CpuInfo CpuInfos[] = {
521521
AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_SHA2,
522522
AArch64::AEK_SHA3, AArch64::AEK_FP16,
523523
AArch64::AEK_FP16FML})},
524-
524+
// Technically apple-m4 is ARMv9.2a, but a quirk of LLVM defines v9.0 as
525+
// requiring SVE, which is optional according to the Arm ARM and not
526+
// supported by the core. ARMv8.7a is the next closest choice.
527+
{"apple-m4", ARMV8_7A,
528+
AArch64::ExtensionBitset(
529+
{AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3,
530+
AArch64::AEK_FP16, AArch64::AEK_FP16FML, AArch64::AEK_SME,
531+
AArch64::AEK_SME2, AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64})},
525532
{"apple-s4", ARMV8_3A,
526533
AArch64::ExtensionBitset(
527534
{AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_FP16})},

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,22 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
398398
FeatureZCRegMove,
399399
FeatureZCZeroing]>;
400400

401+
def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
402+
"Apple M4", [
403+
FeatureAlternateSExtLoadCVTF32Pattern,
404+
FeatureArithmeticBccFusion,
405+
FeatureArithmeticCbzFusion,
406+
FeatureDisableLatencySchedHeuristic,
407+
FeatureFuseAddress,
408+
FeatureFuseAES,
409+
FeatureFuseArithmeticLogic,
410+
FeatureFuseCCSelect,
411+
FeatureFuseCryptoEOR,
412+
FeatureFuseLiterals,
413+
FeatureZCRegMove,
414+
FeatureZCZeroing
415+
]>;
416+
401417
def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
402418
"Samsung Exynos-M3 processors",
403419
[FeatureExynosCheapAsMoveHandling,
@@ -784,6 +800,14 @@ def ProcessorFeatures {
784800
FeatureNEON, FeaturePerfMon, FeatureSHA3,
785801
FeatureFullFP16, FeatureFP16FML,
786802
FeatureHCX];
803+
// Technically apple-m4 is ARMv9.2a. See the corresponding comment in
804+
// AArch64TargetParser.h.
805+
list<SubtargetFeature> AppleM4 = [HasV8_7aOps, FeatureCrypto, FeatureFPARMv8,
806+
FeatureNEON, FeaturePerfMon, FeatureSHA3,
807+
FeatureFullFP16, FeatureFP16FML,
808+
FeatureAES, FeatureBF16,
809+
FeatureSME2,
810+
FeatureSMEF64F64, FeatureSMEI16I64];
787811
list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
788812
FeaturePerfMon];
789813
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
@@ -1010,6 +1034,9 @@ def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16,
10101034
[TuneAppleA16]>;
10111035
def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17,
10121036
[TuneAppleA17]>;
1037+
def : ProcessorModel<"apple-m4", CycloneModel, ProcessorFeatures.AppleM4,
1038+
[TuneAppleM4]>;
1039+
10131040
// Mac CPUs
10141041
def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
10151042
[TuneAppleA14]>;
@@ -1025,8 +1052,8 @@ def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12,
10251052
[TuneAppleA12]>;
10261053

10271054
// Alias for the latest Apple processor model supported by LLVM.
1028-
def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA16,
1029-
[TuneAppleA16]>;
1055+
def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleM4,
1056+
[TuneAppleM4]>;
10301057

10311058
// Fujitsu A64FX
10321059
def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
182182
case AppleA15:
183183
case AppleA16:
184184
case AppleA17:
185+
case AppleM4:
185186
CacheLineSize = 64;
186187
PrefetchDistance = 280;
187188
MinPrefetchStride = 2048;
@@ -191,6 +192,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
191192
case AppleA15:
192193
case AppleA16:
193194
case AppleA17:
195+
case AppleM4:
194196
MaxInterleaveFactor = 4;
195197
break;
196198
default:

llvm/unittests/TargetParser/TargetParserTest.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,21 @@ INSTANTIATE_TEST_SUITE_P(
16441644
AArch64::AEK_I8MM, AArch64::AEK_JSCVT, AArch64::AEK_FCMA,
16451645
AArch64::AEK_PAUTH}),
16461646
"8.6-A"),
1647+
ARMCPUTestParams<AArch64::ExtensionBitset>(
1648+
"apple-m4", "armv8.7-a", "crypto-neon-fp-armv8",
1649+
AArch64::ExtensionBitset(
1650+
{AArch64::AEK_CRC, AArch64::AEK_AES,
1651+
AArch64::AEK_SHA2, AArch64::AEK_SHA3,
1652+
AArch64::AEK_FP, AArch64::AEK_SIMD,
1653+
AArch64::AEK_LSE, AArch64::AEK_RAS,
1654+
AArch64::AEK_RDM, AArch64::AEK_RCPC,
1655+
AArch64::AEK_DOTPROD, AArch64::AEK_FP16,
1656+
AArch64::AEK_FP16FML, AArch64::AEK_BF16,
1657+
AArch64::AEK_I8MM, AArch64::AEK_JSCVT,
1658+
AArch64::AEK_FCMA, AArch64::AEK_PAUTH,
1659+
AArch64::AEK_SME, AArch64::AEK_SME2,
1660+
AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64}),
1661+
"8.7-A"),
16471662
ARMCPUTestParams<AArch64::ExtensionBitset>(
16481663
"apple-s4", "armv8.3-a", "crypto-neon-fp-armv8",
16491664
AArch64::ExtensionBitset(
@@ -1872,7 +1887,7 @@ INSTANTIATE_TEST_SUITE_P(
18721887
ARMCPUTestParams<AArch64::ExtensionBitset>::PrintToStringParamName);
18731888

18741889
// Note: number of CPUs includes aliases.
1875-
static constexpr unsigned NumAArch64CPUArchs = 79;
1890+
static constexpr unsigned NumAArch64CPUArchs = 80;
18761891

18771892
TEST(TargetParserTest, testAArch64CPUArchList) {
18781893
SmallVector<StringRef, NumAArch64CPUArchs> List;

0 commit comments

Comments
 (0)