Skip to content

Commit cdf6693

Browse files
authored
[AArch64][SME] Add support for sme-fa64 (#70809)
1 parent a2e1de1 commit cdf6693

File tree

12 files changed, with 92 additions and 8 deletions.

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
685685
.Case("sme", HasSME)
686686
.Case("sme-f64f64", HasSMEF64F64)
687687
.Case("sme-i16i64", HasSMEI16I64)
688+
.Case("sme-fa64", HasSMEFA64)
688689
.Cases("memtag", "memtag2", HasMTE)
689690
.Case("sb", HasSB)
690691
.Case("predres", HasPredRes)
@@ -814,6 +815,13 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
814815
HasBFloat16 = true;
815816
HasFullFP16 = true;
816817
}
818+
if (Feature == "+sme-fa64") {
819+
FPU |= NeonMode;
820+
FPU |= SveMode;
821+
HasSME = true;
822+
HasSVE2 = true;
823+
HasSMEFA64 = true;
824+
}
817825
if (Feature == "+sb")
818826
HasSB = true;
819827
if (Feature == "+predres")

clang/lib/Basic/Targets/AArch64.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
8383
bool HasFMV = true;
8484
bool HasGCS = false;
8585
bool HasRCPC3 = false;
86+
bool HasSMEFA64 = false;
8687

8788
const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A;
8889

llvm/include/llvm/TargetParser/AArch64TargetParser.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ enum ArchExtKind : unsigned {
172172
AEK_SME_LUTv2 = 68, // FEAT_SME_LUTv2
173173
AEK_SMEF8F16 = 69, // FEAT_SME_F8F16
174174
AEK_SMEF8F32 = 70, // FEAT_SME_F8F32
175+
AEK_SMEFA64 = 71, // FEAT_SME_FA64
175176
AEK_NUM_EXTENSIONS
176177
};
177178
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -293,6 +294,7 @@ inline constexpr ExtensionInfo Extensions[] = {
293294
{"sme-lutv2", AArch64::AEK_SME_LUTv2, "+sme-lutv2", "-sme-lutv2", FEAT_INIT, "", 0},
294295
{"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+sme2,+fp8", 0},
295296
{"sme-f8f32", AArch64::AEK_SMEF8F32, "+sme-f8f32", "-sme-f8f32", FEAT_INIT, "+sme2,+fp8", 0},
297+
{"sme-fa64", AArch64::AEK_SMEFA64, "+sme-fa64", "-sme-fa64", FEAT_INIT, "", 0},
296298
// Special cases
297299
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
298300
};

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,9 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true",
508508
def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true",
509509
"Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>;
510510

511+
def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true",
512+
"Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>;
513+
511514
def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true",
512515
"Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>;
513516

@@ -800,7 +803,7 @@ def SME2Unsupported : AArch64Unsupported {
800803
}
801804

802805
def SMEUnsupported : AArch64Unsupported {
803-
let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64],
806+
let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64, HasSMEFA64],
804807
SME2Unsupported.F);
805808
}
806809

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">,
154154
AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
155155
def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">,
156156
AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
157+
def HasSMEFA64 : Predicate<"Subtarget->hasSMEFA64()">,
158+
AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
157159
def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">,
158160
AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
159161
def HasSME2 : Predicate<"Subtarget->hasSME2()">,

llvm/lib/Target/AArch64/AArch64SchedA64FX.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ def A64FXModel : SchedMachineModel {
2323
list<Predicate> UnsupportedFeatures =
2424
[HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
2525
HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
26-
HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32];
26+
HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32,
27+
HasSMEFA64];
2728

2829
let FullInstRWOverlapCheck = 0;
2930
}

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -502,13 +502,13 @@ bool AArch64Subtarget::isStreamingCompatible() const {
502502
}
503503

504504
bool AArch64Subtarget::isNeonAvailable() const {
505-
return hasNEON() && !isStreaming() && !isStreamingCompatible();
505+
return hasNEON() &&
506+
(hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
506507
}
507508

508-
bool AArch64Subtarget::isSVEAvailable() const{
509-
// FIXME: Also return false if FEAT_FA64 is set, but we can't do this yet
510-
// as we don't yet support the feature in LLVM.
511-
return hasSVE() && !isStreaming() && !isStreamingCompatible();
509+
bool AArch64Subtarget::isSVEAvailable() const {
510+
return hasSVE() &&
511+
(hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
512512
}
513513

514514
// If return address signing is enabled, tail calls are emitted as follows:

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3663,6 +3663,7 @@ static const struct Extension {
36633663
{"sme-lutv2", {AArch64::FeatureSME_LUTv2}},
36643664
{"sme-f8f16", {AArch64::FeatureSMEF8F16}},
36653665
{"sme-f8f32", {AArch64::FeatureSMEF8F32}},
3666+
{"sme-fa64", {AArch64::FeatureSMEFA64}},
36663667
};
36673668

36683669
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
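(New LLVM IR llc test file, separate from the file above; its path was not captured in this extract.)

Lines changed: 33 additions & 0 deletions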
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64
3+
; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64
4+
5+
6+
target triple = "aarch64-unknown-linux-gnu"
7+
8+
define half @fadda_v4f16(half %start, <4 x half> %a) {
9+
; FA64-LABEL: fadda_v4f16:
10+
; FA64: // %bb.0:
11+
; FA64-NEXT: ptrue p0.h, vl4
12+
; FA64-NEXT: // kill: def $h0 killed $h0 def $z0
13+
; FA64-NEXT: // kill: def $d1 killed $d1 def $z1
14+
; FA64-NEXT: fadda h0, p0, h0, z1.h
15+
; FA64-NEXT: // kill: def $h0 killed $h0 killed $z0
16+
; FA64-NEXT: ret
17+
;
18+
; NO-FA64-LABEL: fadda_v4f16:
19+
; NO-FA64: // %bb.0:
20+
; NO-FA64-NEXT: // kill: def $d1 killed $d1 def $z1
21+
; NO-FA64-NEXT: fadd h0, h0, h1
22+
; NO-FA64-NEXT: mov z2.h, z1.h[1]
23+
; NO-FA64-NEXT: fadd h0, h0, h2
24+
; NO-FA64-NEXT: mov z2.h, z1.h[2]
25+
; NO-FA64-NEXT: mov z1.h, z1.h[3]
26+
; NO-FA64-NEXT: fadd h0, h0, h2
27+
; NO-FA64-NEXT: fadd h0, h0, h1
28+
; NO-FA64-NEXT: ret
29+
%res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
30+
ret half %res
31+
}
32+
33+
declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
(Another new LLVM IR llc test file; its path was not captured in this extract.)

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=FA64
3+
; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s -check-prefix=NO-FA64
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
8+
; FA64-LABEL: mla8xi8:
9+
; FA64: // %bb.0:
10+
; FA64-NEXT: mla v2.8b, v0.8b, v1.8b
11+
; FA64-NEXT: fmov d0, d2
12+
; FA64-NEXT: ret
13+
;
14+
; NO-FA64-LABEL: mla8xi8:
15+
; NO-FA64: // %bb.0:
16+
; NO-FA64-NEXT: ptrue p0.b, vl8
17+
; NO-FA64-NEXT: // kill: def $d0 killed $d0 def $z0
18+
; NO-FA64-NEXT: // kill: def $d2 killed $d2 def $z2
19+
; NO-FA64-NEXT: // kill: def $d1 killed $d1 def $z1
20+
; NO-FA64-NEXT: mad z0.b, p0/m, z1.b, z2.b
21+
; NO-FA64-NEXT: // kill: def $d0 killed $d0 killed $z0
22+
; NO-FA64-NEXT: ret
23+
%tmp1 = mul <8 x i8> %A, %B;
24+
%tmp2 = add <8 x i8> %C, %tmp1;
25+
ret <8 x i8> %tmp2
26+
}
(New llvm-mc assembler test file; its path was not captured in this extract.)

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+sme-fa64 < %s | FileCheck %s
2+
3+
// Verify sme-fa64 implies SVE2
4+
ldnt1sh z0.s, p0/z, [z1.s]
5+
// CHECK: ldnt1sh { z0.s }, p0/z, [z1.s]

llvm/unittests/TargetParser/TargetParserTest.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1787,7 +1787,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
17871787
AArch64::AEK_SSVE_FP8DOT2, AArch64::AEK_FP8DOT4,
17881788
AArch64::AEK_SSVE_FP8DOT4, AArch64::AEK_LUT,
17891789
AArch64::AEK_SME_LUTv2, AArch64::AEK_SMEF8F16,
1790-
AArch64::AEK_SMEF8F32};
1790+
AArch64::AEK_SMEF8F32, AArch64::AEK_SMEFA64};
17911791

17921792
std::vector<StringRef> Features;
17931793

@@ -1872,6 +1872,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
18721872
EXPECT_TRUE(llvm::is_contained(Features, "+sme-lutv2"));
18731873
EXPECT_TRUE(llvm::is_contained(Features, "+sme-f8f16"));
18741874
EXPECT_TRUE(llvm::is_contained(Features, "+sme-f8f32"));
1875+
EXPECT_TRUE(llvm::is_contained(Features, "+sme-fa64"));
18751876

18761877
// Assuming we listed every extension above, this should produce the same
18771878
// result. (note that AEK_NONE doesn't have a name so it won't be in the
@@ -1987,6 +1988,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
19871988
{"f32mm", "nof32mm", "+f32mm", "-f32mm"},
19881989
{"f64mm", "nof64mm", "+f64mm", "-f64mm"},
19891990
{"sme", "nosme", "+sme", "-sme"},
1991+
{"sme-fa64", "nosme-fa64", "+sme-fa64", "-sme-fa64"},
19901992
{"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"},
19911993
{"sme-i16i64", "nosme-i16i64", "+sme-i16i64", "-sme-i16i64"},
19921994
{"sme-f16f16", "nosme-f16f16", "+sme-f16f16", "-sme-f16f16"},

0 commit comments

Comments
 (0)