Skip to content

Commit 9846bc9

Browse files
committed
[AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics
Adds the builtins: `void svldr_zt(uint64_t zt, const void *rn)` and `void svstr_zt(uint64_t zt, void *rn)`. And the intrinsics: `call void @llvm.aarch64.sme.ldr.zt(i32, ptr)` and `tail call void @llvm.aarch64.sme.str.zt(i32, ptr)`.
1 parent d5cfdca commit 9846bc9

File tree

11 files changed

+153
-13
lines changed

11 files changed

+153
-13
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,3 +298,8 @@ multiclass ZAAddSub<string n_suffix> {
298298

299299
defm SVADD : ZAAddSub<"add">;
300300
defm SVSUB : ZAAddSub<"sub">;
301+
302+
// Spill and fill of ZT0. Both builtins are guarded by the "sme2" target
// feature; argument 0 (the ZT index) is validated with ImmCheck0_0.
let TargetGuard = "sme2" in {
  def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt",
                      [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA],
                      [ImmCheck<0, ImmCheck0_0>]>;
  def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt",
                      [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA],
                      [ImmCheck<0, ImmCheck0_0>]>;
}

clang/include/clang/Basic/arm_sve.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,6 +1813,15 @@ def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_
18131813
def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>;
18141814
}
18151815

1816+
// //
1817+
// // Spill and fill of ZT0
1818+
// //
1819+
1820+
// let TargetGuard = "sme2" in {
1821+
// def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
1822+
// def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
1823+
// }
1824+
18161825
////////////////////////////////////////////////////////////////////////////////
18171826
// SVE2 - Extended table lookup/permute
18181827
let TargetGuard = "sve2" in {
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
8+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
9+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
10+
11+
#include <arm_sme_draft_spec_subject_to_change.h>
12+
13+
#ifdef SVE_OVERLOADED_FORMS
14+
// SVE_ACLE_FUNC(A1, A2) yields the overloaded name (A1 only) when SVE_OVERLOADED_FORMS is defined, and the fully suffixed name (A1##A2) otherwise.
15+
#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
16+
#else
17+
#define SVE_ACLE_FUNC(A1,A2) A1##A2
18+
#endif
19+
20+
// LDR ZT0
21+
22+
// CHECK-LABEL: @test_svldr_zt(
23+
// CHECK-NEXT: entry:
24+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]])
25+
// CHECK-NEXT: ret void
26+
//
27+
// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv(
28+
// CPP-CHECK-NEXT: entry:
29+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]])
30+
// CPP-CHECK-NEXT: ret void
31+
//
32+
void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za {
  // LDR ZT0: load ZT0 from the memory at 'base' (ZT index is the constant 0).
  svldr_zt(0, base);
}
35+
36+
37+
// STR ZT0
38+
39+
// CHECK-LABEL: @test_svstr_zt(
40+
// CHECK-NEXT: entry:
41+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]])
42+
// CHECK-NEXT: ret void
43+
//
44+
// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv(
45+
// CPP-CHECK-NEXT: entry:
46+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]])
47+
// CPP-CHECK-NEXT: ret void
48+
//
49+
void test_svstr_zt(void *base)
    __arm_streaming_compatible __arm_shared_za __arm_preserves_za
{
  // STR ZT0: store ZT0 to the memory at 'base' (ZT index is the constant 0).
  svstr_zt(0, base);
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2679,10 +2679,10 @@ let TargetPrefix = "aarch64" in {
26792679
def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>;
26802680

26812681
// Spill + fill
2682-
def int_aarch64_sme_ldr : DefaultAttrsIntrinsic<
2683-
[], [llvm_i32_ty, llvm_ptr_ty]>;
2684-
def int_aarch64_sme_str : DefaultAttrsIntrinsic<
2685-
[], [llvm_i32_ty, llvm_ptr_ty]>;
2682+
class SME_LDR_STR_Intrinsic
2683+
: DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>;
2684+
def int_aarch64_sme_ldr : SME_LDR_STR_Intrinsic;
2685+
def int_aarch64_sme_str : SME_LDR_STR_Intrinsic;
26862686

26872687
class SME_TileToVector_Intrinsic
26882688
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -3454,4 +3454,7 @@ let TargetPrefix = "aarch64" in {
34543454
def int_aarch64_sve_sel_x2 : SVE2_VG2_Sel_Intrinsic;
34553455
def int_aarch64_sve_sel_x4 : SVE2_VG4_Sel_Intrinsic;
34563456

3457+
def int_aarch64_sme_ldr_zt : SME_LDR_STR_Intrinsic;
3458+
def int_aarch64_sme_str_zt : SME_LDR_STR_Intrinsic;
3459+
34573460
}

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,15 +326,18 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
326326
return false;
327327
}
328328

329-
template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
329+
/// Select a constant tile index \p N as the register `BaseReg + N`.
///
/// \tparam BaseReg First register of the tile group being indexed.
/// \tparam Max     Largest index accepted for this tile group.
/// \returns true and sets \p Imm to the register operand on success; false if
///          \p N is not a constant or its value is greater than \p Max.
template <unsigned BaseReg, unsigned Max>
bool ImmToTile(SDValue N, SDValue &Imm) {
  auto *TileIdx = dyn_cast<ConstantSDNode>(N);
  if (!TileIdx)
    return false;

  const uint64_t Index = TileIdx->getZExtValue();
  // Refuse indices past the last register of this tile group.
  if (Index > Max)
    return false;

  Imm = CurDAG->getRegister(BaseReg + Index, MVT::Other);
  return true;
}
337-
338341
/// Form sequences of consecutive 64/128-bit registers for use in NEON
339342
/// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
340343
/// between 1 and 4 elements. If it contains a single element that is returned

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2746,6 +2746,23 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
27462746
return BB;
27472747
}
27482748

2749+
/// Expand an LDR_TX_PSEUDO / STR_TX_PSEUDO into the real LDR_TX / STR_TX.
///
/// \param MI      The pseudo; operand 0 is the ZT register, operand 1 the base.
/// \param BB      Block containing \p MI; the new instruction is built at MI's
///                position in it.
/// \param IsSpill true to emit STR_TX (ZT register added as a plain register
///                operand), false to emit LDR_TX (ZT register passed to
///                BuildMI as the destination register).
/// \returns \p BB.
MachineBasicBlock *
AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB,
                                       bool IsSpill) const {
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();
  Register ZTReg = MI.getOperand(0).getReg();

  MachineInstrBuilder MIB;
  if (!IsSpill) {
    // Fill: the ZT register is handed to BuildMI as the destination.
    MIB = BuildMI(*BB, MI, DL, TII->get(AArch64::LDR_TX), ZTReg);
  } else {
    // Spill: the ZT register is appended as an ordinary register operand.
    MIB = BuildMI(*BB, MI, DL, TII->get(AArch64::STR_TX));
    MIB.addReg(ZTReg);
  }

  MIB.add(MI.getOperand(1)); // Base
  MI.eraseFromParent();      // The pseudo is gone now.
  return BB;
}
2765+
27492766
MachineBasicBlock *
27502767
AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
27512768
MachineInstr &MI,
@@ -2862,6 +2879,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
28622879
return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
28632880
case AArch64::LDR_ZA_PSEUDO:
28642881
return EmitFill(MI, BB);
2882+
case AArch64::LDR_TX_PSEUDO:
2883+
return EmitZTSpillFill(MI, BB, /*IsSpill=*/false);
2884+
case AArch64::STR_TX_PSEUDO:
2885+
return EmitZTSpillFill(MI, BB, /*IsSpill=*/true);
28652886
case AArch64::ZERO_M_PSEUDO:
28662887
return EmitZero(MI, BB);
28672888
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,8 @@ class AArch64TargetLowering : public TargetLowering {
608608
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
609609
MachineInstr &MI, MachineBasicBlock *BB,
610610
bool HasTile) const;
611+
MachineBasicBlock *EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB,
612+
bool IsSpill) const;
611613
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
612614

613615
MachineBasicBlock *

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,12 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
440440
Reserved.set(SubReg);
441441
}
442442

443+
if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) {
444+
for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true);
445+
SubReg.isValid(); ++SubReg)
446+
Reserved.set(*SubReg);
447+
}
448+
443449
markSuperRegs(Reserved, AArch64::FPCR);
444450

445451
assert(checkAllSuperRegsMarked(Reserved));

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -541,8 +541,8 @@ defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops
541541

542542
def ZERO_T : sme2_zero_zt<"zero", 0b0001>;
543543

544-
def LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100>;
545-
def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>;
544+
defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>;
545+
defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>;
546546

547547
def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>;
548548
def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>;

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
14-
def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
15-
def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
16-
def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
17-
def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
13+
def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0, 0>", []>;
14+
def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0, 1>", []>;
15+
def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0, 3>", []>;
16+
def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0, 7>", []>;
17+
def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0, 15>", []>;
18+
def imm_to_tile_zt : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZT0, 0>", []>;
1819

1920
def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
2021
def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>;
@@ -3132,6 +3133,18 @@ class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
31323133
let mayStore = opc{7};
31333134
}
31343135

3136+
3137+
// For a ZT spill/fill mnemonic this multiclass emits:
//   * NAME          - the real instruction (class sme2_spill_fill_vector),
//   * NAME_PSEUDO   - a pseudo taking the ZT operand and base address, which
//                     is expanded through the custom inserter,
//   * a pattern selecting the intrinsic `op` to that pseudo.
multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc,
                                  SDPatternOperator op> {
  def NAME : sme2_spill_fill_vector<mnemonic, opc>;

  def NAME # _PSEUDO
      : Pseudo<(outs), (ins MatrixOp:$ZTt, GPR64sp:$base), []>, Sched<[]> {
    // Translated to actual instruction in AArch64ISelLowering.cpp
    let usesCustomInserter = 1;
  }

  def : Pat<(op (imm_to_tile_zt untyped:$tile), GPR64sp:$base),
            (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>;
}
3147+
31353148
//===----------------------------------------------------------------------===///
31363149
// SME2 move to/from lookup table
31373150
class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
3+
4+
; LDR
5+
6+
define void @ldr_zt0(ptr %addr) {
; CHECK-LABEL: ldr_zt0:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr zt0, [x0]
; CHECK-NEXT: ret
  call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %addr)
  ret void
}
14+
15+
; STR
16+
17+
define void @str_zt0(ptr %addr) {
; CHECK-LABEL: str_zt0:
; CHECK: // %bb.0:
; CHECK-NEXT: str zt0, [x0]
; CHECK-NEXT: ret
  call void @llvm.aarch64.sme.str.zt(i32 0, ptr %addr)
  ret void
}
25+
26+
declare void @llvm.aarch64.sme.ldr.zt(i32, ptr)
27+
declare void @llvm.aarch64.sme.str.zt(i32, ptr)

0 commit comments

Comments
 (0)