Skip to content

Commit 56e9f55

Browse files
Dinar Temirbulatov
authored and committed
[AArch64][SME] Enable memory operations lowering to custom SME functions.
This change allows memcpy, memset, and memmove to be lowered to the custom SME versions provided by LibRT.
1 parent 71d64ed commit 56e9f55

File tree

5 files changed

+153
-2
lines changed

5 files changed

+153
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7659,8 +7659,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
76597659
SMEAttrs CalleeAttrs, CallerAttrs(MF.getFunction());
76607660
if (CLI.CB)
76617661
CalleeAttrs = SMEAttrs(*CLI.CB);
7662-
else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
7663-
CalleeAttrs = SMEAttrs(ES->getSymbol());
7662+
else if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
7663+
if (StringRef(ES->getSymbol()) == StringRef("__arm_sc_memcpy")) {
7664+
auto Attrs = AttributeList().addFnAttribute(
7665+
*DAG.getContext(), "aarch64_pstate_sm_compatible");
7666+
CalleeAttrs = SMEAttrs(Attrs);
7667+
} else
7668+
CalleeAttrs = SMEAttrs(ES->getSymbol());
7669+
}
76647670

76657671
auto DescribeCallsite =
76667672
[&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {

llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,74 @@ SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
7676
}
7777
}
7878

79+
/// Emit a call to the streaming-compatible SME routine that replaces the
/// generic memory libcall \p LC: __arm_sc_memcpy, __arm_sc_memmove or
/// __arm_sc_memset.
///
/// \param DAG   The DAG the call is built in.
/// \param DL    Debug location attached to the call.
/// \param Chain Incoming chain for the call.
/// \param Dst   Destination pointer (first argument).
/// \param Src   Source pointer for MEMCPY/MEMMOVE; the fill value for MEMSET.
/// \param Size  Length operand (last argument).
/// \param LC    The generic libcall being replaced.
/// \returns the output chain of the lowered call, or an empty SDValue when
///          \p LC is not MEMCPY, MEMMOVE or MEMSET.
SDValue AArch64SelectionDAGInfo::EmitSpecializedLibcall(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, RTLIB::Libcall LC) const {
  const AArch64Subtarget &STI =
      DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
  const AArch64TargetLowering *TLI = STI.getTargetLowering();
  LLVMContext &Ctx = *DAG.getContext();
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(Ctx);

  // Pick the routine name and build the second argument. Each argument gets
  // its own ArgListEntry so that a type chosen for one argument can never
  // leak into the next one.
  const char *RoutineName = nullptr;
  TargetLowering::ArgListEntry SecondArg;
  switch (LC) {
  case RTLIB::MEMCPY:
    RoutineName = "__arm_sc_memcpy";
    SecondArg.Node = Src;
    SecondArg.Ty = IntPtrTy;
    break;
  case RTLIB::MEMMOVE:
    RoutineName = "__arm_sc_memmove";
    SecondArg.Node = Src;
    SecondArg.Ty = IntPtrTy;
    break;
  case RTLIB::MEMSET:
    RoutineName = "__arm_sc_memset";
    // The fill value is passed as a 32-bit integer: widen narrower values
    // with a zero-extend and narrow wider ones with a truncate.
    SecondArg.Node = DAG.getZExtOrTrunc(Src, DL, MVT::i32);
    SecondArg.Ty = Type::getInt32Ty(Ctx);
    SecondArg.IsSExt = false;
    break;
  default:
    // Not a memory operation that has a streaming-compatible replacement.
    return SDValue();
  }

  TargetLowering::ArgListEntry DstArg;
  DstArg.Node = Dst;
  DstArg.Ty = IntPtrTy;

  // The length is always pointer-sized. Previously the MEMSET path reused the
  // entry whose type had just been set to i32, so the size was passed as a
  // 32-bit value.
  TargetLowering::ArgListEntry SizeArg;
  SizeArg.Node = Size;
  SizeArg.Ty = IntPtrTy;

  TargetLowering::ArgListTy Args;
  Args.push_back(DstArg);
  Args.push_back(SecondArg);
  Args.push_back(SizeArg);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setLibCallee(
          // Query the convention of the libcall actually being replaced
          // rather than always asking for MEMCPY's.
          TLI->getLibcallCallingConv(LC), Type::getVoidTy(Ctx),
          DAG.getExternalSymbol(RoutineName,
                                TLI->getPointerTy(DAG.getDataLayout())),
          std::move(Args))
      .setDiscardResult();

  // Only the chain result of the call is of interest to callers.
  return TLI->LowerCallTo(CLI).second;
}
135+
79136
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
80137
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
81138
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
82139
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
83140
const AArch64Subtarget &STI =
84141
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
142+
143+
SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
144+
if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface())
145+
return EmitSpecializedLibcall(DAG, DL, Chain, Dst, Src, Size,
146+
RTLIB::MEMCPY);
85147
if (STI.hasMOPS())
86148
return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size,
87149
Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
@@ -95,6 +157,11 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
95157
const AArch64Subtarget &STI =
96158
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
97159

160+
SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
161+
if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface())
162+
return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
163+
RTLIB::MEMSET);
164+
98165
if (STI.hasMOPS()) {
99166
return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
100167
Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
@@ -108,6 +175,11 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
108175
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
109176
const AArch64Subtarget &STI =
110177
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
178+
179+
SMEAttrs Attrs(DAG.getMachineFunction().getFunction());
180+
if (Attrs.hasStreamingBody() || Attrs.hasStreamingCompatibleInterface())
181+
return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
182+
RTLIB::MEMMOVE);
111183
if (STI.hasMOPS()) {
112184
return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size,
113185
Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);

llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo {
4747
SDValue Chain, SDValue Op1, SDValue Op2,
4848
MachinePointerInfo DstPtrInfo,
4949
bool ZeroData) const override;
50+
51+
/// Emit a call to the streaming-compatible SME routine (__arm_sc_memcpy,
/// __arm_sc_memmove or __arm_sc_memset) that corresponds to \p LC.
/// \p Src is the source pointer for MEMCPY/MEMMOVE and the fill value for
/// MEMSET. Returns the chain of the emitted call, or an empty SDValue if
/// \p LC is not one of MEMCPY, MEMMOVE or MEMSET.
SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &DL,
52+
SDValue Chain, SDValue Dst, SDValue Src,
53+
SDValue Size, RTLIB::Libcall LC) const;
5054
};
5155
}
5256

llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) {
5151
if (FuncName == "__arm_tpidr2_restore")
5252
Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::ZA_Shared |
5353
SMEAttrs::SME_ABI_Routine);
54+
if (FuncName == "__arm_sc_memcpy" || FuncName == "__arm_sc_memset" ||
55+
FuncName == "__arm_sc_memmove")
56+
Bitmask |= SMEAttrs::SM_Compatible;
5457
}
5558

5659
SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
3+
4+
@dst = global [512 x i8] zeroinitializer, align 1
5+
@src = global [512 x i8] zeroinitializer, align 1
6+
7+
; A streaming-compatible function whose llvm.memcpy of %n bytes from @src to
; @dst is expected to be emitted as a call to __arm_sc_memcpy, with the
; destination in x0, the source in x1 and the length in x2.
define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" {
8+
; CHECK-LABEL: sc_memcpy:
9+
; CHECK: // %bb.0: // %entry
10+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
11+
; CHECK-NEXT: .cfi_def_cfa_offset 16
12+
; CHECK-NEXT: .cfi_offset w30, -16
13+
; CHECK-NEXT: mov x2, x0
14+
; CHECK-NEXT: adrp x0, :got:dst
15+
; CHECK-NEXT: adrp x1, :got:src
16+
; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
17+
; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
18+
; CHECK-NEXT: bl __arm_sc_memcpy
19+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
20+
; CHECK-NEXT: ret
21+
entry:
22+
tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
23+
ret void
24+
}
25+
26+
; A streaming-compatible function whose llvm.memset of %n bytes of value 2 into
; @dst is expected to be emitted as a call to __arm_sc_memset, with the
; destination in x0 and the fill value in w1.
; NOTE(review): the "kill: def $w2 killed $w2 killed $x2" line below shows the
; length reaching the call as the 32-bit w2 rather than the full x2 - confirm
; that passing the size as a 32-bit value is intended.
define void @sc_memset(i64 noundef %n) "aarch64_pstate_sm_compatible" {
27+
; CHECK-LABEL: sc_memset:
28+
; CHECK: // %bb.0: // %entry
29+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
30+
; CHECK-NEXT: .cfi_def_cfa_offset 16
31+
; CHECK-NEXT: .cfi_offset w30, -16
32+
; CHECK-NEXT: mov x2, x0
33+
; CHECK-NEXT: adrp x0, :got:dst
34+
; CHECK-NEXT: mov w1, #2 // =0x2
35+
; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
36+
; CHECK-NEXT: // kill: def $w2 killed $w2 killed $x2
37+
; CHECK-NEXT: bl __arm_sc_memset
38+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
39+
; CHECK-NEXT: ret
40+
entry:
41+
tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false)
42+
ret void
43+
}
44+
45+
; A streaming-compatible function whose llvm.memmove of %n bytes from @src to
; @dst is expected to be emitted as a call to __arm_sc_memmove, with the
; destination in x0, the source in x1 and the length in x2.
define void @sc_memmove(i64 noundef %n) "aarch64_pstate_sm_compatible" {
46+
; CHECK-LABEL: sc_memmove:
47+
; CHECK: // %bb.0: // %entry
48+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
49+
; CHECK-NEXT: .cfi_def_cfa_offset 16
50+
; CHECK-NEXT: .cfi_offset w30, -16
51+
; CHECK-NEXT: mov x2, x0
52+
; CHECK-NEXT: adrp x0, :got:dst
53+
; CHECK-NEXT: adrp x1, :got:src
54+
; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst]
55+
; CHECK-NEXT: ldr x1, [x1, :got_lo12:src]
56+
; CHECK-NEXT: bl __arm_sc_memmove
57+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
58+
; CHECK-NEXT: ret
59+
entry:
60+
tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false)
61+
ret void
62+
}
63+
64+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
65+
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
66+
declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)

0 commit comments

Comments
 (0)