Skip to content

Commit a72cc95

Browse files
[CodeGen][AArch64] Add support for LDAPR
This diff adds support for LDAPR (RCPC extension) (llvm#55561).
Differential revision: https://reviews.llvm.org/D126250
Test plan: ninja check-all
1 parent c63d4fa commit a72cc95

File tree

6 files changed

+2046
-9
lines changed

6 files changed

+2046
-9
lines changed

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,10 @@ def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
6464
def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true",
6565
"Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules">;
6666

67+
def FeatureLDAPR : SubtargetFeature<"ldapr", "HasLDAPR", "true",
68+
"Use LDAPR to lower atomic loads; experimental until we "
69+
"have more testing/a formal correctness proof">;
70+
6771
def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
6872
"Enable out of line atomics to support LSE instructions">;
6973

llvm/lib/Target/AArch64/AArch64InstrAtomics.td

Lines changed: 29 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,22 +27,43 @@ def : Pat<(atomic_fence (timm), (timm)), (DMB (i32 0xb))>;
2727
// supported, but when they're relaxed and anything can be used, all the
2828
// standard modes would be valid and may give efficiency gains.
2929

30+
// An atomic load operation that does not need either acquire or release
31+
// semantics.
32+
class relaxed_load<PatFrag base>
33+
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
34+
let IsAtomic = 1;
35+
let IsAtomicOrderingAcquireOrStronger = 0;
36+
}
37+
3038
// An atomic load operation that actually needs acquire semantics.
3139
class acquiring_load<PatFrag base>
3240
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
3341
let IsAtomic = 1;
34-
let IsAtomicOrderingAcquireOrStronger = 1;
42+
let IsAtomicOrderingAcquire = 1;
3543
}
3644

37-
// An atomic load operation that does not need either acquire or release
38-
// semantics.
39-
class relaxed_load<PatFrag base>
45+
// An atomic load operation that needs sequential consistency.
46+
class seq_cst_load<PatFrag base>
4047
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
4148
let IsAtomic = 1;
42-
let IsAtomicOrderingAcquireOrStronger = 0;
49+
let IsAtomicOrderingSequentiallyConsistent = 1;
50+
}
51+
52+
// RCPC extension, currently opt-in under a separate feature.
53+
let Predicates = [HasLDAPR] in {
54+
// v8.3 Release Consistent Processor Consistent support, optional in v8.2.
55+
// 8-bit loads
56+
def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDAPRB GPR64sp:$ptr)>;
57+
// 16-bit loads
58+
def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDAPRH GPR64sp:$ptr)>;
59+
// 32-bit loads
60+
def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDAPRW GPR64sp:$ptr)>;
61+
// 64-bit loads
62+
def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDAPRX GPR64sp:$ptr)>;
4363
}
4464

4565
// 8-bit loads
66+
def : Pat<(seq_cst_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
4667
def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
4768
def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
4869
ro_Wextend8:$offset)),
@@ -58,6 +79,7 @@ def : Pat<(relaxed_load<atomic_load_8>
5879
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
5980

6081
// 16-bit loads
82+
def : Pat<(seq_cst_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
6183
def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
6284
def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
6385
ro_Wextend16:$extend)),
@@ -73,6 +95,7 @@ def : Pat<(relaxed_load<atomic_load_16>
7395
(LDURHHi GPR64sp:$Rn, simm9:$offset)>;
7496

7597
// 32-bit loads
98+
def : Pat<(seq_cst_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
7699
def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
77100
def : Pat<(relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
78101
ro_Wextend32:$extend)),
@@ -88,6 +111,7 @@ def : Pat<(relaxed_load<atomic_load_32>
88111
(LDURWi GPR64sp:$Rn, simm9:$offset)>;
89112

90113
// 64-bit loads
114+
def : Pat<(seq_cst_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
91115
def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
92116
def : Pat<(relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
93117
ro_Wextend64:$extend)),

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,8 @@ def HasNEONorSME
160160
"neon or sme">;
161161
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
162162
AssemblerPredicate<(all_of FeatureRCPC), "rcpc">;
163+
def HasLDAPR : Predicate<"Subtarget->hasLDAPR()">,
164+
AssemblerPredicate<(all_of FeatureLDAPR), "ldapr">;
163165
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
164166
AssemblerPredicate<(all_of FeatureAltFPCmp), "altnzcv">;
165167
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2790,12 +2790,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
27902790
return false;
27912791

27922792
if (isa<GLoad>(LdSt)) {
2793-
static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2794-
AArch64::LDARW, AArch64::LDARX};
2793+
static constexpr unsigned LDAPROpcodes[] = {
2794+
AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2795+
static constexpr unsigned LDAROpcodes[] = {
2796+
AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2797+
ArrayRef<unsigned> Opcodes =
2798+
STI.hasLDAPR() && Order != AtomicOrdering::SequentiallyConsistent
2799+
? LDAPROpcodes
2800+
: LDAROpcodes;
27952801
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
27962802
} else {
2797-
static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2798-
AArch64::STLRW, AArch64::STLRX};
2803+
static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2804+
AArch64::STLRW, AArch64::STLRX};
27992805
Register ValReg = LdSt.getReg(0);
28002806
if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
28012807
// Emit a subreg copy of 32 bits.

0 commit comments

Comments
 (0)