Skip to content

Commit 9fe5700

Browse files
[AArch64] Add support for v8.4a ldapur/stlur
The AArch64 backend now supports the v8.4-a FEAT_LRCPC2 atomic Load-Acquire RCpc (LDAPUR) and Store-Release (STLUR) register-unscaled instructions.
1 parent a879971 commit 9fe5700

File tree

6 files changed

+63
-51
lines changed

6 files changed

+63
-51
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,15 @@ static bool isWorthFoldingADDlow(SDValue N) {
997997
return true;
998998
}
999999

1000+
/// Check if the immediate offset is valid as a scaled immediate.
1001+
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1002+
unsigned Size) {
1003+
if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1004+
Offset < (Range << Log2_32(Size)))
1005+
return true;
1006+
return false;
1007+
}
1008+
10001009
/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
10011010
/// immediate" address. The "Size" argument is the size in bytes of the memory
10021011
/// reference, which determines the scale.
@@ -1092,7 +1101,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
10921101
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
10931102
int64_t RHSC = (int64_t)RHS->getZExtValue();
10941103
unsigned Scale = Log2_32(Size);
1095-
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
1104+
if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
10961105
Base = N.getOperand(0);
10971106
if (Base.getOpcode() == ISD::FrameIndex) {
10981107
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
@@ -1130,10 +1139,6 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
11301139
return false;
11311140
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
11321141
int64_t RHSC = RHS->getSExtValue();
1133-
// If the offset is valid as a scaled immediate, don't match here.
1134-
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
1135-
RHSC < (0x1000 << Log2_32(Size)))
1136-
return false;
11371142
if (RHSC >= -256 && RHSC < 256) {
11381143
Base = N.getOperand(0);
11391144
if (Base.getOpcode() == ISD::FrameIndex) {
@@ -1312,11 +1317,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
13121317
// LDR X2, [BaseReg, X0]
13131318
if (isa<ConstantSDNode>(RHS)) {
13141319
int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1315-
unsigned Scale = Log2_32(Size);
13161320
// Skip if the immediate can be selected by the load/store addressing mode.
13171321
// Also skip if the immediate can be encoded by a single ADD (SUB is also
13181322
// checked by using -ImmOff).
1319-
if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1323+
if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
13201324
isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
13211325
return false;
13221326

llvm/lib/Target/AArch64/AArch64InstrAtomics.td

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,34 @@ let Predicates = [HasRCPC3, HasNEON] in {
573573
(i64 (bitconvert (v1f64 VecListOne64:$Vt)))),
574574
(STL1 (SUBREG_TO_REG (i64 0), VecListOne64:$Vt, dsub), (i64 0), GPR64sp:$Rn)>;
575575
}
576+
577+
// v8.4a FEAT_LRCPC2 patterns
578+
let Predicates = [HasRCPC_IMMO] in {
579+
// Load-Acquire RCpc Register unscaled loads
580+
def : Pat<(acquiring_load<atomic_load_az_8>
581+
(am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
582+
(LDAPURBi GPR64sp:$Rn, simm9:$offset)>;
583+
def : Pat<(acquiring_load<atomic_load_az_16>
584+
(am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
585+
(LDAPURHi GPR64sp:$Rn, simm9:$offset)>;
586+
def : Pat<(acquiring_load<atomic_load_32>
587+
(am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
588+
(LDAPURi GPR64sp:$Rn, simm9:$offset)>;
589+
def : Pat<(acquiring_load<atomic_load_64>
590+
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
591+
(LDAPURXi GPR64sp:$Rn, simm9:$offset)>;
592+
593+
// Store-Release Register unscaled stores
594+
def : Pat<(releasing_store<atomic_store_8>
595+
(am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
596+
(STLURBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
597+
def : Pat<(releasing_store<atomic_store_16>
598+
(am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
599+
(STLURHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
600+
def : Pat<(releasing_store<atomic_store_32>
601+
(am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
602+
(STLURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
603+
def : Pat<(releasing_store<atomic_store_64>
604+
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
605+
(STLURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
606+
}

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
9494
def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
9595
AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
9696

97-
def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
97+
def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
9898
AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
9999

100100
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7397,9 +7397,6 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
73977397
return std::nullopt;
73987398
RHSC = RHSOp1.getCImm()->getSExtValue();
73997399

7400-
// If the offset is valid as a scaled immediate, don't match here.
7401-
if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
7402-
return std::nullopt;
74037400
if (RHSC >= -256 && RHSC < 256) {
74047401
MachineOperand &Base = RootDef->getOperand(1);
74057402
return {{

llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,15 @@ define i8 @load_atomic_i8_aligned_monotonic_const(ptr readonly %ptr) {
3636

3737
define i8 @load_atomic_i8_aligned_acquire(ptr %ptr) {
3838
; CHECK-LABEL: load_atomic_i8_aligned_acquire:
39-
; CHECK: add x8, x0, #4
40-
; CHECK: ldaprb w0, [x8]
39+
; CHECK: ldapurb w0, [x0, #4]
4140
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
4241
%r = load atomic i8, ptr %gep acquire, align 1
4342
ret i8 %r
4443
}
4544

4645
define i8 @load_atomic_i8_aligned_acquire_const(ptr readonly %ptr) {
4746
; CHECK-LABEL: load_atomic_i8_aligned_acquire_const:
48-
; CHECK: add x8, x0, #4
49-
; CHECK: ldaprb w0, [x8]
47+
; CHECK: ldapurb w0, [x0, #4]
5048
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
5149
%r = load atomic i8, ptr %gep acquire, align 1
5250
ret i8 %r
@@ -104,17 +102,15 @@ define i16 @load_atomic_i16_aligned_monotonic_const(ptr readonly %ptr) {
104102

105103
define i16 @load_atomic_i16_aligned_acquire(ptr %ptr) {
106104
; CHECK-LABEL: load_atomic_i16_aligned_acquire:
107-
; CHECK: add x8, x0, #8
108-
; CHECK: ldaprh w0, [x8]
105+
; CHECK: ldapurh w0, [x0, #8]
109106
%gep = getelementptr inbounds i16, ptr %ptr, i32 4
110107
%r = load atomic i16, ptr %gep acquire, align 2
111108
ret i16 %r
112109
}
113110

114111
define i16 @load_atomic_i16_aligned_acquire_const(ptr readonly %ptr) {
115112
; CHECK-LABEL: load_atomic_i16_aligned_acquire_const:
116-
; CHECK: add x8, x0, #8
117-
; CHECK: ldaprh w0, [x8]
113+
; CHECK: ldapurh w0, [x0, #8]
118114
%gep = getelementptr inbounds i16, ptr %ptr, i32 4
119115
%r = load atomic i16, ptr %gep acquire, align 2
120116
ret i16 %r
@@ -172,17 +168,15 @@ define i32 @load_atomic_i32_aligned_monotonic_const(ptr readonly %ptr) {
172168

173169
define i32 @load_atomic_i32_aligned_acquire(ptr %ptr) {
174170
; CHECK-LABEL: load_atomic_i32_aligned_acquire:
175-
; CHECK: add x8, x0, #16
176-
; CHECK: ldapr w0, [x8]
171+
; CHECK: ldapur w0, [x0, #16]
177172
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
178173
%r = load atomic i32, ptr %gep acquire, align 4
179174
ret i32 %r
180175
}
181176

182177
define i32 @load_atomic_i32_aligned_acquire_const(ptr readonly %ptr) {
183178
; CHECK-LABEL: load_atomic_i32_aligned_acquire_const:
184-
; CHECK: add x8, x0, #16
185-
; CHECK: ldapr w0, [x8]
179+
; CHECK: ldapur w0, [x0, #16]
186180
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
187181
%r = load atomic i32, ptr %gep acquire, align 4
188182
ret i32 %r
@@ -240,17 +234,15 @@ define i64 @load_atomic_i64_aligned_monotonic_const(ptr readonly %ptr) {
240234

241235
define i64 @load_atomic_i64_aligned_acquire(ptr %ptr) {
242236
; CHECK-LABEL: load_atomic_i64_aligned_acquire:
243-
; CHECK: add x8, x0, #32
244-
; CHECK: ldapr x0, [x8]
237+
; CHECK: ldapur x0, [x0, #32]
245238
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
246239
%r = load atomic i64, ptr %gep acquire, align 8
247240
ret i64 %r
248241
}
249242

250243
define i64 @load_atomic_i64_aligned_acquire_const(ptr readonly %ptr) {
251244
; CHECK-LABEL: load_atomic_i64_aligned_acquire_const:
252-
; CHECK: add x8, x0, #32
253-
; CHECK: ldapr x0, [x8]
245+
; CHECK: ldapur x0, [x0, #32]
254246
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
255247
%r = load atomic i64, ptr %gep acquire, align 8
256248
ret i64 %r
@@ -376,17 +368,15 @@ define i8 @load_atomic_i8_unaligned_monotonic_const(ptr readonly %ptr) {
376368

377369
define i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) {
378370
; CHECK-LABEL: load_atomic_i8_unaligned_acquire:
379-
; CHECK: add x8, x0, #4
380-
; CHECK: ldaprb w0, [x8]
371+
; CHECK: ldapurb w0, [x0, #4]
381372
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
382373
%r = load atomic i8, ptr %gep acquire, align 1
383374
ret i8 %r
384375
}
385376

386377
define i8 @load_atomic_i8_unaligned_acquire_const(ptr readonly %ptr) {
387378
; CHECK-LABEL: load_atomic_i8_unaligned_acquire_const:
388-
; CHECK: add x8, x0, #4
389-
; CHECK: ldaprb w0, [x8]
379+
; CHECK: ldapurb w0, [x0, #4]
390380
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
391381
%r = load atomic i8, ptr %gep acquire, align 1
392382
ret i8 %r

llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc_immo.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,15 @@ define void @store_atomic_i8_aligned_monotonic(i8 %value, ptr %ptr) {
2020

2121
define void @store_atomic_i8_aligned_release(i8 %value, ptr %ptr) {
2222
; CHECK-LABEL: store_atomic_i8_aligned_release:
23-
; CHECK: add x8, x1, #4
24-
; CHECK: stlrb w0, [x8]
23+
; CHECK: stlurb w0, [x1, #4]
2524
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
2625
store atomic i8 %value, ptr %gep release, align 1
2726
ret void
2827
}
2928

3029
define void @store_atomic_i8_aligned_seq_cst(i8 %value, ptr %ptr) {
3130
; CHECK-LABEL: store_atomic_i8_aligned_seq_cst:
32-
; CHECK: add x8, x1, #4
33-
; CHECK: stlrb w0, [x8]
31+
; CHECK: stlurb w0, [x1, #4]
3432
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
3533
store atomic i8 %value, ptr %gep seq_cst, align 1
3634
ret void
@@ -54,17 +52,15 @@ define void @store_atomic_i16_aligned_monotonic(i16 %value, ptr %ptr) {
5452

5553
define void @store_atomic_i16_aligned_release(i16 %value, ptr %ptr) {
5654
; CHECK-LABEL: store_atomic_i16_aligned_release:
57-
; CHECK: add x8, x1, #8
58-
; CHECK: stlrh w0, [x8]
55+
; CHECK: stlurh w0, [x1, #8]
5956
%gep = getelementptr inbounds i16, ptr %ptr, i32 4
6057
store atomic i16 %value, ptr %gep release, align 2
6158
ret void
6259
}
6360

6461
define void @store_atomic_i16_aligned_seq_cst(i16 %value, ptr %ptr) {
6562
; CHECK-LABEL: store_atomic_i16_aligned_seq_cst:
66-
; CHECK: add x8, x1, #8
67-
; CHECK: stlrh w0, [x8]
63+
; CHECK: stlurh w0, [x1, #8]
6864
%gep = getelementptr inbounds i16, ptr %ptr, i32 4
6965
store atomic i16 %value, ptr %gep seq_cst, align 2
7066
ret void
@@ -88,17 +84,15 @@ define void @store_atomic_i32_aligned_monotonic(i32 %value, ptr %ptr) {
8884

8985
define void @store_atomic_i32_aligned_release(i32 %value, ptr %ptr) {
9086
; CHECK-LABEL: store_atomic_i32_aligned_release:
91-
; CHECK: add x8, x1, #16
92-
; CHECK: stlr w0, [x8]
87+
; CHECK: stlur w0, [x1, #16]
9388
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
9489
store atomic i32 %value, ptr %gep release, align 4
9590
ret void
9691
}
9792

9893
define void @store_atomic_i32_aligned_seq_cst(i32 %value, ptr %ptr) {
9994
; CHECK-LABEL: store_atomic_i32_aligned_seq_cst:
100-
; CHECK: add x8, x1, #16
101-
; CHECK: stlr w0, [x8]
95+
; CHECK: stlur w0, [x1, #16]
10296
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
10397
store atomic i32 %value, ptr %gep seq_cst, align 4
10498
ret void
@@ -122,17 +116,15 @@ define void @store_atomic_i64_aligned_monotonic(i64 %value, ptr %ptr) {
122116

123117
define void @store_atomic_i64_aligned_release(i64 %value, ptr %ptr) {
124118
; CHECK-LABEL: store_atomic_i64_aligned_release:
125-
; CHECK: add x8, x1, #32
126-
; CHECK: stlr x0, [x8]
119+
; CHECK: stlur x0, [x1, #32]
127120
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
128121
store atomic i64 %value, ptr %gep release, align 8
129122
ret void
130123
}
131124

132125
define void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
133126
; CHECK-LABEL: store_atomic_i64_aligned_seq_cst:
134-
; CHECK: add x8, x1, #32
135-
; CHECK: stlr x0, [x8]
127+
; CHECK: stlur x0, [x1, #32]
136128
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
137129
store atomic i64 %value, ptr %gep seq_cst, align 8
138130
ret void
@@ -191,17 +183,15 @@ define void @store_atomic_i8_unaligned_monotonic(i8 %value, ptr %ptr) {
191183

192184
define void @store_atomic_i8_unaligned_release(i8 %value, ptr %ptr) {
193185
; CHECK-LABEL: store_atomic_i8_unaligned_release:
194-
; CHECK: add x8, x1, #4
195-
; CHECK: stlrb w0, [x8]
186+
; CHECK: stlurb w0, [x1, #4]
196187
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
197188
store atomic i8 %value, ptr %gep release, align 1
198189
ret void
199190
}
200191

201192
define void @store_atomic_i8_unaligned_seq_cst(i8 %value, ptr %ptr) {
202193
; CHECK-LABEL: store_atomic_i8_unaligned_seq_cst:
203-
; CHECK: add x8, x1, #4
204-
; CHECK: stlrb w0, [x8]
194+
; CHECK: stlurb w0, [x1, #4]
205195
%gep = getelementptr inbounds i8, ptr %ptr, i32 4
206196
store atomic i8 %value, ptr %gep seq_cst, align 1
207197
ret void

0 commit comments

Comments (0)