Skip to content

Commit 1610311

Browse files
committed
[AArch64] Fixes for BigEndian 128bit volatile, atomic and non-temporal loads/stores
This fixes up the generation of 128-bit atomic, volatile and non-temporal loads/stores for big-endian targets, under the assumption that they should usually be the same as the standard versions. See https://godbolt.org/z/xxc89eMKE. Fixes #64580. Closes #67413.
1 parent 6e3d2a4 commit 1610311

9 files changed

+79
-145
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5705,11 +5705,11 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
57055705
// legalization will break up 256 bit inputs.
57065706
ElementCount EC = MemVT.getVectorElementCount();
57075707
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
5708-
EC.isKnownEven() &&
5709-
((MemVT.getScalarSizeInBits() == 8u ||
5710-
MemVT.getScalarSizeInBits() == 16u ||
5711-
MemVT.getScalarSizeInBits() == 32u ||
5712-
MemVT.getScalarSizeInBits() == 64u))) {
5708+
EC.isKnownEven() && DAG.getDataLayout().isLittleEndian() &&
5709+
(MemVT.getScalarSizeInBits() == 8u ||
5710+
MemVT.getScalarSizeInBits() == 16u ||
5711+
MemVT.getScalarSizeInBits() == 32u ||
5712+
MemVT.getScalarSizeInBits() == 64u)) {
57135713
SDValue Lo =
57145714
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
57155715
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
@@ -5769,6 +5769,8 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
57695769
SDLoc DL(Op);
57705770
auto StoreValue = DAG.SplitScalar(Value, DL, MVT::i64, MVT::i64);
57715771
unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
5772+
if (DAG.getDataLayout().isBigEndian())
5773+
std::swap(StoreValue.first, StoreValue.second);
57725774
SDValue Result = DAG.getMemIntrinsicNode(
57735775
Opcode, DL, DAG.getVTList(MVT::Other),
57745776
{StoreNode->getChain(), StoreValue.first, StoreValue.second,
@@ -24162,8 +24164,11 @@ void AArch64TargetLowering::ReplaceNodeResults(
2416224164
{LoadNode->getChain(), LoadNode->getBasePtr()},
2416324165
LoadNode->getMemoryVT(), LoadNode->getMemOperand());
2416424166

24165-
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
24166-
Result.getValue(0), Result.getValue(1));
24167+
unsigned FirstRes = DAG.getDataLayout().isBigEndian() ? 1 : 0;
24168+
24169+
SDValue Pair =
24170+
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
24171+
Result.getValue(FirstRes), Result.getValue(1 - FirstRes));
2416724172
Results.append({Pair, Result.getValue(2) /* Chain */});
2416824173
}
2416924174
return;

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -229,59 +229,59 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
229229

230230
define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
231231
; CHECK-LABEL: load_atomic_i128_aligned_unordered:
232-
; CHECK: ldp x1, x0, [x0]
232+
; CHECK: ldp x0, x1, [x0]
233233
%r = load atomic i128, ptr %ptr unordered, align 16
234234
ret i128 %r
235235
}
236236

237237
define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
238238
; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
239-
; CHECK: ldp x1, x0, [x0]
239+
; CHECK: ldp x0, x1, [x0]
240240
%r = load atomic i128, ptr %ptr unordered, align 16
241241
ret i128 %r
242242
}
243243

244244
define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
245245
; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
246-
; CHECK: ldp x1, x0, [x0]
246+
; CHECK: ldp x0, x1, [x0]
247247
%r = load atomic i128, ptr %ptr monotonic, align 16
248248
ret i128 %r
249249
}
250250

251251
define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
252252
; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
253-
; CHECK: ldp x1, x0, [x0]
253+
; CHECK: ldp x0, x1, [x0]
254254
%r = load atomic i128, ptr %ptr monotonic, align 16
255255
ret i128 %r
256256
}
257257

258258
define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
259259
; CHECK-LABEL: load_atomic_i128_aligned_acquire:
260-
; CHECK: ldp x1, x0, [x0]
260+
; CHECK: ldp x0, x1, [x0]
261261
; CHECK: dmb ishld
262262
%r = load atomic i128, ptr %ptr acquire, align 16
263263
ret i128 %r
264264
}
265265

266266
define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
267267
; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
268-
; CHECK: ldp x1, x0, [x0]
268+
; CHECK: ldp x0, x1, [x0]
269269
; CHECK: dmb ishld
270270
%r = load atomic i128, ptr %ptr acquire, align 16
271271
ret i128 %r
272272
}
273273

274274
define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
275275
; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
276-
; CHECK: ldp x1, x0, [x0]
276+
; CHECK: ldp x0, x1, [x0]
277277
; CHECK: dmb ish
278278
%r = load atomic i128, ptr %ptr seq_cst, align 16
279279
ret i128 %r
280280
}
281281

282282
define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
283283
; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
284-
; CHECK: ldp x1, x0, [x0]
284+
; CHECK: ldp x0, x1, [x0]
285285
; CHECK: dmb ish
286286
%r = load atomic i128, ptr %ptr seq_cst, align 16
287287
ret i128 %r

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-lse2_lse128.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -229,59 +229,59 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
229229

230230
define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
231231
; CHECK-LABEL: load_atomic_i128_aligned_unordered:
232-
; CHECK: ldp x1, x0, [x0]
232+
; CHECK: ldp x0, x1, [x0]
233233
%r = load atomic i128, ptr %ptr unordered, align 16
234234
ret i128 %r
235235
}
236236

237237
define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
238238
; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
239-
; CHECK: ldp x1, x0, [x0]
239+
; CHECK: ldp x0, x1, [x0]
240240
%r = load atomic i128, ptr %ptr unordered, align 16
241241
ret i128 %r
242242
}
243243

244244
define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
245245
; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
246-
; CHECK: ldp x1, x0, [x0]
246+
; CHECK: ldp x0, x1, [x0]
247247
%r = load atomic i128, ptr %ptr monotonic, align 16
248248
ret i128 %r
249249
}
250250

251251
define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
252252
; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
253-
; CHECK: ldp x1, x0, [x0]
253+
; CHECK: ldp x0, x1, [x0]
254254
%r = load atomic i128, ptr %ptr monotonic, align 16
255255
ret i128 %r
256256
}
257257

258258
define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
259259
; CHECK-LABEL: load_atomic_i128_aligned_acquire:
260-
; CHECK: ldp x1, x0, [x0]
260+
; CHECK: ldp x0, x1, [x0]
261261
; CHECK: dmb ishld
262262
%r = load atomic i128, ptr %ptr acquire, align 16
263263
ret i128 %r
264264
}
265265

266266
define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
267267
; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
268-
; CHECK: ldp x1, x0, [x0]
268+
; CHECK: ldp x0, x1, [x0]
269269
; CHECK: dmb ishld
270270
%r = load atomic i128, ptr %ptr acquire, align 16
271271
ret i128 %r
272272
}
273273

274274
define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
275275
; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
276-
; CHECK: ldp x1, x0, [x0]
276+
; CHECK: ldp x0, x1, [x0]
277277
; CHECK: dmb ish
278278
%r = load atomic i128, ptr %ptr seq_cst, align 16
279279
ret i128 %r
280280
}
281281

282282
define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
283283
; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
284-
; CHECK: ldp x1, x0, [x0]
284+
; CHECK: ldp x0, x1, [x0]
285285
; CHECK: dmb ish
286286
%r = load atomic i128, ptr %ptr seq_cst, align 16
287287
ret i128 %r

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -229,57 +229,57 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
229229

230230
define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
231231
; CHECK-LABEL: load_atomic_i128_aligned_unordered:
232-
; CHECK: ldp x1, x0, [x0]
232+
; CHECK: ldp x0, x1, [x0]
233233
%r = load atomic i128, ptr %ptr unordered, align 16
234234
ret i128 %r
235235
}
236236

237237
define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
238238
; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
239-
; CHECK: ldp x1, x0, [x0]
239+
; CHECK: ldp x0, x1, [x0]
240240
%r = load atomic i128, ptr %ptr unordered, align 16
241241
ret i128 %r
242242
}
243243

244244
define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
245245
; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
246-
; CHECK: ldp x1, x0, [x0]
246+
; CHECK: ldp x0, x1, [x0]
247247
%r = load atomic i128, ptr %ptr monotonic, align 16
248248
ret i128 %r
249249
}
250250

251251
define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
252252
; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
253-
; CHECK: ldp x1, x0, [x0]
253+
; CHECK: ldp x0, x1, [x0]
254254
%r = load atomic i128, ptr %ptr monotonic, align 16
255255
ret i128 %r
256256
}
257257

258258
define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
259259
; CHECK-LABEL: load_atomic_i128_aligned_acquire:
260-
; CHECK: ldiapp x1, x0, [x0]
260+
; CHECK: ldiapp x0, x1, [x0]
261261
%r = load atomic i128, ptr %ptr acquire, align 16
262262
ret i128 %r
263263
}
264264

265265
define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
266266
; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
267-
; CHECK: ldiapp x1, x0, [x0]
267+
; CHECK: ldiapp x0, x1, [x0]
268268
%r = load atomic i128, ptr %ptr acquire, align 16
269269
ret i128 %r
270270
}
271271

272272
define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
273273
; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
274-
; CHECK: ldp x1, x0, [x0]
274+
; CHECK: ldp x0, x1, [x0]
275275
; CHECK: dmb ish
276276
%r = load atomic i128, ptr %ptr seq_cst, align 16
277277
ret i128 %r
278278
}
279279

280280
define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
281281
; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
282-
; CHECK: ldp x1, x0, [x0]
282+
; CHECK: ldp x0, x1, [x0]
283283
; CHECK: dmb ish
284284
%r = load atomic i128, ptr %ptr seq_cst, align 16
285285
ret i128 %r

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,30 +117,30 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
117117

118118
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
119119
; CHECK-LABEL: store_atomic_i128_aligned_unordered:
120-
; CHECK: stp x1, x0, [x2]
120+
; CHECK: stp x0, x1, [x2]
121121
store atomic i128 %value, ptr %ptr unordered, align 16
122122
ret void
123123
}
124124

125125
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
126126
; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
127-
; CHECK: stp x1, x0, [x2]
127+
; CHECK: stp x0, x1, [x2]
128128
store atomic i128 %value, ptr %ptr monotonic, align 16
129129
ret void
130130
}
131131

132132
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
133133
; CHECK-LABEL: store_atomic_i128_aligned_release:
134134
; CHECK: dmb ish
135-
; CHECK: stp x1, x0, [x2]
135+
; CHECK: stp x0, x1, [x2]
136136
store atomic i128 %value, ptr %ptr release, align 16
137137
ret void
138138
}
139139

140140
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
141141
; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
142142
; CHECK: dmb ish
143-
; CHECK: stp x1, x0, [x2]
143+
; CHECK: stp x0, x1, [x2]
144144
; CHECK: dmb ish
145145
store atomic i128 %value, ptr %ptr seq_cst, align 16
146146
ret void

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse2_lse128.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,14 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
117117

118118
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
119119
; CHECK-LABEL: store_atomic_i128_aligned_unordered:
120-
; CHECK: stp x1, x0, [x2]
120+
; CHECK: stp x0, x1, [x2]
121121
store atomic i128 %value, ptr %ptr unordered, align 16
122122
ret void
123123
}
124124

125125
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
126126
; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
127-
; CHECK: stp x1, x0, [x2]
127+
; CHECK: stp x0, x1, [x2]
128128
store atomic i128 %value, ptr %ptr monotonic, align 16
129129
ret void
130130
}

llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,29 +117,29 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
117117

118118
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
119119
; CHECK-LABEL: store_atomic_i128_aligned_unordered:
120-
; CHECK: stp x1, x0, [x2]
120+
; CHECK: stp x0, x1, [x2]
121121
store atomic i128 %value, ptr %ptr unordered, align 16
122122
ret void
123123
}
124124

125125
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
126126
; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
127-
; CHECK: stp x1, x0, [x2]
127+
; CHECK: stp x0, x1, [x2]
128128
store atomic i128 %value, ptr %ptr monotonic, align 16
129129
ret void
130130
}
131131

132132
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
133133
; CHECK-LABEL: store_atomic_i128_aligned_release:
134-
; CHECK: stilp x1, x0, [x2]
134+
; CHECK: stilp x0, x1, [x2]
135135
store atomic i128 %value, ptr %ptr release, align 16
136136
ret void
137137
}
138138

139139
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
140140
; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
141141
; CHECK: dmb ish
142-
; CHECK: stp x1, x0, [x2]
142+
; CHECK: stp x0, x1, [x2]
143143
; CHECK: dmb ish
144144
store atomic i128 %value, ptr %ptr seq_cst, align 16
145145
ret void

llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,10 @@ entry:
129129
}
130130

131131
define i128 @load_vol(i32, i32, ptr %p) {
132-
; CHECK-LE-LABEL: load_vol:
133-
; CHECK-LE: // %bb.0: // %entry
134-
; CHECK-LE-NEXT: ldp x0, x1, [x2]
135-
; CHECK-LE-NEXT: ret
136-
;
137-
; CHECK-BE-LABEL: load_vol:
138-
; CHECK-BE: // %bb.0: // %entry
139-
; CHECK-BE-NEXT: ldp x1, x0, [x2]
140-
; CHECK-BE-NEXT: ret
132+
; CHECK-LABEL: load_vol:
133+
; CHECK: // %bb.0: // %entry
134+
; CHECK-NEXT: ldp x0, x1, [x2]
135+
; CHECK-NEXT: ret
141136
entry:
142137
%l = load volatile i128, ptr %p, align 16
143138
ret i128 %l
@@ -154,16 +149,14 @@ entry:
154149
}
155150

156151
define void @loadstore_vol(i128 %a, ptr %p) {
157-
; CHECK-LE-LABEL: loadstore_vol:
158-
; CHECK-LE: // %bb.0: // %entry
159-
; CHECK-LE-NEXT: stp x0, x1, [x2]
160-
; CHECK-LE-NEXT: ret
161-
;
162-
; CHECK-BE-LABEL: loadstore_vol:
163-
; CHECK-BE: // %bb.0: // %entry
164-
; CHECK-BE-NEXT: stp x1, x0, [x2]
165-
; CHECK-BE-NEXT: ret
152+
; CHECK-LABEL: loadstore_vol:
153+
; CHECK: // %bb.0: // %entry
154+
; CHECK-NEXT: stp x0, x1, [x2]
155+
; CHECK-NEXT: ret
166156
entry:
167157
store volatile i128 %a, ptr %p, align 16
168158
ret void
169159
}
160+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
161+
; CHECK-BE: {{.*}}
162+
; CHECK-LE: {{.*}}

0 commit comments

Comments
 (0)