Skip to content

Commit 5c3c318

Browse files
committed
Check if libcall
1 parent 472967f commit 5c3c318

File tree

5 files changed

+202
-70
lines changed

5 files changed

+202
-70
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27055,13 +27055,33 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
2705527055
: AtomicExpansionKind::LLSC;
2705627056
}
2705727057

27058+
// Return true if expanding this atomic RMW operation may emit a library
27059+
// call, which makes it ineligible for an LL/SC expansion.
27060+
static bool rmwOpMayLowerToLibcall(const AtomicRMWInst *RMW) {
27061+
if (!RMW->isFloatingPointOperation())
27062+
return false;
27063+
switch (RMW->getType()->getScalarType()->getTypeID()) { // Vectors are classified by element type.
27064+
case Type::FloatTyID:
27065+
case Type::DoubleTyID:
27066+
case Type::HalfTyID:
27067+
case Type::BFloatTyID:
27068+
return false;
27069+
default:
27070+
// Any other floating-point type (e.g. fp128) will emit library calls.
27071+
return true;
27072+
}
27073+
27074+
llvm_unreachable("covered type switch"); // Never reached: every switch arm above returns.
27075+
}
27076+
2705827077
// The "default" for integer RMW operations is to expand to an LL/SC loop.
2705927078
// However, with the LSE instructions (or outline-atomics mode, which provides
2706027079
// library routines in place of the LSE-instructions), we can directly emit many
2706127080
// operations instead.
2706227081
TargetLowering::AtomicExpansionKind
2706327082
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
27064-
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
27083+
Type *Ty = AI->getType();
27084+
unsigned Size = Ty->getPrimitiveSizeInBits();
2706527085
assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");
2706627086

2706727087
bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
@@ -27100,7 +27120,7 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2710027120
// succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
2710127121
// we have a single CAS instruction that can replace the loop.
2710227122
if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
27103-
Subtarget->hasLSE())
27123+
Subtarget->hasLSE() || rmwOpMayLowerToLibcall(AI))
2710427124
return AtomicExpansionKind::CmpXChg;
2710527125

2710627126
return AtomicExpansionKind::LLSC;

llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -273,26 +273,54 @@ define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #
273273
define fp128 @test_atomicrmw_fadd_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
274274
; NOLSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16:
275275
; NOLSE: // %bb.0:
276-
; NOLSE-NEXT: sub sp, sp, #80
277-
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
276+
; NOLSE-NEXT: sub sp, sp, #96
277+
; NOLSE-NEXT: ldr q1, [x0]
278+
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
278279
; NOLSE-NEXT: mov x19, x0
279-
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
280-
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
281-
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
282-
; NOLSE-NEXT: ldaxp x8, x9, [x19]
283-
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
284-
; NOLSE-NEXT: stp x8, x9, [sp, #48]
285-
; NOLSE-NEXT: ldr q0, [sp, #48]
286280
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
281+
; NOLSE-NEXT: b .LBB6_2
282+
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
283+
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
284+
; NOLSE-NEXT: stp x12, x13, [sp, #32]
285+
; NOLSE-NEXT: cmp x13, x10
286+
; NOLSE-NEXT: ldr q1, [sp, #32]
287+
; NOLSE-NEXT: ccmp x12, x11, #0, eq
288+
; NOLSE-NEXT: b.eq .LBB6_6
289+
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
290+
; NOLSE-NEXT: // =>This Loop Header: Depth=1
291+
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
292+
; NOLSE-NEXT: mov v0.16b, v1.16b
293+
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
294+
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
287295
; NOLSE-NEXT: bl __addtf3
288-
; NOLSE-NEXT: str q0, [sp, #32]
289-
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
290-
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
291-
; NOLSE-NEXT: cbnz w10, .LBB6_1
292-
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
293-
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
294-
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
295-
; NOLSE-NEXT: add sp, sp, #80
296+
; NOLSE-NEXT: str q0, [sp, #48]
297+
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
298+
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
299+
; NOLSE-NEXT: str q0, [sp, #64]
300+
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
301+
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
302+
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
303+
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
304+
; NOLSE-NEXT: ldaxp x12, x13, [x19]
305+
; NOLSE-NEXT: cmp x12, x11
306+
; NOLSE-NEXT: cset w14, ne
307+
; NOLSE-NEXT: cmp x13, x10
308+
; NOLSE-NEXT: cinc w14, w14, ne
309+
; NOLSE-NEXT: cbz w14, .LBB6_5
310+
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
311+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
312+
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
313+
; NOLSE-NEXT: cbnz w14, .LBB6_3
314+
; NOLSE-NEXT: b .LBB6_1
315+
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
316+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
317+
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
318+
; NOLSE-NEXT: cbnz w14, .LBB6_3
319+
; NOLSE-NEXT: b .LBB6_1
320+
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
321+
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
322+
; NOLSE-NEXT: mov v0.16b, v1.16b
323+
; NOLSE-NEXT: add sp, sp, #96
296324
; NOLSE-NEXT: ret
297325
;
298326
; LSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16:

llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -273,26 +273,54 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #
273273
define fp128 @test_atomicrmw_fmax_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
274274
; NOLSE-LABEL: test_atomicrmw_fmax_fp128_seq_cst_align16:
275275
; NOLSE: // %bb.0:
276-
; NOLSE-NEXT: sub sp, sp, #80
277-
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
276+
; NOLSE-NEXT: sub sp, sp, #96
277+
; NOLSE-NEXT: ldr q1, [x0]
278+
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
278279
; NOLSE-NEXT: mov x19, x0
279-
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
280-
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
281-
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
282-
; NOLSE-NEXT: ldaxp x8, x9, [x19]
283-
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
284-
; NOLSE-NEXT: stp x8, x9, [sp, #48]
285-
; NOLSE-NEXT: ldr q0, [sp, #48]
286280
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
281+
; NOLSE-NEXT: b .LBB6_2
282+
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
283+
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
284+
; NOLSE-NEXT: stp x12, x13, [sp, #32]
285+
; NOLSE-NEXT: cmp x13, x10
286+
; NOLSE-NEXT: ldr q1, [sp, #32]
287+
; NOLSE-NEXT: ccmp x12, x11, #0, eq
288+
; NOLSE-NEXT: b.eq .LBB6_6
289+
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
290+
; NOLSE-NEXT: // =>This Loop Header: Depth=1
291+
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
292+
; NOLSE-NEXT: mov v0.16b, v1.16b
293+
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
294+
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
287295
; NOLSE-NEXT: bl fmaxl
288-
; NOLSE-NEXT: str q0, [sp, #32]
289-
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
290-
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
291-
; NOLSE-NEXT: cbnz w10, .LBB6_1
292-
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
293-
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
294-
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
295-
; NOLSE-NEXT: add sp, sp, #80
296+
; NOLSE-NEXT: str q0, [sp, #48]
297+
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
298+
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
299+
; NOLSE-NEXT: str q0, [sp, #64]
300+
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
301+
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
302+
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
303+
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
304+
; NOLSE-NEXT: ldaxp x12, x13, [x19]
305+
; NOLSE-NEXT: cmp x12, x11
306+
; NOLSE-NEXT: cset w14, ne
307+
; NOLSE-NEXT: cmp x13, x10
308+
; NOLSE-NEXT: cinc w14, w14, ne
309+
; NOLSE-NEXT: cbz w14, .LBB6_5
310+
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
311+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
312+
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
313+
; NOLSE-NEXT: cbnz w14, .LBB6_3
314+
; NOLSE-NEXT: b .LBB6_1
315+
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
316+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
317+
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
318+
; NOLSE-NEXT: cbnz w14, .LBB6_3
319+
; NOLSE-NEXT: b .LBB6_1
320+
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
321+
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
322+
; NOLSE-NEXT: mov v0.16b, v1.16b
323+
; NOLSE-NEXT: add sp, sp, #96
296324
; NOLSE-NEXT: ret
297325
;
298326
; LSE-LABEL: test_atomicrmw_fmax_fp128_seq_cst_align16:

llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -273,26 +273,54 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) #
273273
define fp128 @test_atomicrmw_fmin_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
274274
; NOLSE-LABEL: test_atomicrmw_fmin_fp128_seq_cst_align16:
275275
; NOLSE: // %bb.0:
276-
; NOLSE-NEXT: sub sp, sp, #80
277-
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
276+
; NOLSE-NEXT: sub sp, sp, #96
277+
; NOLSE-NEXT: ldr q1, [x0]
278+
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
278279
; NOLSE-NEXT: mov x19, x0
279-
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
280-
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
281-
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
282-
; NOLSE-NEXT: ldaxp x8, x9, [x19]
283-
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
284-
; NOLSE-NEXT: stp x8, x9, [sp, #48]
285-
; NOLSE-NEXT: ldr q0, [sp, #48]
286280
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
281+
; NOLSE-NEXT: b .LBB6_2
282+
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
283+
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
284+
; NOLSE-NEXT: stp x12, x13, [sp, #32]
285+
; NOLSE-NEXT: cmp x13, x10
286+
; NOLSE-NEXT: ldr q1, [sp, #32]
287+
; NOLSE-NEXT: ccmp x12, x11, #0, eq
288+
; NOLSE-NEXT: b.eq .LBB6_6
289+
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
290+
; NOLSE-NEXT: // =>This Loop Header: Depth=1
291+
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
292+
; NOLSE-NEXT: mov v0.16b, v1.16b
293+
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
294+
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
287295
; NOLSE-NEXT: bl fminl
288-
; NOLSE-NEXT: str q0, [sp, #32]
289-
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
290-
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
291-
; NOLSE-NEXT: cbnz w10, .LBB6_1
292-
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
293-
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
294-
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
295-
; NOLSE-NEXT: add sp, sp, #80
296+
; NOLSE-NEXT: str q0, [sp, #48]
297+
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
298+
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
299+
; NOLSE-NEXT: str q0, [sp, #64]
300+
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
301+
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
302+
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
303+
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
304+
; NOLSE-NEXT: ldaxp x12, x13, [x19]
305+
; NOLSE-NEXT: cmp x12, x11
306+
; NOLSE-NEXT: cset w14, ne
307+
; NOLSE-NEXT: cmp x13, x10
308+
; NOLSE-NEXT: cinc w14, w14, ne
309+
; NOLSE-NEXT: cbz w14, .LBB6_5
310+
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
311+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
312+
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
313+
; NOLSE-NEXT: cbnz w14, .LBB6_3
314+
; NOLSE-NEXT: b .LBB6_1
315+
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
316+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
317+
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
318+
; NOLSE-NEXT: cbnz w14, .LBB6_3
319+
; NOLSE-NEXT: b .LBB6_1
320+
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
321+
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
322+
; NOLSE-NEXT: mov v0.16b, v1.16b
323+
; NOLSE-NEXT: add sp, sp, #96
296324
; NOLSE-NEXT: ret
297325
;
298326
; LSE-LABEL: test_atomicrmw_fmin_fp128_seq_cst_align16:

llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -273,26 +273,54 @@ define double @test_atomicrmw_fsub_f32_seq_cst_align8(ptr %ptr, double %value) #
273273
define fp128 @test_atomicrmw_fsub_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
274274
; NOLSE-LABEL: test_atomicrmw_fsub_fp128_seq_cst_align16:
275275
; NOLSE: // %bb.0:
276-
; NOLSE-NEXT: sub sp, sp, #80
277-
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
276+
; NOLSE-NEXT: sub sp, sp, #96
277+
; NOLSE-NEXT: ldr q1, [x0]
278+
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
278279
; NOLSE-NEXT: mov x19, x0
279-
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
280-
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
281-
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
282-
; NOLSE-NEXT: ldaxp x8, x9, [x19]
283-
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
284-
; NOLSE-NEXT: stp x8, x9, [sp, #48]
285-
; NOLSE-NEXT: ldr q0, [sp, #48]
286280
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
281+
; NOLSE-NEXT: b .LBB6_2
282+
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
283+
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
284+
; NOLSE-NEXT: stp x12, x13, [sp, #32]
285+
; NOLSE-NEXT: cmp x13, x10
286+
; NOLSE-NEXT: ldr q1, [sp, #32]
287+
; NOLSE-NEXT: ccmp x12, x11, #0, eq
288+
; NOLSE-NEXT: b.eq .LBB6_6
289+
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
290+
; NOLSE-NEXT: // =>This Loop Header: Depth=1
291+
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
292+
; NOLSE-NEXT: mov v0.16b, v1.16b
293+
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
294+
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
287295
; NOLSE-NEXT: bl __subtf3
288-
; NOLSE-NEXT: str q0, [sp, #32]
289-
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
290-
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
291-
; NOLSE-NEXT: cbnz w10, .LBB6_1
292-
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
293-
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
294-
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
295-
; NOLSE-NEXT: add sp, sp, #80
296+
; NOLSE-NEXT: str q0, [sp, #48]
297+
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
298+
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
299+
; NOLSE-NEXT: str q0, [sp, #64]
300+
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
301+
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
302+
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
303+
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
304+
; NOLSE-NEXT: ldaxp x12, x13, [x19]
305+
; NOLSE-NEXT: cmp x12, x11
306+
; NOLSE-NEXT: cset w14, ne
307+
; NOLSE-NEXT: cmp x13, x10
308+
; NOLSE-NEXT: cinc w14, w14, ne
309+
; NOLSE-NEXT: cbz w14, .LBB6_5
310+
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
311+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
312+
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
313+
; NOLSE-NEXT: cbnz w14, .LBB6_3
314+
; NOLSE-NEXT: b .LBB6_1
315+
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
316+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
317+
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
318+
; NOLSE-NEXT: cbnz w14, .LBB6_3
319+
; NOLSE-NEXT: b .LBB6_1
320+
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
321+
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
322+
; NOLSE-NEXT: mov v0.16b, v1.16b
323+
; NOLSE-NEXT: add sp, sp, #96
296324
; NOLSE-NEXT: ret
297325
;
298326
; LSE-LABEL: test_atomicrmw_fsub_fp128_seq_cst_align16:

0 commit comments

Comments
 (0)