Skip to content

Commit 44282df

Browse files
committed
Check if libcall
1 parent a781c2c commit 44282df

File tree

5 files changed

+202
-70
lines changed

5 files changed

+202
-70
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27096,13 +27096,33 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
2709627096
: AtomicExpansionKind::LLSC;
2709727097
}
2709827098

27099+
// Return true if expanding the given atomicrmw would lower to a library call,
27100+
// which makes the operation ineligible for an LL/SC expansion.
//
// Non-floating-point operations always return false. For floating-point
// operations the decision is made on the scalar type of the result: float,
// double, half and bfloat return false; every other type returns true.
27101+
static bool rmwOpMayLowerToLibcall(const AtomicRMWInst *RMW) {
27102+
if (!RMW->isFloatingPointOperation())
27103+
return false;
27104+
switch (RMW->getType()->getScalarType()->getTypeID()) {
27105+
case Type::FloatTyID:
27106+
case Type::DoubleTyID:
27107+
case Type::HalfTyID:
27108+
case Type::BFloatTyID:
27109+
return false;
27110+
default:
27111+
// Any remaining type is treated as a libcall candidate; fp128 will emit
// library calls.
27112+
return true;
27113+
}
27114+
27115+
// NOTE(review): not reachable as written -- every path through the switch
// above (including the default case) returns before this point.
llvm_unreachable("covered type switch");
27116+
}
27117+
2709927118
// The "default" for integer RMW operations is to expand to an LL/SC loop.
2710027119
// However, with the LSE instructions (or outline-atomics mode, which provides
2710127120
// library routines in place of the LSE-instructions), we can directly emit many
2710227121
// operations instead.
2710327122
TargetLowering::AtomicExpansionKind
2710427123
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
27105-
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
27124+
Type *Ty = AI->getType();
27125+
unsigned Size = Ty->getPrimitiveSizeInBits();
2710627126
assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");
2710727127

2710827128
bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
@@ -27141,7 +27161,7 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2714127161
// succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
2714227162
// we have a single CAS instruction that can replace the loop.
2714327163
if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
27144-
Subtarget->hasLSE())
27164+
Subtarget->hasLSE() || rmwOpMayLowerToLibcall(AI))
2714527165
return AtomicExpansionKind::CmpXChg;
2714627166

2714727167
return AtomicExpansionKind::LLSC;

llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -508,26 +508,54 @@ define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #
508508
define fp128 @test_atomicrmw_fadd_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
509509
; NOLSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16:
510510
; NOLSE: // %bb.0:
511-
; NOLSE-NEXT: sub sp, sp, #80
512-
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
511+
; NOLSE-NEXT: sub sp, sp, #96
512+
; NOLSE-NEXT: ldr q1, [x0]
513+
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
513514
; NOLSE-NEXT: mov x19, x0
514-
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
515-
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
516-
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
517-
; NOLSE-NEXT: ldaxp x8, x9, [x19]
518-
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
519-
; NOLSE-NEXT: stp x8, x9, [sp, #48]
520-
; NOLSE-NEXT: ldr q0, [sp, #48]
521515
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
516+
; NOLSE-NEXT: b .LBB6_2
517+
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
518+
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
519+
; NOLSE-NEXT: stp x12, x13, [sp, #32]
520+
; NOLSE-NEXT: cmp x13, x10
521+
; NOLSE-NEXT: ldr q1, [sp, #32]
522+
; NOLSE-NEXT: ccmp x12, x11, #0, eq
523+
; NOLSE-NEXT: b.eq .LBB6_6
524+
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
525+
; NOLSE-NEXT: // =>This Loop Header: Depth=1
526+
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
527+
; NOLSE-NEXT: mov v0.16b, v1.16b
528+
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
529+
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
522530
; NOLSE-NEXT: bl __addtf3
523-
; NOLSE-NEXT: str q0, [sp, #32]
524-
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
525-
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
526-
; NOLSE-NEXT: cbnz w10, .LBB6_1
527-
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
528-
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
529-
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
530-
; NOLSE-NEXT: add sp, sp, #80
531+
; NOLSE-NEXT: str q0, [sp, #48]
532+
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
533+
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
534+
; NOLSE-NEXT: str q0, [sp, #64]
535+
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
536+
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
537+
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
538+
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
539+
; NOLSE-NEXT: ldaxp x12, x13, [x19]
540+
; NOLSE-NEXT: cmp x12, x11
541+
; NOLSE-NEXT: cset w14, ne
542+
; NOLSE-NEXT: cmp x13, x10
543+
; NOLSE-NEXT: cinc w14, w14, ne
544+
; NOLSE-NEXT: cbz w14, .LBB6_5
545+
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
546+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
547+
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
548+
; NOLSE-NEXT: cbnz w14, .LBB6_3
549+
; NOLSE-NEXT: b .LBB6_1
550+
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
551+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
552+
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
553+
; NOLSE-NEXT: cbnz w14, .LBB6_3
554+
; NOLSE-NEXT: b .LBB6_1
555+
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
556+
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
557+
; NOLSE-NEXT: mov v0.16b, v1.16b
558+
; NOLSE-NEXT: add sp, sp, #96
531559
; NOLSE-NEXT: ret
532560
;
533561
; LSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16:

llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -508,26 +508,54 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #
508508
define fp128 @test_atomicrmw_fmax_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
509509
; NOLSE-LABEL: test_atomicrmw_fmax_fp128_seq_cst_align16:
510510
; NOLSE: // %bb.0:
511-
; NOLSE-NEXT: sub sp, sp, #80
512-
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
511+
; NOLSE-NEXT: sub sp, sp, #96
512+
; NOLSE-NEXT: ldr q1, [x0]
513+
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
513514
; NOLSE-NEXT: mov x19, x0
514-
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
515-
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
516-
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
517-
; NOLSE-NEXT: ldaxp x8, x9, [x19]
518-
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
519-
; NOLSE-NEXT: stp x8, x9, [sp, #48]
520-
; NOLSE-NEXT: ldr q0, [sp, #48]
521515
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
516+
; NOLSE-NEXT: b .LBB6_2
517+
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
518+
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
519+
; NOLSE-NEXT: stp x12, x13, [sp, #32]
520+
; NOLSE-NEXT: cmp x13, x10
521+
; NOLSE-NEXT: ldr q1, [sp, #32]
522+
; NOLSE-NEXT: ccmp x12, x11, #0, eq
523+
; NOLSE-NEXT: b.eq .LBB6_6
524+
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
525+
; NOLSE-NEXT: // =>This Loop Header: Depth=1
526+
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
527+
; NOLSE-NEXT: mov v0.16b, v1.16b
528+
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
529+
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
522530
; NOLSE-NEXT: bl fmaxl
523-
; NOLSE-NEXT: str q0, [sp, #32]
524-
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
525-
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
526-
; NOLSE-NEXT: cbnz w10, .LBB6_1
527-
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
528-
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
529-
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
530-
; NOLSE-NEXT: add sp, sp, #80
531+
; NOLSE-NEXT: str q0, [sp, #48]
532+
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
533+
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
534+
; NOLSE-NEXT: str q0, [sp, #64]
535+
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
536+
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
537+
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
538+
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
539+
; NOLSE-NEXT: ldaxp x12, x13, [x19]
540+
; NOLSE-NEXT: cmp x12, x11
541+
; NOLSE-NEXT: cset w14, ne
542+
; NOLSE-NEXT: cmp x13, x10
543+
; NOLSE-NEXT: cinc w14, w14, ne
544+
; NOLSE-NEXT: cbz w14, .LBB6_5
545+
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
546+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
547+
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
548+
; NOLSE-NEXT: cbnz w14, .LBB6_3
549+
; NOLSE-NEXT: b .LBB6_1
550+
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
551+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
552+
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
553+
; NOLSE-NEXT: cbnz w14, .LBB6_3
554+
; NOLSE-NEXT: b .LBB6_1
555+
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
556+
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
557+
; NOLSE-NEXT: mov v0.16b, v1.16b
558+
; NOLSE-NEXT: add sp, sp, #96
531559
; NOLSE-NEXT: ret
532560
;
533561
; LSE-LABEL: test_atomicrmw_fmax_fp128_seq_cst_align16:

llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -508,26 +508,54 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) #
508508
define fp128 @test_atomicrmw_fmin_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
509509
; NOLSE-LABEL: test_atomicrmw_fmin_fp128_seq_cst_align16:
510510
; NOLSE: // %bb.0:
511-
; NOLSE-NEXT: sub sp, sp, #80
512-
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
511+
; NOLSE-NEXT: sub sp, sp, #96
512+
; NOLSE-NEXT: ldr q1, [x0]
513+
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
513514
; NOLSE-NEXT: mov x19, x0
514-
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
515-
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
516-
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
517-
; NOLSE-NEXT: ldaxp x8, x9, [x19]
518-
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
519-
; NOLSE-NEXT: stp x8, x9, [sp, #48]
520-
; NOLSE-NEXT: ldr q0, [sp, #48]
521515
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
516+
; NOLSE-NEXT: b .LBB6_2
517+
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
518+
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
519+
; NOLSE-NEXT: stp x12, x13, [sp, #32]
520+
; NOLSE-NEXT: cmp x13, x10
521+
; NOLSE-NEXT: ldr q1, [sp, #32]
522+
; NOLSE-NEXT: ccmp x12, x11, #0, eq
523+
; NOLSE-NEXT: b.eq .LBB6_6
524+
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
525+
; NOLSE-NEXT: // =>This Loop Header: Depth=1
526+
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
527+
; NOLSE-NEXT: mov v0.16b, v1.16b
528+
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
529+
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
522530
; NOLSE-NEXT: bl fminl
523-
; NOLSE-NEXT: str q0, [sp, #32]
524-
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
525-
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
526-
; NOLSE-NEXT: cbnz w10, .LBB6_1
527-
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
528-
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
529-
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
530-
; NOLSE-NEXT: add sp, sp, #80
531+
; NOLSE-NEXT: str q0, [sp, #48]
532+
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
533+
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
534+
; NOLSE-NEXT: str q0, [sp, #64]
535+
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
536+
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
537+
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
538+
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
539+
; NOLSE-NEXT: ldaxp x12, x13, [x19]
540+
; NOLSE-NEXT: cmp x12, x11
541+
; NOLSE-NEXT: cset w14, ne
542+
; NOLSE-NEXT: cmp x13, x10
543+
; NOLSE-NEXT: cinc w14, w14, ne
544+
; NOLSE-NEXT: cbz w14, .LBB6_5
545+
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
546+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
547+
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
548+
; NOLSE-NEXT: cbnz w14, .LBB6_3
549+
; NOLSE-NEXT: b .LBB6_1
550+
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
551+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
552+
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
553+
; NOLSE-NEXT: cbnz w14, .LBB6_3
554+
; NOLSE-NEXT: b .LBB6_1
555+
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
556+
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
557+
; NOLSE-NEXT: mov v0.16b, v1.16b
558+
; NOLSE-NEXT: add sp, sp, #96
531559
; NOLSE-NEXT: ret
532560
;
533561
; LSE-LABEL: test_atomicrmw_fmin_fp128_seq_cst_align16:

llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -508,26 +508,54 @@ define double @test_atomicrmw_fsub_f32_seq_cst_align8(ptr %ptr, double %value) #
508508
define fp128 @test_atomicrmw_fsub_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
509509
; NOLSE-LABEL: test_atomicrmw_fsub_fp128_seq_cst_align16:
510510
; NOLSE: // %bb.0:
511-
; NOLSE-NEXT: sub sp, sp, #80
512-
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
511+
; NOLSE-NEXT: sub sp, sp, #96
512+
; NOLSE-NEXT: ldr q1, [x0]
513+
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
513514
; NOLSE-NEXT: mov x19, x0
514-
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
515-
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
516-
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
517-
; NOLSE-NEXT: ldaxp x8, x9, [x19]
518-
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
519-
; NOLSE-NEXT: stp x8, x9, [sp, #48]
520-
; NOLSE-NEXT: ldr q0, [sp, #48]
521515
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
516+
; NOLSE-NEXT: b .LBB6_2
517+
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
518+
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
519+
; NOLSE-NEXT: stp x12, x13, [sp, #32]
520+
; NOLSE-NEXT: cmp x13, x10
521+
; NOLSE-NEXT: ldr q1, [sp, #32]
522+
; NOLSE-NEXT: ccmp x12, x11, #0, eq
523+
; NOLSE-NEXT: b.eq .LBB6_6
524+
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
525+
; NOLSE-NEXT: // =>This Loop Header: Depth=1
526+
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
527+
; NOLSE-NEXT: mov v0.16b, v1.16b
528+
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
529+
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
522530
; NOLSE-NEXT: bl __subtf3
523-
; NOLSE-NEXT: str q0, [sp, #32]
524-
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
525-
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
526-
; NOLSE-NEXT: cbnz w10, .LBB6_1
527-
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
528-
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
529-
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
530-
; NOLSE-NEXT: add sp, sp, #80
531+
; NOLSE-NEXT: str q0, [sp, #48]
532+
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
533+
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
534+
; NOLSE-NEXT: str q0, [sp, #64]
535+
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
536+
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
537+
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
538+
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
539+
; NOLSE-NEXT: ldaxp x12, x13, [x19]
540+
; NOLSE-NEXT: cmp x12, x11
541+
; NOLSE-NEXT: cset w14, ne
542+
; NOLSE-NEXT: cmp x13, x10
543+
; NOLSE-NEXT: cinc w14, w14, ne
544+
; NOLSE-NEXT: cbz w14, .LBB6_5
545+
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
546+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
547+
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
548+
; NOLSE-NEXT: cbnz w14, .LBB6_3
549+
; NOLSE-NEXT: b .LBB6_1
550+
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
551+
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
552+
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
553+
; NOLSE-NEXT: cbnz w14, .LBB6_3
554+
; NOLSE-NEXT: b .LBB6_1
555+
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
556+
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
557+
; NOLSE-NEXT: mov v0.16b, v1.16b
558+
; NOLSE-NEXT: add sp, sp, #96
531559
; NOLSE-NEXT: ret
532560
;
533561
; LSE-LABEL: test_atomicrmw_fsub_fp128_seq_cst_align16:

0 commit comments

Comments
 (0)