Skip to content

[ARM] Fix failure to register-allocate CMP_SWAP_64 pseudo-inst #106721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1942,11 +1942,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
Register TempReg = MI.getOperand(1).getReg();
// Duplicating undef operands into 2 instructions does not guarantee the same
// value on both; however, undef should be replaced by xzr anyway.
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
Register AddrReg = MI.getOperand(2).getReg();
assert(!MI.getOperand(1).isUndef() && "cannot handle undef");
Register AddrAndTempReg = MI.getOperand(1).getReg();
Register AddrReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_0);
Register TempReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_1);
assert(MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
"tied operands have different registers");
Register DesiredReg = MI.getOperand(3).getReg();
MachineOperand New = MI.getOperand(4);
New.setIsKill(false);
Expand Down
37 changes: 23 additions & 14 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10479,33 +10479,42 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
Results.push_back(Cycles32.getValue(1));
}

static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
bool isBigEndian = DAG.getDataLayout().isBigEndian();
if (isBigEndian)
std::swap (VLo, VHi);
static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0,
SDValue V1) {
SDLoc dl(V0.getNode());
SDValue RegClass =
DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}

/// Split the scalar \p V into two i32 halves and pass them to
/// createGPRPairNode2xi32 to be combined into a single pair node.
///
/// The two halves are passed in swapped order when the data layout is
/// big-endian.
static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) {
  SDLoc Loc(V.getNode());
  auto [Lo, Hi] = DAG.SplitScalar(V, Loc, MVT::i32, MVT::i32);
  // Big-endian layouts take the halves in the opposite order.
  if (DAG.getDataLayout().isBigEndian())
    return createGPRPairNode2xi32(DAG, Hi, Lo);
  return createGPRPairNode2xi32(DAG, Lo, Hi);
}

static void ReplaceCMP_SWAP_64Results(SDNode *N,
SmallVectorImpl<SDValue> & Results,
SelectionDAG &DAG) {
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
assert(N->getValueType(0) == MVT::i64 &&
"AtomicCmpSwap on types less than 64 should be legal");
SDValue Ops[] = {N->getOperand(1),
createGPRPairNode(DAG, N->getOperand(2)),
createGPRPairNode(DAG, N->getOperand(3)),
N->getOperand(0)};
SDValue Ops[] = {
createGPRPairNode2xi32(DAG, N->getOperand(1),
DAG.getUNDEF(MVT::i32)), // pointer, temp
createGPRPairNodei64(DAG, N->getOperand(2)), // expected
createGPRPairNodei64(DAG, N->getOperand(3)), // new
N->getOperand(0), // chain in
};
SDNode *CmpSwap = DAG.getMachineNode(
ARM::CMP_SWAP_64, SDLoc(N),
DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);

MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
Expand Down
17 changes: 15 additions & 2 deletions llvm/lib/Target/ARM/ARMInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -6509,8 +6509,21 @@ def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;

def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp),
(ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
// The addr_temp and addr_temp_out operands are logically a pair of GPR
// operands:
// * addr is an input, holding the address to swap.
// * temp is an earlyclobber output, used internally in the expansion of the
// pseudo-inst.
// These are combined into one GPRPair operand to ensure that register
// allocation always succeeds. In the worst case there are only 4 GPRPair
// registers available, of which this instruction needs 3 for the other
// operands. If these operands weren't combined they would also use two GPR
// registers, which could overlap with two different GPRPairs, causing
// allocation to fail. With them combined, we need to allocate 4 GPRPairs,
// which will always succeed.
let Constraints = "@earlyclobber $Rd,$addr_temp_out = $addr_temp" in
def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPRPair:$addr_temp_out),
(ins GPRPair:$addr_temp, GPRPair:$desired, GPRPair:$new),
NoItinerary, []>, Sched<[]>;
}

Expand Down
96 changes: 96 additions & 0 deletions llvm/test/CodeGen/ARM/atomic-64bit-fast-regalloc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-none-eabi -O0 | FileCheck %s --check-prefix=CHECK --check-prefix=LE
; RUN: llc < %s -mtriple=armv7eb-none-eabi -O0 | FileCheck %s --check-prefix=CHECK --check-prefix=BE

;; Previously, this failed during register allocation because the CMP_SWAP_64
;; pseudo-instruction has a lot of operands, many of which need to be even-odd
;; register pairs, and the over-aligned alloca in this function causes both a
;; frame pointer and a base pointer to be needed.

define void @test(ptr %ptr) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r8, r9, r10, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r8, r9, r10, r11, lr}
; CHECK-NEXT: .setfp r11, sp, #24
; CHECK-NEXT: add r11, sp, #24
; CHECK-NEXT: .pad #32
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: bfc sp, #0, #4
; CHECK-NEXT: mov r6, sp
; CHECK-NEXT: str r0, [r6, #28] @ 4-byte Spill
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_1: @ %block1
; CHECK-NEXT: ldr r0, [r6, #28] @ 4-byte Reload
; CHECK-NEXT: mov r1, sp
; CHECK-NEXT: sub r1, r1, #16
; CHECK-NEXT: bic r1, r1, #15
; CHECK-NEXT: mov sp, r1
; CHECK-NEXT: dmb ish
; CHECK-NEXT: ldr r1, [r0]
; CHECK-NEXT: ldr r0, [r0, #4]
; CHECK-NEXT: str r1, [r6, #20] @ 4-byte Spill
; CHECK-NEXT: str r0, [r6, #24] @ 4-byte Spill
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_2: @ %atomicrmw.start
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
; CHECK-NEXT: ldr r2, [r6, #24] @ 4-byte Reload
; CHECK-NEXT: ldr r0, [r6, #20] @ 4-byte Reload
; CHECK-NEXT: ldr r8, [r6, #28] @ 4-byte Reload
; LE-NEXT: str r2, [r6, #16] @ 4-byte Spill
; LE-NEXT: str r0, [r6, #12] @ 4-byte Spill
; BE-NEXT: str r2, [r6, #12] @ 4-byte Spill
; BE-NEXT: str r0, [r6, #16] @ 4-byte Spill
; CHECK-NEXT: @ implicit-def: $r1
; CHECK-NEXT: @ implicit-def: $r3
; CHECK-NEXT: @ kill: def $r8 killed $r8 def $r8_r9
; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: @ kill: def $r0 killed $r0 def $r0_r1
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: mov r2, r12
; CHECK-NEXT: mov r3, r12
; CHECK-NEXT: .LBB0_3: @ %atomicrmw.start
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrexd r4, r5, [r8]
; CHECK-NEXT: cmp r4, r0
; CHECK-NEXT: cmpeq r5, r1
; CHECK-NEXT: bne .LBB0_5
; CHECK-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=2
; CHECK-NEXT: strexd r9, r2, r3, [r8]
; CHECK-NEXT: cmp r9, #0
; CHECK-NEXT: bne .LBB0_3
; CHECK-NEXT: .LBB0_5: @ %atomicrmw.start
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: ldr r2, [r6, #12] @ 4-byte Reload
; LE-NEXT: ldr r1, [r6, #16] @ 4-byte Reload
; LE-NEXT: mov r0, r5
; LE-NEXT: eor r3, r0, r1
; LE-NEXT: mov r1, r4
; LE-NEXT: eor r2, r1, r2
; BE-NEXT: ldr r0, [r6, #16] @ 4-byte Reload
; BE-NEXT: mov r1, r4
; BE-NEXT: eor r3, r1, r0
; BE-NEXT: mov r0, r5
; BE-NEXT: eor r2, r0, r2
; CHECK-NEXT: orr r2, r2, r3
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: str r1, [r6, #20] @ 4-byte Spill
; CHECK-NEXT: str r0, [r6, #24] @ 4-byte Spill
; CHECK-NEXT: bne .LBB0_2
; CHECK-NEXT: b .LBB0_6
; CHECK-NEXT: .LBB0_6: @ %atomicrmw.end
; CHECK-NEXT: dmb ish
; CHECK-NEXT: sub sp, r11, #24
; CHECK-NEXT: pop {r4, r5, r6, r8, r9, r10, r11, pc}
entry:
br label %block1

block1:
%stuff = alloca i8, i64 16, align 16
store atomic i64 0, ptr %ptr seq_cst, align 8
ret void
}
96 changes: 54 additions & 42 deletions llvm/test/CodeGen/ARM/atomic-load-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -327,50 +327,56 @@ define void @test_old_store_64bit(ptr %p, i64 %v) {
; ARMOPTNONE-NEXT: push {r4, r5, r7, lr}
; ARMOPTNONE-NEXT: add r7, sp, #8
; ARMOPTNONE-NEXT: push {r8, r10, r11}
; ARMOPTNONE-NEXT: sub sp, sp, #20
; ARMOPTNONE-NEXT: str r0, [sp] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r2, [sp, #4] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r1, [sp, #8] @ 4-byte Spill
; ARMOPTNONE-NEXT: sub sp, sp, #24
; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; ARMOPTNONE-NEXT: dmb ish
; ARMOPTNONE-NEXT: ldr r1, [r0]
; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARMOPTNONE-NEXT: b LBB5_1
; ARMOPTNONE-NEXT: LBB5_1: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1
; ARMOPTNONE-NEXT: @ Child Loop BB5_2 Depth 2
; ARMOPTNONE-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r3, [sp] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r10, [sp, #8] @ 4-byte Reload
; ARMOPTNONE-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; ARMOPTNONE-NEXT: mov r11, r0
; ARMOPTNONE-NEXT: mov r8, r2
; ARMOPTNONE-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r12, [sp, #8] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r8, [sp, #4] @ 4-byte Reload
; ARMOPTNONE-NEXT: str r3, [sp] @ 4-byte Spill
; ARMOPTNONE-NEXT: @ implicit-def: $r1
; ARMOPTNONE-NEXT: @ implicit-def: $r9
; ARMOPTNONE-NEXT: @ kill: def $r8 killed $r8 def $r8_r9
; ARMOPTNONE-NEXT: mov r9, r1
; ARMOPTNONE-NEXT: @ kill: def $r0 killed $r0 def $r0_r1
; ARMOPTNONE-NEXT: mov r1, r12
; ARMOPTNONE-NEXT: mov r10, r2
; ARMOPTNONE-NEXT: mov r11, r3
; ARMOPTNONE-NEXT: LBB5_2: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ Parent Loop BB5_1 Depth=1
; ARMOPTNONE-NEXT: @ => This Inner Loop Header: Depth=2
; ARMOPTNONE-NEXT: ldrexd r4, r5, [r3]
; ARMOPTNONE-NEXT: cmp r4, r8
; ARMOPTNONE-NEXT: cmpeq r5, r9
; ARMOPTNONE-NEXT: ldrexd r4, r5, [r8]
; ARMOPTNONE-NEXT: cmp r4, r10
; ARMOPTNONE-NEXT: cmpeq r5, r11
; ARMOPTNONE-NEXT: bne LBB5_4
; ARMOPTNONE-NEXT: @ %bb.3: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ in Loop: Header=BB5_2 Depth=2
; ARMOPTNONE-NEXT: strexd r0, r10, r11, [r3]
; ARMOPTNONE-NEXT: cmp r0, #0
; ARMOPTNONE-NEXT: strexd r9, r0, r1, [r8]
; ARMOPTNONE-NEXT: cmp r9, #0
; ARMOPTNONE-NEXT: bne LBB5_2
; ARMOPTNONE-NEXT: LBB5_4: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ in Loop: Header=BB5_1 Depth=1
; ARMOPTNONE-NEXT: ldr r1, [sp] @ 4-byte Reload
; ARMOPTNONE-NEXT: mov r0, r5
; ARMOPTNONE-NEXT: eor r3, r0, r1
; ARMOPTNONE-NEXT: mov r1, r4
; ARMOPTNONE-NEXT: eor r2, r1, r2
; ARMOPTNONE-NEXT: orr r2, r2, r3
; ARMOPTNONE-NEXT: cmp r2, #0
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARMOPTNONE-NEXT: bne LBB5_1
; ARMOPTNONE-NEXT: b LBB5_5
; ARMOPTNONE-NEXT: LBB5_5: @ %atomicrmw.end
Expand Down Expand Up @@ -861,52 +867,58 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
; ARMOPTNONE-NEXT: push {r4, r5, r7, lr}
; ARMOPTNONE-NEXT: add r7, sp, #8
; ARMOPTNONE-NEXT: push {r8, r10, r11}
; ARMOPTNONE-NEXT: sub sp, sp, #20
; ARMOPTNONE-NEXT: str r0, [sp] @ 4-byte Spill
; ARMOPTNONE-NEXT: sub sp, sp, #24
; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
; ARMOPTNONE-NEXT: vmov d16, r1, r2
; ARMOPTNONE-NEXT: vmov r1, r2, d16
; ARMOPTNONE-NEXT: str r2, [sp, #4] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r1, [sp, #8] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; ARMOPTNONE-NEXT: dmb ish
; ARMOPTNONE-NEXT: ldr r1, [r0]
; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARMOPTNONE-NEXT: b LBB13_1
; ARMOPTNONE-NEXT: LBB13_1: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1
; ARMOPTNONE-NEXT: @ Child Loop BB13_2 Depth 2
; ARMOPTNONE-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r3, [sp] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r10, [sp, #8] @ 4-byte Reload
; ARMOPTNONE-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; ARMOPTNONE-NEXT: mov r11, r0
; ARMOPTNONE-NEXT: mov r8, r2
; ARMOPTNONE-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r12, [sp, #8] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; ARMOPTNONE-NEXT: ldr r8, [sp, #4] @ 4-byte Reload
; ARMOPTNONE-NEXT: str r3, [sp] @ 4-byte Spill
; ARMOPTNONE-NEXT: @ implicit-def: $r1
; ARMOPTNONE-NEXT: @ implicit-def: $r9
; ARMOPTNONE-NEXT: @ kill: def $r8 killed $r8 def $r8_r9
; ARMOPTNONE-NEXT: mov r9, r1
; ARMOPTNONE-NEXT: @ kill: def $r0 killed $r0 def $r0_r1
; ARMOPTNONE-NEXT: mov r1, r12
; ARMOPTNONE-NEXT: mov r10, r2
; ARMOPTNONE-NEXT: mov r11, r3
; ARMOPTNONE-NEXT: LBB13_2: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ Parent Loop BB13_1 Depth=1
; ARMOPTNONE-NEXT: @ => This Inner Loop Header: Depth=2
; ARMOPTNONE-NEXT: ldrexd r4, r5, [r3]
; ARMOPTNONE-NEXT: cmp r4, r8
; ARMOPTNONE-NEXT: cmpeq r5, r9
; ARMOPTNONE-NEXT: ldrexd r4, r5, [r8]
; ARMOPTNONE-NEXT: cmp r4, r10
; ARMOPTNONE-NEXT: cmpeq r5, r11
; ARMOPTNONE-NEXT: bne LBB13_4
; ARMOPTNONE-NEXT: @ %bb.3: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ in Loop: Header=BB13_2 Depth=2
; ARMOPTNONE-NEXT: strexd r0, r10, r11, [r3]
; ARMOPTNONE-NEXT: cmp r0, #0
; ARMOPTNONE-NEXT: strexd r9, r0, r1, [r8]
; ARMOPTNONE-NEXT: cmp r9, #0
; ARMOPTNONE-NEXT: bne LBB13_2
; ARMOPTNONE-NEXT: LBB13_4: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ in Loop: Header=BB13_1 Depth=1
; ARMOPTNONE-NEXT: ldr r1, [sp] @ 4-byte Reload
; ARMOPTNONE-NEXT: mov r0, r5
; ARMOPTNONE-NEXT: eor r3, r0, r1
; ARMOPTNONE-NEXT: mov r1, r4
; ARMOPTNONE-NEXT: eor r2, r1, r2
; ARMOPTNONE-NEXT: orr r2, r2, r3
; ARMOPTNONE-NEXT: cmp r2, #0
; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARMOPTNONE-NEXT: bne LBB13_1
; ARMOPTNONE-NEXT: b LBB13_5
; ARMOPTNONE-NEXT: LBB13_5: @ %atomicrmw.end
Expand Down
Loading
Loading