Skip to content

Commit 8b0209a

Browse files
authored
Merge pull request #3917 from apple/eng/PR-88127379
AArch64: do not use xzr for ldxp -> stxp dataflow.
2 parents adbc02c + d59e957 commit 8b0209a

File tree

3 files changed

+80
-28
lines changed

3 files changed

+80
-28
lines changed

llvm/lib/Target/AArch64/AArch64InstrAtomics.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,8 @@ def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
430430

431431
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
432432
mayLoad = 1, mayStore = 1 in {
433-
class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch),
433+
class cmp_swap_128 : Pseudo<(outs GPR64common:$RdLo, GPR64common:$RdHi,
434+
GPR32common:$scratch),
434435
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
435436
GPR64:$newLo, GPR64:$newHi), []>,
436437
Sched<[WriteAtomic]>;

llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -30,35 +30,37 @@ body: |
3030
; CHECK: RET_ReallyLR
3131
; CHECK-NOLSE-LABEL: name: compare_swap_128
3232
; CHECK-NOLSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4
33-
; CHECK-NOLSE: [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0
34-
; CHECK-NOLSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
35-
; CHECK-NOLSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
36-
; CHECK-NOLSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
37-
; CHECK-NOLSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
38-
; CHECK-NOLSE: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64)
39-
; CHECK-NOLSE: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64)
40-
; CHECK-NOLSE: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64)
41-
; CHECK-NOLSE: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64)
42-
; CHECK-NOLSE: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32common = CMP_SWAP_128_ACQUIRE [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire (s128))
43-
; CHECK-NOLSE: [[COPY9:%[0-9]+]]:gpr64 = COPY %16
44-
; CHECK-NOLSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64)
45-
; CHECK-NOLSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
46-
; CHECK-NOLSE: RET_ReallyLR
33+
; CHECK-NOLSE-NEXT: {{ $}}
34+
; CHECK-NOLSE-NEXT: [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0
35+
; CHECK-NOLSE-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
36+
; CHECK-NOLSE-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
37+
; CHECK-NOLSE-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
38+
; CHECK-NOLSE-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
39+
; CHECK-NOLSE-NEXT: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64)
40+
; CHECK-NOLSE-NEXT: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64)
41+
; CHECK-NOLSE-NEXT: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64)
42+
; CHECK-NOLSE-NEXT: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64)
43+
; CHECK-NOLSE-NEXT: early-clobber %13:gpr64common(s64), early-clobber %14:gpr64common(s64), early-clobber %16:gpr32common = CMP_SWAP_128_ACQUIRE [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire (s128))
44+
; CHECK-NOLSE-NEXT: [[COPY9:%[0-9]+]]:gpr64 = COPY %16
45+
; CHECK-NOLSE-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64)
46+
; CHECK-NOLSE-NEXT: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
47+
; CHECK-NOLSE-NEXT: RET_ReallyLR
4748
; CHECK-LSE-LABEL: name: compare_swap_128
4849
; CHECK-LSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4
49-
; CHECK-LSE: [[COPY:%[0-9]+]]:gpr64sp(p0) = COPY $x0
50-
; CHECK-LSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
51-
; CHECK-LSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
52-
; CHECK-LSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
53-
; CHECK-LSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
54-
; CHECK-LSE: [[REG_SEQUENCE:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY1]](s64), %subreg.sube64, [[COPY2]](s64), %subreg.subo64
55-
; CHECK-LSE: [[REG_SEQUENCE1:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY3]](s64), %subreg.sube64, [[COPY4]](s64), %subreg.subo64
56-
; CHECK-LSE: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire (s128))
57-
; CHECK-LSE: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 0
58-
; CHECK-LSE: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 64
59-
; CHECK-LSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[EXTRACT]](s64), [[EXTRACT1]](s64)
60-
; CHECK-LSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
61-
; CHECK-LSE: RET_ReallyLR
50+
; CHECK-LSE-NEXT: {{ $}}
51+
; CHECK-LSE-NEXT: [[COPY:%[0-9]+]]:gpr64sp(p0) = COPY $x0
52+
; CHECK-LSE-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
53+
; CHECK-LSE-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
54+
; CHECK-LSE-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
55+
; CHECK-LSE-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
56+
; CHECK-LSE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY1]](s64), %subreg.sube64, [[COPY2]](s64), %subreg.subo64
57+
; CHECK-LSE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY3]](s64), %subreg.sube64, [[COPY4]](s64), %subreg.subo64
58+
; CHECK-LSE-NEXT: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire (s128))
59+
; CHECK-LSE-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 0
60+
; CHECK-LSE-NEXT: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 64
61+
; CHECK-LSE-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[EXTRACT]](s64), [[EXTRACT1]](s64)
62+
; CHECK-LSE-NEXT: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
63+
; CHECK-LSE-NEXT: RET_ReallyLR
6264
%0:_(p0) = COPY $x0
6365
%3:_(s64) = COPY $x1
6466
%4:_(s64) = COPY $x2

llvm/test/CodeGen/AArch64/arm64-atomic-128.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,3 +474,52 @@ define void @atomic_store_relaxed(i128 %in, i128* %p) {
474474
store atomic i128 %in, i128* %p unordered, align 16
475475
ret void
476476
}
477+
478+
; In the unsuccessful case we store the loaded value back to ensure it was not
479+
; torn, so the registers loaded by ldxp must not be xzr even if the result is dead.
480+
define void @cmpxchg_dead(i128* %ptr, i128 %desired, i128 %new) {
481+
; NOOUTLINE-LABEL: cmpxchg_dead:
482+
; NOOUTLINE: // %bb.0:
483+
; NOOUTLINE-NEXT: .LBB17_1: // =>This Inner Loop Header: Depth=1
484+
; NOOUTLINE-NEXT: ldxp x8, x9, [x0]
485+
; NOOUTLINE-NEXT: cmp x8, x2
486+
; NOOUTLINE-NEXT: cset w10, ne
487+
; NOOUTLINE-NEXT: cmp x9, x3
488+
; NOOUTLINE-NEXT: cinc w10, w10, ne
489+
; NOOUTLINE-NEXT: cbz w10, .LBB17_3
490+
; NOOUTLINE-NEXT: // %bb.2: // in Loop: Header=BB17_1 Depth=1
491+
; NOOUTLINE-NEXT: stxp w10, x8, x9, [x0]
492+
; NOOUTLINE-NEXT: cbnz w10, .LBB17_1
493+
; NOOUTLINE-NEXT: b .LBB17_4
494+
; NOOUTLINE-NEXT: .LBB17_3: // in Loop: Header=BB17_1 Depth=1
495+
; NOOUTLINE-NEXT: stxp w10, x4, x5, [x0]
496+
; NOOUTLINE-NEXT: cbnz w10, .LBB17_1
497+
; NOOUTLINE-NEXT: .LBB17_4:
498+
; NOOUTLINE-NEXT: ret
499+
;
500+
; OUTLINE-LABEL: cmpxchg_dead:
501+
; OUTLINE: // %bb.0:
502+
; OUTLINE-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
503+
; OUTLINE-NEXT: .cfi_def_cfa_offset 16
504+
; OUTLINE-NEXT: .cfi_offset w30, -16
505+
; OUTLINE-NEXT: mov x1, x3
506+
; OUTLINE-NEXT: mov x8, x0
507+
; OUTLINE-NEXT: mov x0, x2
508+
; OUTLINE-NEXT: mov x2, x4
509+
; OUTLINE-NEXT: mov x3, x5
510+
; OUTLINE-NEXT: mov x4, x8
511+
; OUTLINE-NEXT: bl __aarch64_cas16_relax
512+
; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
513+
; OUTLINE-NEXT: ret
514+
;
515+
; LSE-LABEL: cmpxchg_dead:
516+
; LSE: // %bb.0:
517+
; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
518+
; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
519+
; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
520+
; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
521+
; LSE-NEXT: casp x2, x3, x4, x5, [x0]
522+
; LSE-NEXT: ret
523+
cmpxchg i128* %ptr, i128 %desired, i128 %new monotonic monotonic
524+
ret void
525+
}

0 commit comments

Comments
 (0)