Skip to content

Commit d59e957

Browse files
committed
AArch64: do not use xzr for ldxp -> stxp dataflow.
If the result of a cmpxchg is unused, regalloc chooses `xzr` for the defs of CMP_SWAP_128*. However, on the failure path this gets expanded to an LDXP -> STXP sequence that stores the original value back (to ensure no tearing occurred). Using `xzr` there unintentionally nulls out half of the stored value. So instead use GPR64common for these defs, forcing regalloc to choose a real register.
1 parent 39db93a commit d59e957

File tree

3 files changed

+80
-28
lines changed

3 files changed

+80
-28
lines changed

llvm/lib/Target/AArch64/AArch64InstrAtomics.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,8 @@ def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
430430

431431
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
432432
mayLoad = 1, mayStore = 1 in {
433-
class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch),
433+
class cmp_swap_128 : Pseudo<(outs GPR64common:$RdLo, GPR64common:$RdHi,
434+
GPR32common:$scratch),
434435
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
435436
GPR64:$newLo, GPR64:$newHi), []>,
436437
Sched<[WriteAtomic]>;

llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -30,35 +30,37 @@ body: |
3030
; CHECK: RET_ReallyLR
3131
; CHECK-NOLSE-LABEL: name: compare_swap_128
3232
; CHECK-NOLSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4
33-
; CHECK-NOLSE: [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0
34-
; CHECK-NOLSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
35-
; CHECK-NOLSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
36-
; CHECK-NOLSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
37-
; CHECK-NOLSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
38-
; CHECK-NOLSE: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64)
39-
; CHECK-NOLSE: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64)
40-
; CHECK-NOLSE: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64)
41-
; CHECK-NOLSE: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64)
42-
; CHECK-NOLSE: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32common = CMP_SWAP_128_ACQUIRE [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire (s128))
43-
; CHECK-NOLSE: [[COPY9:%[0-9]+]]:gpr64 = COPY %16
44-
; CHECK-NOLSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64)
45-
; CHECK-NOLSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
46-
; CHECK-NOLSE: RET_ReallyLR
33+
; CHECK-NOLSE-NEXT: {{ $}}
34+
; CHECK-NOLSE-NEXT: [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0
35+
; CHECK-NOLSE-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
36+
; CHECK-NOLSE-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
37+
; CHECK-NOLSE-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
38+
; CHECK-NOLSE-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
39+
; CHECK-NOLSE-NEXT: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64)
40+
; CHECK-NOLSE-NEXT: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64)
41+
; CHECK-NOLSE-NEXT: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64)
42+
; CHECK-NOLSE-NEXT: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64)
43+
; CHECK-NOLSE-NEXT: early-clobber %13:gpr64common(s64), early-clobber %14:gpr64common(s64), early-clobber %16:gpr32common = CMP_SWAP_128_ACQUIRE [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire (s128))
44+
; CHECK-NOLSE-NEXT: [[COPY9:%[0-9]+]]:gpr64 = COPY %16
45+
; CHECK-NOLSE-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64)
46+
; CHECK-NOLSE-NEXT: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
47+
; CHECK-NOLSE-NEXT: RET_ReallyLR
4748
; CHECK-LSE-LABEL: name: compare_swap_128
4849
; CHECK-LSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4
49-
; CHECK-LSE: [[COPY:%[0-9]+]]:gpr64sp(p0) = COPY $x0
50-
; CHECK-LSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
51-
; CHECK-LSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
52-
; CHECK-LSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
53-
; CHECK-LSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
54-
; CHECK-LSE: [[REG_SEQUENCE:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY1]](s64), %subreg.sube64, [[COPY2]](s64), %subreg.subo64
55-
; CHECK-LSE: [[REG_SEQUENCE1:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY3]](s64), %subreg.sube64, [[COPY4]](s64), %subreg.subo64
56-
; CHECK-LSE: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire (s128))
57-
; CHECK-LSE: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 0
58-
; CHECK-LSE: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 64
59-
; CHECK-LSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[EXTRACT]](s64), [[EXTRACT1]](s64)
60-
; CHECK-LSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
61-
; CHECK-LSE: RET_ReallyLR
50+
; CHECK-LSE-NEXT: {{ $}}
51+
; CHECK-LSE-NEXT: [[COPY:%[0-9]+]]:gpr64sp(p0) = COPY $x0
52+
; CHECK-LSE-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
53+
; CHECK-LSE-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
54+
; CHECK-LSE-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
55+
; CHECK-LSE-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
56+
; CHECK-LSE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY1]](s64), %subreg.sube64, [[COPY2]](s64), %subreg.subo64
57+
; CHECK-LSE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY3]](s64), %subreg.sube64, [[COPY4]](s64), %subreg.subo64
58+
; CHECK-LSE-NEXT: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire (s128))
59+
; CHECK-LSE-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 0
60+
; CHECK-LSE-NEXT: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 64
61+
; CHECK-LSE-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[EXTRACT]](s64), [[EXTRACT1]](s64)
62+
; CHECK-LSE-NEXT: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
63+
; CHECK-LSE-NEXT: RET_ReallyLR
6264
%0:_(p0) = COPY $x0
6365
%3:_(s64) = COPY $x1
6466
%4:_(s64) = COPY $x2

llvm/test/CodeGen/AArch64/arm64-atomic-128.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,3 +474,52 @@ define void @atomic_store_relaxed(i128 %in, i128* %p) {
474474
store atomic i128 %in, i128* %p unordered, align 16
475475
ret void
476476
}
477+
478+
; Since we store the original value to ensure no tearing for the unsuccessful
479+
; case, the register used must not be xzr.
480+
define void @cmpxchg_dead(i128* %ptr, i128 %desired, i128 %new) {
481+
; NOOUTLINE-LABEL: cmpxchg_dead:
482+
; NOOUTLINE: // %bb.0:
483+
; NOOUTLINE-NEXT: .LBB17_1: // =>This Inner Loop Header: Depth=1
484+
; NOOUTLINE-NEXT: ldxp x8, x9, [x0]
485+
; NOOUTLINE-NEXT: cmp x8, x2
486+
; NOOUTLINE-NEXT: cset w10, ne
487+
; NOOUTLINE-NEXT: cmp x9, x3
488+
; NOOUTLINE-NEXT: cinc w10, w10, ne
489+
; NOOUTLINE-NEXT: cbz w10, .LBB17_3
490+
; NOOUTLINE-NEXT: // %bb.2: // in Loop: Header=BB17_1 Depth=1
491+
; NOOUTLINE-NEXT: stxp w10, x8, x9, [x0]
492+
; NOOUTLINE-NEXT: cbnz w10, .LBB17_1
493+
; NOOUTLINE-NEXT: b .LBB17_4
494+
; NOOUTLINE-NEXT: .LBB17_3: // in Loop: Header=BB17_1 Depth=1
495+
; NOOUTLINE-NEXT: stxp w10, x4, x5, [x0]
496+
; NOOUTLINE-NEXT: cbnz w10, .LBB17_1
497+
; NOOUTLINE-NEXT: .LBB17_4:
498+
; NOOUTLINE-NEXT: ret
499+
;
500+
; OUTLINE-LABEL: cmpxchg_dead:
501+
; OUTLINE: // %bb.0:
502+
; OUTLINE-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
503+
; OUTLINE-NEXT: .cfi_def_cfa_offset 16
504+
; OUTLINE-NEXT: .cfi_offset w30, -16
505+
; OUTLINE-NEXT: mov x1, x3
506+
; OUTLINE-NEXT: mov x8, x0
507+
; OUTLINE-NEXT: mov x0, x2
508+
; OUTLINE-NEXT: mov x2, x4
509+
; OUTLINE-NEXT: mov x3, x5
510+
; OUTLINE-NEXT: mov x4, x8
511+
; OUTLINE-NEXT: bl __aarch64_cas16_relax
512+
; OUTLINE-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
513+
; OUTLINE-NEXT: ret
514+
;
515+
; LSE-LABEL: cmpxchg_dead:
516+
; LSE: // %bb.0:
517+
; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
518+
; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
519+
; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
520+
; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
521+
; LSE-NEXT: casp x2, x3, x4, x5, [x0]
522+
; LSE-NEXT: ret
523+
cmpxchg i128* %ptr, i128 %desired, i128 %new monotonic monotonic
524+
ret void
525+
}

0 commit comments

Comments
 (0)