Skip to content

Reland "Revert "AtomicExpand: Allow incrementally legalizing atomicrmw"" #106793

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

vitalybuka
Copy link
Collaborator

@vitalybuka vitalybuka commented Aug 30, 2024

Reverts #106792

The first commit of PR is pure revert, the rest is a possible fix.

@llvmbot
Copy link
Member

llvmbot commented Aug 30, 2024

@llvm/pr-subscribers-backend-aarch64

Author: Vitaly Buka (vitalybuka)

Changes

Reverts llvm/llvm-project#106792


Patch is 111.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106793.diff

5 Files Affected:

  • (modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+24-11)
  • (modified) llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll (+203-170)
  • (modified) llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll (+203-170)
  • (modified) llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll (+203-170)
  • (modified) llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll (+203-170)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 39a705599f90cc..b9732e816ea7e6 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -351,17 +351,30 @@ bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
 
   bool MadeChange = false;
 
-  SmallVector<Instruction *, 1> AtomicInsts;
-
-  // Changing control-flow while iterating through it is a bad idea, so gather a
-  // list of all atomic instructions before we start.
-  for (Instruction &I : instructions(F))
-    if (I.isAtomic() && !isa<FenceInst>(&I))
-      AtomicInsts.push_back(&I);
-
-  for (auto *I : AtomicInsts) {
-    if (processAtomicInstr(I))
-      MadeChange = true;
+  for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE;) {
+    BasicBlock *BB = &*BBI;
+    ++BBI;
+
+    BasicBlock::iterator Next;
+
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+         I = Next) {
+      Instruction &Inst = *I;
+      Next = std::next(I);
+
+      if (processAtomicInstr(&Inst)) {
+        MadeChange = true;
+
+        // Detect control flow change and resume iteration from the original
+        // block to inspect any newly inserted blocks. This allows incremental
+        // legalizaton of atomicrmw and cmpxchg.
+        if (BB != Next->getParent()) {
+          BBI = BB->getIterator();
+          BBE = F.end();
+          break;
+        }
+      }
+    }
   }
 
   return MadeChange;
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index 0d230bb9dcc6e9..ed9c1b037d0cc7 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -43,46 +43,49 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
 ; SOFTFP-NOLSE:       // %bb.0:
-; SOFTFP-NOLSE-NEXT:    stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldrh w20, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
+; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    mov w21, w1
 ; SOFTFP-NOLSE-NEXT:    b .LBB0_2
-; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    cmp w8, w23
-; SOFTFP-NOLSE-NEXT:    mov w20, w8
-; SOFTFP-NOLSE-NEXT:    b.eq .LBB0_5
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB0_6
 ; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT:    and w23, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
-; SOFTFP-NOLSE-NEXT:    mov w0, w23
+; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
+; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT:    mov w21, w0
+; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT:    mov w1, w22
+; SOFTFP-NOLSE-NEXT:    mov w1, w21
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT:  .LBB0_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:    mov w8, w0
+; SOFTFP-NOLSE-NEXT:  .LBB0_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT:    ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz wzr, .LBB0_3
-; SOFTFP-NOLSE-NEXT:    b .LBB0_1
-; SOFTFP-NOLSE-NEXT:  .LBB0_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT:    mov w0, w20
+; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_3
+; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 2
   ret half %res
@@ -128,46 +131,49 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
 ; SOFTFP-NOLSE:       // %bb.0:
-; SOFTFP-NOLSE-NEXT:    stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldrh w20, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
+; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    mov w21, w1
 ; SOFTFP-NOLSE-NEXT:    b .LBB1_2
-; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    cmp w8, w23
-; SOFTFP-NOLSE-NEXT:    mov w20, w8
-; SOFTFP-NOLSE-NEXT:    b.eq .LBB1_5
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB1_6
 ; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    and w0, w21, #0xffff
-; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT:    and w23, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
-; SOFTFP-NOLSE-NEXT:    mov w0, w23
+; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
+; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
+; SOFTFP-NOLSE-NEXT:    mov w21, w0
+; SOFTFP-NOLSE-NEXT:    and w0, w22, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __gnu_h2f_ieee
-; SOFTFP-NOLSE-NEXT:    mov w1, w22
+; SOFTFP-NOLSE-NEXT:    mov w1, w21
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    bl __gnu_f2h_ieee
-; SOFTFP-NOLSE-NEXT:  .LBB1_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:    mov w8, w0
+; SOFTFP-NOLSE-NEXT:  .LBB1_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT:    ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz wzr, .LBB1_3
-; SOFTFP-NOLSE-NEXT:    b .LBB1_1
-; SOFTFP-NOLSE-NEXT:  .LBB1_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT:    mov w0, w20
+; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_3
+; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, half %value seq_cst, align 4
   ret half %res
@@ -232,36 +238,40 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE:       // %bb.0:
 ; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT:    lsl w21, w1, #16
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
+; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
 ; SOFTFP-NOLSE-NEXT:    b .LBB2_2
-; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT:    mov w20, w8
-; SOFTFP-NOLSE-NEXT:    b.eq .LBB2_5
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB2_6
 ; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT:    mov w1, w21
+; SOFTFP-NOLSE-NEXT:    mov w21, w0
+; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT:  .LBB2_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:    mov w8, w0
+; SOFTFP-NOLSE-NEXT:  .LBB2_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT:    ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz wzr, .LBB2_3
-; SOFTFP-NOLSE-NEXT:    b .LBB2_1
-; SOFTFP-NOLSE-NEXT:  .LBB2_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT:    mov w0, w20
+; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_3
+; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2
@@ -327,36 +337,40 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE:       // %bb.0:
 ; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldrh w20, [x0]
-; SOFTFP-NOLSE-NEXT:    lsl w21, w1, #16
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
+; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
+; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
 ; SOFTFP-NOLSE-NEXT:    b .LBB3_2
-; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    cmp w8, w20, uxth
-; SOFTFP-NOLSE-NEXT:    mov w20, w8
-; SOFTFP-NOLSE-NEXT:    b.eq .LBB3_5
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB3_6
 ; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    lsl w0, w20, #16
-; SOFTFP-NOLSE-NEXT:    mov w1, w21
+; SOFTFP-NOLSE-NEXT:    mov w21, w0
+; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
+; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
-; SOFTFP-NOLSE-NEXT:  .LBB3_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:    mov w8, w0
+; SOFTFP-NOLSE-NEXT:  .LBB3_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT:    ldaxrh w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp w8, w20, uxth
+; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
+; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxrh wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz wzr, .LBB3_3
-; SOFTFP-NOLSE-NEXT:    b .LBB3_1
-; SOFTFP-NOLSE-NEXT:  .LBB3_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT:    mov w0, w20
+; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_3
+; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 4
@@ -399,35 +413,38 @@ define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE:       // %bb.0:
 ; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldr w20, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
-; SOFTFP-NOLSE-NEXT:    mov w21, w1
+; SOFTFP-NOLSE-NEXT:    ldr w0, [x0]
+; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    b .LBB4_2
-; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    cmp w8, w20
-; SOFTFP-NOLSE-NEXT:    mov w20, w8
-; SOFTFP-NOLSE-NEXT:    b.eq .LBB4_5
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB4_6
 ; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    mov w0, w20
-; SOFTFP-NOLSE-NEXT:    mov w1, w21
+; SOFTFP-NOLSE-NEXT:    mov w1, w20
+; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
-; SOFTFP-NOLSE-NEXT:  .LBB4_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:    mov w8, w0
+; SOFTFP-NOLSE-NEXT:  .LBB4_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT:    ldaxr w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp w8, w20
+; SOFTFP-NOLSE-NEXT:    ldaxr w0, [x19]
+; SOFTFP-NOLSE-NEXT:    cmp w0, w21
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxr wzr, w0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz wzr, .LBB4_3
-; SOFTFP-NOLSE-NEXT:    b .LBB4_1
-; SOFTFP-NOLSE-NEXT:  .LBB4_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT:    mov w0, w20
+; SOFTFP-NOLSE-NEXT:    stlxr w9, w8, [x19]
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_3
+; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
@@ -469,36 +486,40 @@ define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
 ; SOFTFP-NOLSE:       // %bb.0:
 ; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; SOFTFP-NOLSE-NEXT:    ldr x21, [x0]
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldr x20, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
-; SOFTFP-NOLSE-NEXT:    mov x21, x1
+; SOFTFP-NOLSE-NEXT:    mov x20, x1
 ; SOFTFP-NOLSE-NEXT:    b .LBB5_2
-; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    cmp x8, x20
-; SOFTFP-NOLSE-NEXT:    mov x20, x8
-; SOFTFP-NOLSE-NEXT:    b.eq .LBB5_5
+; SOFTFP-NOLSE-NEXT:    mov w9, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    mov x21, x8
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_6
 ; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    mov x0, x20
-; SOFTFP-NOLSE-NEXT:    mov x1, x21
+; SOFTFP-NOLSE-NEXT:    mov x0, x21
+; SOFTFP-NOLSE-NEXT:    mov x1, x20
 ; SOFTFP-NOLSE-NEXT:    bl __adddf3
-; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr x8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp x8, x20
+; SOFTFP-NOLSE-NEXT:    cmp x8, x21
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxr wzr, x0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz wzr, .LBB5_3
-; SOFTFP-NOLSE-NEXT:    b .LBB5_1
-; SOFTFP-NOLSE-NEXT:  .LBB5_5: // %atomicrmw.end
-; SOFTFP-NOLSE-NEXT:    mov x0, x20
+; SOFTFP-NOLSE-NEXT:    stlxr w9, x0, [x19]
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
+; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w9, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    mov x21, x8
+; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    mov x0, x21
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
@@ -687,18 +708,18 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    ldrh w23, [x0, #2]
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldrh w21, [x0]
-; SOFTFP-NOLSE-NEXT:    mov w22, w1
+; SOFTFP-NOLSE-NEXT:    ldrh w22, [x0]
+; SOFTFP-NOLSE-NEXT:    mov w21, w1
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov w19, w2
 ; SOFTFP-NOLSE-NEXT:    mov x20, x0
 ; SOFTFP-NOLSE-NEXT:    b .LBB7_2
-; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    lsr w23, w8, #16
-; SOFTFP-NOLSE-NEXT:    cmp w8, w21
-; SOFTFP-NOLSE-NEXT:    mov w21, w8
-; SOFTFP-NOLSE-NEXT:    b.eq .LBB7_5
+; SOFTFP-NOLSE-NEXT:...
[truncated]

@vitalybuka
Copy link
Collaborator Author

09d9461 is pure revert, the rest is a possible fix

@arsenm arsenm merged commit 0628683 into main Sep 4, 2024
8 checks passed
@arsenm arsenm deleted the revert-106792-revert-103371-users/arsenm/atomic-expand-visit-new-blocks branch September 4, 2024 06:29
vitalybuka added a commit that referenced this pull request Sep 4, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants