
AtomicExpand: Refactor atomic instruction handling #102914


Merged

Conversation

arsenm
Contributor

@arsenm arsenm commented Aug 12, 2024

Move the processing of an instruction into a helper function. Also
avoid redundant checking for all types of atomic instructions.
Including the assert, it was effectively performing the same check
3 times.
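
For orientation before the diff below, here is a minimal, simplified sketch of the pre-refactor shape the description refers to (condensed from the removed hunk; not the exact pass code, and sketchOldLoop is an illustrative name):

#include <cassert>
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Condensed from the removed hunk: whether an instruction is an atomic we
// care about was effectively established three times before any expansion.
static void sketchOldLoop(Function &F) {
  SmallVector<Instruction *, 1> AtomicInsts;
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))   // check 1: atomic at all?
      AtomicInsts.push_back(&I);

  for (Instruction *I : AtomicInsts) {
    auto *LI = dyn_cast<LoadInst>(I);          // check 2: which atomic kind?
    auto *SI = dyn_cast<StoreInst>(I);
    auto *RMWI = dyn_cast<AtomicRMWInst>(I);
    auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);
    // check 3: the assert re-verifies what checks 1 and 2 already established.
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
    (void)LI; (void)SI; (void)RMWI; (void)CASI;
  }
}

In the patch below, processAtomicInstr folds this into a single dyn_cast-based dispatch with early returns for non-atomic loads and stores, and the assert disappears.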

Contributor Author

arsenm commented Aug 12, 2024

This stack of pull requests is managed by Graphite. Learn more about stacking.


@arsenm arsenm force-pushed the users/arsenm/atomic-expand-stop-precollecting-atomic-instrs branch 2 times, most recently from 6a4771c to f3da020 Compare August 12, 2024 15:11
@arsenm arsenm marked this pull request as ready for review August 12, 2024 15:15
@llvmbot
Member

llvmbot commented Aug 12, 2024

@llvm/pr-subscribers-backend-nvptx
@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-backend-powerpc

Author: Matt Arsenault (arsenm)

Changes

Move the processing of an instruction into a helper function. Also
avoid redundant checking for all types of atomic instructions.
Including the assert, it was effectively performing the same check
3 times.


Patch is 45.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102914.diff

6 Files Affected:

  • (modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+140-129)
  • (modified) llvm/test/CodeGen/NVPTX/atomics-sm70.ll (+8-8)
  • (modified) llvm/test/CodeGen/NVPTX/atomics-sm90.ll (+8-8)
  • (modified) llvm/test/CodeGen/PowerPC/all-atomics.ll (+44-44)
  • (modified) llvm/test/CodeGen/X86/atomic6432.ll (+28-28)
  • (modified) llvm/test/CodeGen/X86/pr5145.ll (+8-8)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 49836b914784fc..f6f6af715abd82 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -119,6 +119,8 @@ class AtomicExpandImpl {
   llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                  CreateCmpXchgInstFun CreateCmpXchg);
 
+  bool processAtomicInstr(Instruction *I);
+
 public:
   bool run(Function &F, const TargetMachine *TM);
 };
@@ -203,149 +205,158 @@ static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
          Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
 }
 
-bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
-  const auto *Subtarget = TM->getSubtargetImpl(F);
-  if (!Subtarget->enableAtomicExpand())
-    return false;
-  TLI = Subtarget->getTargetLowering();
-  DL = &F.getDataLayout();
+bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
+  auto *LI = dyn_cast<LoadInst>(I);
+  auto *SI = dyn_cast<StoreInst>(I);
+  auto *RMWI = dyn_cast<AtomicRMWInst>(I);
+  auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);
 
-  SmallVector<Instruction *, 1> AtomicInsts;
+  // If the Size/Alignment is not supported, replace with a libcall.
+  if (LI) {
+    if (!LI->isAtomic())
+      return false;
 
-  // Changing control-flow while iterating through it is a bad idea, so gather a
-  // list of all atomic instructions before we start.
-  for (Instruction &I : instructions(F))
-    if (I.isAtomic() && !isa<FenceInst>(&I))
-      AtomicInsts.push_back(&I);
+    if (!atomicSizeSupported(TLI, LI)) {
+      expandAtomicLoadToLibcall(LI);
+      return true;
+    }
+  } else if (SI) {
+    if (!SI->isAtomic())
+      return false;
 
-  bool MadeChange = false;
-  for (auto *I : AtomicInsts) {
-    auto LI = dyn_cast<LoadInst>(I);
-    auto SI = dyn_cast<StoreInst>(I);
-    auto RMWI = dyn_cast<AtomicRMWInst>(I);
-    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
-    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
-
-    // If the Size/Alignment is not supported, replace with a libcall.
-    if (LI) {
-      if (!atomicSizeSupported(TLI, LI)) {
-        expandAtomicLoadToLibcall(LI);
-        MadeChange = true;
-        continue;
-      }
-    } else if (SI) {
-      if (!atomicSizeSupported(TLI, SI)) {
-        expandAtomicStoreToLibcall(SI);
-        MadeChange = true;
-        continue;
-      }
-    } else if (RMWI) {
-      if (!atomicSizeSupported(TLI, RMWI)) {
-        expandAtomicRMWToLibcall(RMWI);
-        MadeChange = true;
-        continue;
-      }
-    } else if (CASI) {
-      if (!atomicSizeSupported(TLI, CASI)) {
-        expandAtomicCASToLibcall(CASI);
-        MadeChange = true;
-        continue;
-      }
+    if (!atomicSizeSupported(TLI, SI)) {
+      expandAtomicStoreToLibcall(SI);
+      return true;
+    }
+  } else if (RMWI) {
+    if (!atomicSizeSupported(TLI, RMWI)) {
+      expandAtomicRMWToLibcall(RMWI);
+      return true;
     }
+  } else if (CASI) {
+    if (!atomicSizeSupported(TLI, CASI)) {
+      expandAtomicCASToLibcall(CASI);
+      return true;
+    }
+  } else
+    return false;
 
-    if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
-                  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
-      I = LI = convertAtomicLoadToIntegerType(LI);
-      MadeChange = true;
-    } else if (SI &&
-               TLI->shouldCastAtomicStoreInIR(SI) ==
-                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
-      I = SI = convertAtomicStoreToIntegerType(SI);
+  bool MadeChange = false;
+
+  if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
+                TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+    I = LI = convertAtomicLoadToIntegerType(LI);
+    MadeChange = true;
+  } else if (SI && TLI->shouldCastAtomicStoreInIR(SI) ==
+                       TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+    I = SI = convertAtomicStoreToIntegerType(SI);
+    MadeChange = true;
+  } else if (RMWI &&
+             TLI->shouldCastAtomicRMWIInIR(RMWI) ==
+                 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+    I = RMWI = convertAtomicXchgToIntegerType(RMWI);
+    MadeChange = true;
+  } else if (CASI) {
+    // TODO: when we're ready to make the change at the IR level, we can
+    // extend convertCmpXchgToInteger for floating point too.
+    if (CASI->getCompareOperand()->getType()->isPointerTy()) {
+      // TODO: add a TLI hook to control this so that each target can
+      // convert to lowering the original type one at a time.
+      I = CASI = convertCmpXchgToIntegerType(CASI);
       MadeChange = true;
-    } else if (RMWI &&
-               TLI->shouldCastAtomicRMWIInIR(RMWI) ==
-                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
-      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
+    }
+  }
+
+  if (TLI->shouldInsertFencesForAtomic(I)) {
+    auto FenceOrdering = AtomicOrdering::Monotonic;
+    if (LI && isAcquireOrStronger(LI->getOrdering())) {
+      FenceOrdering = LI->getOrdering();
+      LI->setOrdering(AtomicOrdering::Monotonic);
+    } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
+      FenceOrdering = SI->getOrdering();
+      SI->setOrdering(AtomicOrdering::Monotonic);
+    } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
+                        isAcquireOrStronger(RMWI->getOrdering()))) {
+      FenceOrdering = RMWI->getOrdering();
+      RMWI->setOrdering(AtomicOrdering::Monotonic);
+    } else if (CASI &&
+               TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+                   TargetLoweringBase::AtomicExpansionKind::None &&
+               (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
+                isAcquireOrStronger(CASI->getSuccessOrdering()) ||
+                isAcquireOrStronger(CASI->getFailureOrdering()))) {
+      // If a compare and swap is lowered to LL/SC, we can do smarter fence
+      // insertion, with a stronger one on the success path than on the
+      // failure path. As a result, fence insertion is directly done by
+      // expandAtomicCmpXchg in that case.
+      FenceOrdering = CASI->getMergedOrdering();
+      CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
+      CASI->setFailureOrdering(AtomicOrdering::Monotonic);
+    }
+
+    if (FenceOrdering != AtomicOrdering::Monotonic) {
+      MadeChange |= bracketInstWithFences(I, FenceOrdering);
+    }
+  } else if (I->hasAtomicStore() &&
+             TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
+    auto FenceOrdering = AtomicOrdering::Monotonic;
+    if (SI)
+      FenceOrdering = SI->getOrdering();
+    else if (RMWI)
+      FenceOrdering = RMWI->getOrdering();
+    else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
+                         TargetLoweringBase::AtomicExpansionKind::LLSC)
+      // LLSC is handled in expandAtomicCmpXchg().
+      FenceOrdering = CASI->getSuccessOrdering();
+
+    IRBuilder Builder(I);
+    if (auto TrailingFence =
+            TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
+      TrailingFence->moveAfter(I);
       MadeChange = true;
-    } else if (CASI) {
-      // TODO: when we're ready to make the change at the IR level, we can
-      // extend convertCmpXchgToInteger for floating point too.
-      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
-        // TODO: add a TLI hook to control this so that each target can
-        // convert to lowering the original type one at a time.
-        I = CASI = convertCmpXchgToIntegerType(CASI);
-        MadeChange = true;
-      }
     }
+  }
 
-    if (TLI->shouldInsertFencesForAtomic(I)) {
-      auto FenceOrdering = AtomicOrdering::Monotonic;
-      if (LI && isAcquireOrStronger(LI->getOrdering())) {
-        FenceOrdering = LI->getOrdering();
-        LI->setOrdering(AtomicOrdering::Monotonic);
-      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
-        FenceOrdering = SI->getOrdering();
-        SI->setOrdering(AtomicOrdering::Monotonic);
-      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
-                          isAcquireOrStronger(RMWI->getOrdering()))) {
-        FenceOrdering = RMWI->getOrdering();
-        RMWI->setOrdering(AtomicOrdering::Monotonic);
-      } else if (CASI &&
-                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
-                     TargetLoweringBase::AtomicExpansionKind::None &&
-                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
-                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
-                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
-        // If a compare and swap is lowered to LL/SC, we can do smarter fence
-        // insertion, with a stronger one on the success path than on the
-        // failure path. As a result, fence insertion is directly done by
-        // expandAtomicCmpXchg in that case.
-        FenceOrdering = CASI->getMergedOrdering();
-        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
-        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
-      }
+  if (LI)
+    MadeChange |= tryExpandAtomicLoad(LI);
+  else if (SI)
+    MadeChange |= tryExpandAtomicStore(SI);
+  else if (RMWI) {
+    // There are two different ways of expanding RMW instructions:
+    // - into a load if it is idempotent
+    // - into a Cmpxchg/LL-SC loop otherwise
+    // we try them in that order.
+
+    if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+      MadeChange = true;
 
-      if (FenceOrdering != AtomicOrdering::Monotonic) {
-        MadeChange |= bracketInstWithFences(I, FenceOrdering);
-      }
-    } else if (I->hasAtomicStore() &&
-               TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
-      auto FenceOrdering = AtomicOrdering::Monotonic;
-      if (SI)
-        FenceOrdering = SI->getOrdering();
-      else if (RMWI)
-        FenceOrdering = RMWI->getOrdering();
-      else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
-                           TargetLoweringBase::AtomicExpansionKind::LLSC)
-        // LLSC is handled in expandAtomicCmpXchg().
-        FenceOrdering = CASI->getSuccessOrdering();
-
-      IRBuilder Builder(I);
-      if (auto TrailingFence =
-              TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
-        TrailingFence->moveAfter(I);
-        MadeChange = true;
-      }
+    } else {
+      MadeChange |= tryExpandAtomicRMW(RMWI);
     }
+  } else if (CASI)
+    MadeChange |= tryExpandAtomicCmpXchg(CASI);
 
-    if (LI)
-      MadeChange |= tryExpandAtomicLoad(LI);
-    else if (SI)
-      MadeChange |= tryExpandAtomicStore(SI);
-    else if (RMWI) {
-      // There are two different ways of expanding RMW instructions:
-      // - into a load if it is idempotent
-      // - into a Cmpxchg/LL-SC loop otherwise
-      // we try them in that order.
-
-      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+  return MadeChange;
+}
+
+bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
+  const auto *Subtarget = TM->getSubtargetImpl(F);
+  if (!Subtarget->enableAtomicExpand())
+    return false;
+  TLI = Subtarget->getTargetLowering();
+  DL = &F.getDataLayout();
+
+  bool MadeChange = false;
+
+  for (BasicBlock &BB : make_early_inc_range(F)) {
+    for (Instruction &I : make_early_inc_range(reverse(BB))) {
+      // We do this iteration backwards because the control flow introducing
+      // transforms split the block at the end.
+      if (processAtomicInstr(&I))
         MadeChange = true;
-      } else {
-        MadeChange |= tryExpandAtomicRMW(RMWI);
-      }
-    } else if (CASI)
-      MadeChange |= tryExpandAtomicCmpXchg(CASI);
+    }
   }
+
   return MadeChange;
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index 9cc45fbe313b7e..0c1ca8cb7ac166 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -61,7 +61,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
 ; CHECKPTX62-NEXT:    shl.b32 %r27, %r26, %r2;
 ; CHECKPTX62-NEXT:    not.b32 %r3, %r27;
 ; CHECKPTX62-NEXT:    ld.u32 %r54, [%r1];
-; CHECKPTX62-NEXT:  $L__BB0_1: // %atomicrmw.start
+; CHECKPTX62-NEXT:  $L__BB0_1: // %atomicrmw.start45
 ; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECKPTX62-NEXT:    shr.u32 %r28, %r54, %r2;
 ; CHECKPTX62-NEXT:    cvt.u16.u32 %rs2, %r28;
@@ -74,9 +74,9 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
 ; CHECKPTX62-NEXT:    setp.ne.s32 %p1, %r6, %r54;
 ; CHECKPTX62-NEXT:    mov.u32 %r54, %r6;
 ; CHECKPTX62-NEXT:    @%p1 bra $L__BB0_1;
-; CHECKPTX62-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECKPTX62-NEXT:  // %bb.2: // %atomicrmw.end44
 ; CHECKPTX62-NEXT:    ld.u32 %r55, [%r1];
-; CHECKPTX62-NEXT:  $L__BB0_3: // %atomicrmw.start9
+; CHECKPTX62-NEXT:  $L__BB0_3: // %atomicrmw.start27
 ; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECKPTX62-NEXT:    shr.u32 %r33, %r55, %r2;
 ; CHECKPTX62-NEXT:    cvt.u16.u32 %rs6, %r33;
@@ -90,14 +90,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
 ; CHECKPTX62-NEXT:    setp.ne.s32 %p2, %r9, %r55;
 ; CHECKPTX62-NEXT:    mov.u32 %r55, %r9;
 ; CHECKPTX62-NEXT:    @%p2 bra $L__BB0_3;
-; CHECKPTX62-NEXT:  // %bb.4: // %atomicrmw.end8
+; CHECKPTX62-NEXT:  // %bb.4: // %atomicrmw.end26
 ; CHECKPTX62-NEXT:    and.b32 %r10, %r22, -4;
 ; CHECKPTX62-NEXT:    shl.b32 %r38, %r22, 3;
 ; CHECKPTX62-NEXT:    and.b32 %r11, %r38, 24;
 ; CHECKPTX62-NEXT:    shl.b32 %r40, %r26, %r11;
 ; CHECKPTX62-NEXT:    not.b32 %r12, %r40;
 ; CHECKPTX62-NEXT:    ld.global.u32 %r56, [%r10];
-; CHECKPTX62-NEXT:  $L__BB0_5: // %atomicrmw.start27
+; CHECKPTX62-NEXT:  $L__BB0_5: // %atomicrmw.start9
 ; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECKPTX62-NEXT:    shr.u32 %r41, %r56, %r11;
 ; CHECKPTX62-NEXT:    cvt.u16.u32 %rs11, %r41;
@@ -110,14 +110,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
 ; CHECKPTX62-NEXT:    setp.ne.s32 %p3, %r15, %r56;
 ; CHECKPTX62-NEXT:    mov.u32 %r56, %r15;
 ; CHECKPTX62-NEXT:    @%p3 bra $L__BB0_5;
-; CHECKPTX62-NEXT:  // %bb.6: // %atomicrmw.end26
+; CHECKPTX62-NEXT:  // %bb.6: // %atomicrmw.end8
 ; CHECKPTX62-NEXT:    and.b32 %r16, %r23, -4;
 ; CHECKPTX62-NEXT:    shl.b32 %r46, %r23, 3;
 ; CHECKPTX62-NEXT:    and.b32 %r17, %r46, 24;
 ; CHECKPTX62-NEXT:    shl.b32 %r48, %r26, %r17;
 ; CHECKPTX62-NEXT:    not.b32 %r18, %r48;
 ; CHECKPTX62-NEXT:    ld.shared.u32 %r57, [%r16];
-; CHECKPTX62-NEXT:  $L__BB0_7: // %atomicrmw.start45
+; CHECKPTX62-NEXT:  $L__BB0_7: // %atomicrmw.start
 ; CHECKPTX62-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECKPTX62-NEXT:    shr.u32 %r49, %r57, %r17;
 ; CHECKPTX62-NEXT:    cvt.u16.u32 %rs15, %r49;
@@ -130,7 +130,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
 ; CHECKPTX62-NEXT:    setp.ne.s32 %p4, %r21, %r57;
 ; CHECKPTX62-NEXT:    mov.u32 %r57, %r21;
 ; CHECKPTX62-NEXT:    @%p4 bra $L__BB0_7;
-; CHECKPTX62-NEXT:  // %bb.8: // %atomicrmw.end44
+; CHECKPTX62-NEXT:  // %bb.8: // %atomicrmw.end
 ; CHECKPTX62-NEXT:    ret;
   %r1 = atomicrmw fadd ptr %dp0, half %val seq_cst
   %r2 = atomicrmw fadd ptr %dp0, half 1.0 seq_cst
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
index 9301ea44c69367..22e5033f647a65 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
@@ -63,7 +63,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
 ; CHECKPTX71-NEXT:    not.b32 %r3, %r27;
 ; CHECKPTX71-NEXT:    ld.u32 %r54, [%r1];
 ; CHECKPTX71-NEXT:    cvt.f32.bf16 %f2, %rs1;
-; CHECKPTX71-NEXT:  $L__BB0_1: // %atomicrmw.start
+; CHECKPTX71-NEXT:  $L__BB0_1: // %atomicrmw.start45
 ; CHECKPTX71-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECKPTX71-NEXT:    shr.u32 %r28, %r54, %r2;
 ; CHECKPTX71-NEXT:    cvt.u16.u32 %rs2, %r28;
@@ -78,9 +78,9 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
 ; CHECKPTX71-NEXT:    setp.ne.s32 %p1, %r6, %r54;
 ; CHECKPTX71-NEXT:    mov.u32 %r54, %r6;
 ; CHECKPTX71-NEXT:    @%p1 bra $L__BB0_1;
-; CHECKPTX71-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECKPTX71-NEXT:  // %bb.2: // %atomicrmw.end44
 ; CHECKPTX71-NEXT:    ld.u32 %r55, [%r1];
-; CHECKPTX71-NEXT:  $L__BB0_3: // %atomicrmw.start9
+; CHECKPTX71-NEXT:  $L__BB0_3: // %atomicrmw.start27
 ; CHECKPTX71-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECKPTX71-NEXT:    shr.u32 %r33, %r55, %r2;
 ; CHECKPTX71-NEXT:    cvt.u16.u32 %rs6, %r33;
@@ -95,14 +95,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
 ; CHECKPTX71-NEXT:    setp.ne.s32 %p2, %r9, %r55;
 ; CHECKPTX71-NEXT:    mov.u32 %r55, %r9;
 ; CHECKPTX71-NEXT:    @%p2 bra $L__BB0_3;
-; CHECKPTX71-NEXT:  // %bb.4: // %atomicrmw.end8
+; CHECKPTX71-NEXT:  // %bb.4: // %atomicrmw.end26
 ; CHECKPTX71-NEXT:    and.b32 %r10, %r22, -4;
 ; CHECKPTX71-NEXT:    shl.b32 %r38, %r22, 3;
 ; CHECKPTX71-NEXT:    and.b32 %r11, %r38, 24;
 ; CHECKPTX71-NEXT:    shl.b32 %r40, %r26, %r11;
 ; CHECKPTX71-NEXT:    not.b32 %r12, %r40;
 ; CHECKPTX71-NEXT:    ld.global.u32 %r56, [%r10];
-; CHECKPTX71-NEXT:  $L__BB0_5: // %atomicrmw.start27
+; CHECKPTX71-NEXT:  $L__BB0_5: // %atomicrmw.start9
 ; CHECKPTX71-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECKPTX71-NEXT:    shr.u32 %r41, %r56, %r11;
 ; CHECKPTX71-NEXT:    cvt.u16.u32 %rs10, %r41;
@@ -117,14 +117,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
 ; CHECKPTX71-NEXT:    setp.ne.s32 %p3, %r15, %r56;
 ; CHECKPTX71-NEXT:    mov.u32 %r56, %r15;
 ; CHECKPTX71-NEXT:    @%p3 bra $L__BB0_5;
-; CHECKPTX71-NEXT:  // %bb.6: // %atomicrmw.end26
+; CHECKPTX71-NEXT:  // %bb.6: // %atomicrmw.end8
 ; CHECKPTX71-NEXT:    and.b32 %r16, %r23, -4;
 ; CHECKPTX71-NEXT:    shl.b32 %r46, %r23, 3;
 ; CHECKPTX71-NEXT:    and.b32 %r17, %r46, 24;
 ; CHECKPTX71-NEXT:    shl.b32 %r48, %r26, %r17;
 ; CHECKPTX71-NEXT:    not.b32 %r18, %r48;
 ; CHECKPTX71-NEXT:    ld.shared.u32 %r57, [%r16];
-; CHECKPTX71-NEXT:  $L__BB0_7: // %atomicrmw.start45
+; CHECKPTX71-NEXT:  $L__BB0_7: // %atomicrmw.start
 ; CHECKPTX71-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECKPTX71-NEXT:    shr.u32 %r49, %r57, %r17;
 ; CHECKPTX71-NEXT:    cvt.u16.u32 %rs14, %r49;
@@ -139,7 +139,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
 ; CHECKPTX71-NEXT:    setp.ne.s32 %p4, %r21, %r57;
 ; CHECKPTX71-NEXT:    mov.u32 %r57, %r21;
 ; CHECKPTX71-NEXT:    @%p4 bra $L__BB0_7;
-; CHECKPTX71-NEXT:  // %bb.8: // %atomicrmw.end44
+; CHECKPTX71-NEXT:  // %bb.8: // %atomicrmw.end
 ; CHECKPTX71-NEXT:    ret;
   %r1 = atomicrmw fadd ptr %dp0, bfloat %val seq_cst
   %r2 = atomicrmw fadd ptr %dp0, bfloat 1.0 seq_cst
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 093253bf8f6915..531e559ea7309c 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -913,7 +913,7 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lwz 6, 4(31)
 ; AIX32-NEXT:    lwz 7, 0(31)
 ; AIX32-NEXT:    .align 4
-; AIX32-NEXT:  L..BB0_49: # %atomicrmw.start
+; AIX32-NEXT:  L..BB0_49: # %atomicrmw.start2
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    xori 3, 5, 1
 ; AIX32-NEXT:    stw 7, 72(1)
@@ -938,7 +938,7 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lwz 7, 72(1)
 ; AIX32-NEXT:    cmplwi 3, 0
 ; AIX32-NEXT:    beq 0, L..BB0_49
-; AIX32-NEXT:  # %bb.50: # %atomicrmw.end
+; AIX32-NEXT:  # %bb.50: # %atomicrmw.end1
 ; AIX32-NEXT:    lwz 31, L..C9(2) # @s128
 ; AIX32-NEXT:    addi 30, 1, 72
 ; AIX32-NEXT:    addi 29, 1, 56
@@ -947,7 +947,7 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lwz 6, 4(31)
 ; AIX32-NEXT:    lwz 7, 0(31)
 ; AIX32-NEXT:    .align 4
-; AIX32-NEXT:  L..BB0_51: # %atomicrmw.start2
+; AIX32-NEXT:  L..BB0_51: # %atomicrmw.start
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    xori 3, 5, 1
 ; AIX32-NEXT:    stw 7, 72(1)
@@ -972,13 +972,13 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lwz 7, 72(1)
 ; ...
[truncated]

@aengelke
Contributor

FWIW, I planned to merge the pre-isel legalizing passes (expand atomics, large div/rem, large fp, indirectbr, callbr) into one pass to avoid scanning the IR multiple times. From that perspective, I like the refactoring.

Collaborator

@efriedma-quic efriedma-quic left a comment


LGTM


bool MadeChange = false;

for (BasicBlock &BB : make_early_inc_range(F)) {
Collaborator


This looks like it'll skip newly generated blocks, but I guess that's not new.

Contributor Author


Yes, we have no way of incrementally legalizing atomics (which is what I'm trying to solve)
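
For readers following this thread, here is the patch's new loop restated with explanatory comments (sketchRun and Process are illustrative names; the early-increment behavior described is that of llvm::make_early_inc_range from llvm/ADT/STLExtras.h):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"

// make_early_inc_range advances past the current element as soon as it is
// dereferenced, so erasing the current instruction or splitting the current
// block is safe -- but a block inserted between the current block and the
// already-captured next block is not visited by this walk, which is the
// behavior noted above.
static bool sketchRun(llvm::Function &F,
                      llvm::function_ref<bool(llvm::Instruction *)> Process) {
  bool MadeChange = false;
  for (llvm::BasicBlock &BB : llvm::make_early_inc_range(F)) {
    // Per the comment in the patch, instructions are walked backwards because
    // the control-flow-introducing expansions split the block; with a reverse
    // walk, the instructions that end up in the split-off tail have already
    // been visited.
    for (llvm::Instruction &I : llvm::make_early_inc_range(llvm::reverse(BB)))
      if (Process(&I))
        MadeChange = true;
  }
  return MadeChange;
}

In the pass itself, Process corresponds to processAtomicInstr.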

arsenm added 3 commits August 13, 2024 15:54
  • Move the processing of an instruction into a helper function. Also avoid redundant checking for all types of atomic instructions. Including the assert, it was effectively performing the same check 3 times.
  • Only keep the refactor of the main loop body into a function.
  • Keep the worklist for now; a later patch would have just un-inverted it.
@arsenm arsenm force-pushed the users/arsenm/atomic-expand-stop-precollecting-atomic-instrs branch from daf9612 to dc606e8 Compare August 13, 2024 14:10
@arsenm arsenm changed the title AtomicExpand: Stop precollecting atomic instructions in function AtomicExpand: Refactor atomic instruction handling Aug 13, 2024
Contributor Author

arsenm commented Aug 13, 2024

Merge activity

  • Aug 13, 11:50 AM EDT: @arsenm started a stack merge that includes this pull request via Graphite.
  • Aug 13, 11:51 AM EDT: @arsenm merged this pull request with Graphite.

@arsenm arsenm merged commit 2d7a2c1 into main Aug 13, 2024
6 of 8 checks passed
@arsenm arsenm deleted the users/arsenm/atomic-expand-stop-precollecting-atomic-instrs branch August 13, 2024 15:51
MadeChange |= tryExpandAtomicStore(SI);
else if (RMWI) {
// There are two different ways of expanding RMW instructions:
// - into a load if it is idempotent
Contributor


This is incorrect; that's always unsound in LLVM IR, see, e.g., #56450, among others.

Contributor


The bug was already there (just saw it below), so nvm.

Contributor Author


This just moved the code around; there is no change.
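
For context on the "idempotent" case this thread refers to: an atomicrmw is treated as idempotent when its value operand makes the operation a no-op on the stored value. Below is a minimal sketch of that classification for integer operands (isNoOpRMWOperand is a hypothetical name, not the pass's isIdempotentRMW); it only identifies the pattern and says nothing about whether replacing such an RMW with a load is sound, which is the concern raised above (see #56450).

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper, for illustration only: returns true when the RMW's
// constant integer operand leaves the stored value unchanged, e.g.
// `atomicrmw or ptr %p, i32 0` or `atomicrmw and ptr %p, i32 -1`.
static bool isNoOpRMWOperand(const AtomicRMWInst *RMWI) {
  const auto *C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;
  switch (RMWI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  default:
    return false;
  }
}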
