
[X86,SimplifyCFG][NFC] Refactor code for #108812 #109398


Merged

phoebewang merged 1 commit into llvm:main on Sep 21, 2024
Conversation

phoebewang (Contributor)

No description provided.

@llvmbot (Member)

llvmbot commented Sep 20, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Phoebe Wang (phoebewang)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/109398.diff

1 File Affected:

  • (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+143-138)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 69c4475a494cbe..03db86a3baae7a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -283,7 +283,7 @@ class SimplifyCFGOpt {
   bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                              IRBuilder<> &Builder);
 
-  bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
+  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly);
   bool hoistSuccIdenticalTerminatorToSwitchOrIf(
       Instruction *TI, Instruction *I1,
       SmallVectorImpl<Instruction *> &OtherSuccTIs);
@@ -1611,12 +1611,147 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
   return false;
 }
 
+/// If the target supports conditional faulting,
+/// we look for the following pattern:
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     store i32 1, ptr %q, align 4
+///     ...
+///   TrueBB:
+///     %maskedloadstore = load i32, ptr %b, align 4
+///     store i32 %maskedloadstore, ptr %p, align 4
+///     ...
+/// \endcode
+///
+/// and transform it into:
+///
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     %maskedloadstore = cload i32, ptr %b, %cond
+///     cstore i32 %maskedloadstore, ptr %p, %cond
+///     cstore i32 1, ptr %q, ~%cond
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     ...
+///   TrueBB:
+///     ...
+/// \endcode
+///
+/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
+/// e.g.
+///
+/// \code
+///   %vcond = bitcast i1 %cond to <1 x i1>
+///   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
+///                         (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
+///   %maskedloadstore = bitcast <1 x i32> %v0 to i32
+///   call void @llvm.masked.store.v1i32.p0
+///                          (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
+///   %cond.not = xor i1 %cond, true
+///   %vcond.not = bitcast i1 %cond.not to <1 x i1>
+///   call void @llvm.masked.store.v1i32.p0
+///              (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
+/// \endcode
+///
+/// So we need to turn hoisted load/store into cload/cstore.
+static void hoistConditionalLoadsStores(
+    BranchInst *BI,
+    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
+    bool Invert) {
+  auto &Context = BI->getParent()->getContext();
+  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+  auto *Cond = BI->getOperand(0);
+  // Construct the condition mask from the branch condition.
+  BasicBlock *BB = BI->getParent();
+  IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
+  Value *Mask = Builder.CreateBitCast(
+      Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
+      VCondTy);
+  for (auto *I : SpeculatedConditionalLoadsStores) {
+    IRBuilder<> Builder(I);
+    // We currently assume conditional faulting load/store is supported for
+    // scalar types only when creating new instructions. This can be easily
+    // extended for vector types in the future.
+    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
+    auto *Op0 = I->getOperand(0);
+    CallInst *MaskedLoadStore = nullptr;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      // Handle Load.
+      auto *Ty = I->getType();
+      PHINode *PN = nullptr;
+      Value *PassThru = nullptr;
+      for (User *U : I->users())
+        if ((PN = dyn_cast<PHINode>(U))) {
+          PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
+                                           FixedVectorType::get(Ty, 1));
+          break;
+        }
+      MaskedLoadStore = Builder.CreateMaskedLoad(
+          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
+      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
+      if (PN)
+        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
+      I->replaceAllUsesWith(NewLoadStore);
+    } else {
+      // Handle Store.
+      auto *StoredVal =
+          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+      MaskedLoadStore = Builder.CreateMaskedStore(
+          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
+    }
+    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
+    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
+    //
+    // !nonnull, !align : Pointer types are not supported here, so there is
+    //                    no need to keep them.
+    // !range: Load type is changed from scalar to vector, but the metadata on
+    //         vector specifies a per-element range, so the semantics stay the
+    //         same. Keep it.
+    // !annotation: Does not impact semantics. Keep it.
+    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
+      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
+    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
+    // FIXME: DIAssignID is not supported for masked store yet.
+    // (Verifier::visitDIAssignIDMetadata)
+    at::deleteAssignmentMarkers(I);
+    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
+      return Node->getMetadataID() == Metadata::DIAssignIDKind;
+    });
+    MaskedLoadStore->copyMetadata(*I);
+    I->eraseFromParent();
+  }
+}
+
+static bool isSafeCheapLoadStore(const Instruction *I,
+                                 const TargetTransformInfo &TTI) {
+  // Do not handle volatile or atomic loads/stores.
+  if (auto *L = dyn_cast<LoadInst>(I)) {
+    if (!L->isSimple())
+      return false;
+  } else if (auto *S = dyn_cast<StoreInst>(I)) {
+    if (!S->isSimple())
+      return false;
+  } else
+    return false;
+
+  // llvm.masked.load/store use i32 for alignment while load/store use i64.
+  // That's why we have the alignment limitation.
+  // FIXME: Update the prototype of the intrinsics?
+  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
+         getLoadStoreAlignment(I) < Value::MaximumAlignment;
+}
+
 /// Hoist any common code in the successor blocks up into the block. This
 /// function guarantees that BB dominates all successors. If EqTermsOnly is
 /// given, only perform hoisting in case both blocks only contain a terminator.
 /// In that case, only the original BI will be replaced and selects for PHIs are
 /// added.
-bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
+bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                    bool EqTermsOnly) {
   // This does very trivial matching, with limited scanning, to find identical
   // instructions in the two blocks. In particular, we don't want to get into
@@ -1624,6 +1759,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
   // such, we currently just scan for obviously identical instructions in an
   // identical order, possibly separated by the same number of non-identical
   // instructions.
+  BasicBlock *BB = TI->getParent();
   unsigned int SuccSize = succ_size(BB);
   if (SuccSize < 2)
     return false;
@@ -1635,8 +1771,6 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
     if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
       return false;
 
-  auto *TI = BB->getTerminator();
-
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
   SmallVector<SuccIterPair, 8> SuccIterPairs;
@@ -2997,25 +3131,6 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
   return BIEndProb < Likely;
 }
 
-static bool isSafeCheapLoadStore(const Instruction *I,
-                                 const TargetTransformInfo &TTI) {
-  // Not handle volatile or atomic.
-  if (auto *L = dyn_cast<LoadInst>(I)) {
-    if (!L->isSimple())
-      return false;
-  } else if (auto *S = dyn_cast<StoreInst>(I)) {
-    if (!S->isSimple())
-      return false;
-  } else
-    return false;
-
-  // llvm.masked.load/store use i32 for alignment while load/store use i64.
-  // That's why we have the alignment limitation.
-  // FIXME: Update the prototype of the intrinsics?
-  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
-         getLoadStoreAlignment(I) < Value::MaximumAlignment;
-}
-
 /// Speculate a conditional basic block flattening the CFG.
 ///
 /// Note that this is a very risky transform currently. Speculating
@@ -3267,118 +3382,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
   BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
              std::prev(ThenBB->end()));
 
-  // If the target supports conditional faulting,
-  // we look for the following pattern:
-  // \code
-  //   BB:
-  //     ...
-  //     %cond = icmp ult %x, %y
-  //     br i1 %cond, label %TrueBB, label %FalseBB
-  //   FalseBB:
-  //     store i32 1, ptr %q, align 4
-  //     ...
-  //   TrueBB:
-  //     %maskedloadstore = load i32, ptr %b, align 4
-  //     store i32 %maskedloadstore, ptr %p, align 4
-  //     ...
-  // \endcode
-  //
-  // and transform it into:
-  //
-  // \code
-  //   BB:
-  //     ...
-  //     %cond = icmp ult %x, %y
-  //     %maskedloadstore = cload i32, ptr %b, %cond
-  //     cstore i32 %maskedloadstore, ptr %p, %cond
-  //     cstore i32 1, ptr %q, ~%cond
-  //     br i1 %cond, label %TrueBB, label %FalseBB
-  //   FalseBB:
-  //     ...
-  //   TrueBB:
-  //     ...
-  // \endcode
-  //
-  // where cload/cstore are represented by llvm.masked.load/store intrinsics,
-  // e.g.
-  //
-  // \code
-  //   %vcond = bitcast i1 %cond to <1 x i1>
-  //   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
-  //                         (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
-  //   %maskedloadstore = bitcast <1 x i32> %v0 to i32
-  //   call void @llvm.masked.store.v1i32.p0
-  //                          (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
-  //   %cond.not = xor i1 %cond, true
-  //   %vcond.not = bitcast i1 %cond.not to <1 x i>
-  //   call void @llvm.masked.store.v1i32.p0
-  //              (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
-  // \endcode
-  //
-  // So we need to turn hoisted load/store into cload/cstore.
-  auto &Context = BI->getParent()->getContext();
-  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
-  auto *Cond = BI->getOperand(0);
-  Value *Mask = nullptr;
-  // Construct the condition if needed.
-  if (!SpeculatedConditionalLoadsStores.empty()) {
-    IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
-    Mask = Builder.CreateBitCast(
-        Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
-        VCondTy);
-  }
-  for (auto *I : SpeculatedConditionalLoadsStores) {
-    IRBuilder<> Builder(I);
-    // We currently assume conditional faulting load/store is supported for
-    // scalar types only when creating new instructions. This can be easily
-    // extended for vector types in the future.
-    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
-    auto *Op0 = I->getOperand(0);
-    CallInst *MaskedLoadStore = nullptr;
-    if (auto *LI = dyn_cast<LoadInst>(I)) {
-      // Handle Load.
-      auto *Ty = I->getType();
-      PHINode *PN = nullptr;
-      Value *PassThru = nullptr;
-      for (User *U : I->users())
-        if ((PN = dyn_cast<PHINode>(U))) {
-          PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
-                                           FixedVectorType::get(Ty, 1));
-          break;
-        }
-      MaskedLoadStore = Builder.CreateMaskedLoad(
-          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
-      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
-      if (PN)
-        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
-      I->replaceAllUsesWith(NewLoadStore);
-    } else {
-      // Handle Store.
-      auto *StoredVal =
-          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
-      MaskedLoadStore = Builder.CreateMaskedStore(
-          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
-    }
-    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
-    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
-    //
-    // !nonnull, !align : Not support pointer type, no need to keep.
-    // !range: Load type is changed from scalar to vector, but the metadata on
-    //         vector specifies a per-element range, so the semantics stay the
-    //         same. Keep it.
-    // !annotation: Not impact semantics. Keep it.
-    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
-      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
-    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
-    // FIXME: DIAssignID is not supported for masked store yet.
-    // (Verifier::visitDIAssignIDMetadata)
-    at::deleteAssignmentMarkers(I);
-    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
-      return Node->getMetadataID() == Metadata::DIAssignIDKind;
-    });
-    MaskedLoadStore->copyMetadata(*I);
-    I->eraseFromParent();
-  }
+  if (!SpeculatedConditionalLoadsStores.empty())
+    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
 
   // Insert selects and rewrite the PHI operands.
   IRBuilder<NoFolder> Builder(BI);
@@ -7449,7 +7454,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
     return requestResimplify();
 
   if (HoistCommon &&
-      hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
+      hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
     return requestResimplify();
 
   return false;
@@ -7807,8 +7812,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   // can hoist it up to the branching block.
   if (BI->getSuccessor(0)->getSinglePredecessor()) {
     if (BI->getSuccessor(1)->getSinglePredecessor()) {
-      if (HoistCommon && hoistCommonCodeFromSuccessors(
-                             BI->getParent(), !Options.HoistCommonInsts))
+      if (HoistCommon &&
+          hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
         return requestResimplify();
     } else {
       // If Successor #1 has multiple preds, we may be able to conditionally

phoebewang merged commit c9e5c42 into llvm:main on Sep 21, 2024. 8 of 10 checks passed.
phoebewang deleted the APX branch on September 21, 2024, 12:18.