Skip to content

Revert "[LoopIdiom] Support 'shift until less-than' idiom (#95002)" #98065

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 8, 2024

Conversation

hazzlim
Copy link
Contributor

@hazzlim hazzlim commented Jul 8, 2024

Reverts #95002 while I investigate buildbot failure.

This reverts commit 83b01aa.

Reverts llvm#95002 while I investigate buildbot failure.

This reverts commit 83b01aa.
@llvmbot
Copy link
Member

llvmbot commented Jul 8, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Hari Limaye (hazzlim)

Changes

Reverts #95002 while I investigate buildbot failure.

This reverts commit 83b01aa.


Patch is 46.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98065.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (+36-255)
  • (removed) llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll (-778)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index b3e3e1e71f1010..635bd1236196e5 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -231,19 +231,12 @@ class LoopIdiomRecognize {
   bool recognizePopcount();
   void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                                PHINode *CntPhi, Value *Var);
-  bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
-                               bool ZeroCheck, size_t CanonicalSize);
-  bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
-                             Instruction *DefX, PHINode *CntPhi,
-                             Instruction *CntInst);
   bool recognizeAndInsertFFS();  /// Find First Set: ctlz or cttz
-  bool recognizeShiftUntilLessThan();
   void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
                                 Instruction *CntInst, PHINode *CntPhi,
                                 Value *Var, Instruction *DefX,
                                 const DebugLoc &DL, bool ZeroCheck,
-                                bool IsCntPhiUsedOutsideLoop,
-                                bool InsertSub = false);
+                                bool IsCntPhiUsedOutsideLoop);
 
   bool recognizeShiftUntilBitTest();
   bool recognizeShiftUntilZero();
@@ -1489,8 +1482,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
                     << CurLoop->getHeader()->getName() << "\n");
 
   return recognizePopcount() || recognizeAndInsertFFS() ||
-         recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
-         recognizeShiftUntilLessThan();
+         recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
 }
 
 /// Check if the given conditional branch is based on the comparison between
@@ -1525,34 +1517,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
   return nullptr;
 }
 
-/// Check if the given conditional branch is based on an unsigned less-than
-/// comparison between a variable and a constant, and if the comparison is false
-/// the control yields to the loop entry. If the branch matches the behaviour,
-/// the variable involved in the comparison is returned.
-static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
-                                     uint64_t &Threshold) {
-  if (!BI || !BI->isConditional())
-    return nullptr;
-
-  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
-  if (!Cond)
-    return nullptr;
-
-  ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
-  if (!CmpConst)
-    return nullptr;
-
-  BasicBlock *FalseSucc = BI->getSuccessor(1);
-  ICmpInst::Predicate Pred = Cond->getPredicate();
-
-  if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
-    Threshold = CmpConst->getZExtValue();
-    return Cond->getOperand(0);
-  }
-
-  return nullptr;
-}
-
 // Check if the recurrence variable `VarX` is in the right form to create
 // the idiom. Returns the value coerced to a PHINode if so.
 static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
@@ -1564,107 +1528,6 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
   return nullptr;
 }
 
-/// Return true if the idiom is detected in the loop.
-///
-/// Additionally:
-/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
-///       or nullptr if there is no such.
-/// 2) \p CntPhi is set to the corresponding phi node
-///       or nullptr if there is no such.
-/// 3) \p InitX is set to the value whose CTLZ could be used.
-/// 4) \p DefX is set to the instruction calculating Loop exit condition.
-/// 5) \p Threshold is set to the constant involved in the unsigned less-than
-///       comparison.
-///
-/// The core idiom we are trying to detect is:
-/// \code
-///    if (x0 < 2)
-///      goto loop-exit // the precondition of the loop
-///    cnt0 = init-val
-///    do {
-///      x = phi (x0, x.next);   //PhiX
-///      cnt = phi (cnt0, cnt.next)
-///
-///      cnt.next = cnt + 1;
-///       ...
-///      x.next = x >> 1;   // DefX
-///    } while (x >= 4)
-/// loop-exit:
-/// \endcode
-static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
-                                          Intrinsic::ID &IntrinID,
-                                          Value *&InitX, Instruction *&CntInst,
-                                          PHINode *&CntPhi, Instruction *&DefX,
-                                          uint64_t &Threshold) {
-  BasicBlock *LoopEntry;
-
-  DefX = nullptr;
-  CntInst = nullptr;
-  CntPhi = nullptr;
-  LoopEntry = *(CurLoop->block_begin());
-
-  // step 1: Check if the loop-back branch is in desirable form.
-  if (Value *T = matchShiftULTCondition(
-          dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
-          Threshold))
-    DefX = dyn_cast<Instruction>(T);
-  else
-    return false;
-
-  // step 2: Check the recurrence of variable X
-  if (!DefX || !isa<PHINode>(DefX))
-    return false;
-
-  PHINode *VarPhi = cast<PHINode>(DefX);
-  int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
-  if (Idx == -1)
-    return false;
-
-  DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
-  if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
-    return false;
-
-  // step 3: detect instructions corresponding to "x.next = x >> 1"
-  if (DefX->getOpcode() != Instruction::LShr)
-    return false;
-
-  IntrinID = Intrinsic::ctlz;
-  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
-  if (!Shft || !Shft->isOne())
-    return false;
-
-  InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
-
-  // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
-  //         or cnt.next = cnt + -1.
-  // TODO: We can skip the step. If loop trip count is known (CTLZ),
-  //       then all uses of "cnt.next" could be optimized to the trip count
-  //       plus "cnt0". Currently it is not optimized.
-  //       This step could be used to detect POPCNT instruction:
-  //       cnt.next = cnt + (x.next & 1)
-  for (Instruction &Inst : llvm::make_range(
-           LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
-    if (Inst.getOpcode() != Instruction::Add)
-      continue;
-
-    ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
-    if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
-      continue;
-
-    PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
-    if (!Phi)
-      continue;
-
-    CntInst = &Inst;
-    CntPhi = Phi;
-    break;
-  }
-  if (!CntInst)
-    return false;
-
-  return true;
-}
-
 /// Return true iff the idiom is detected in the loop.
 ///
 /// Additionally:
@@ -1893,35 +1756,27 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
   return true;
 }
 
-// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
-// profitable if we delete the loop.
-bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
-                                                 Value *InitX, bool ZeroCheck,
-                                                 size_t CanonicalSize) {
-  const Value *Args[] = {InitX,
-                         ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
+/// trip count returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
+  // Give up if the loop has multiple blocks or multiple backedges.
+  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+    return false;
 
-  // @llvm.dbg doesn't count as they have no semantic effect.
-  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
-  uint32_t HeaderSize =
-      std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+  Intrinsic::ID IntrinID;
+  Value *InitX;
+  Instruction *DefX = nullptr;
+  PHINode *CntPhi = nullptr;
+  Instruction *CntInst = nullptr;
+  // Help decide if transformation is profitable. For ShiftUntilZero idiom,
+  // this is always 6.
+  size_t IdiomCanonicalSize = 6;
 
-  IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
-  InstructionCost Cost = TTI->getIntrinsicInstrCost(
-      Attrs, TargetTransformInfo::TCK_SizeAndLatency);
-  if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
+  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
+                                 CntInst, CntPhi, DefX))
     return false;
 
-  return true;
-}
-
-/// Convert CTLZ / CTTZ idiom loop into countable loop.
-/// If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
-/// returns false.
-bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
-                                               Value *InitX, Instruction *DefX,
-                                               PHINode *CntPhi,
-                                               Instruction *CntInst) {
   bool IsCntPhiUsedOutsideLoop = false;
   for (User *U : CntPhi->users())
     if (!CurLoop->contains(cast<Instruction>(U))) {
@@ -1963,107 +1818,35 @@ bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
     ZeroCheck = true;
   }
 
-  // FFS idiom loop has only 6 instructions:
+  // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+  // profitable if we delete the loop.
+
+  // the loop has only 6 instructions:
   //  %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
   //  %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
   //  %shr = ashr %n.addr.0, 1
   //  %tobool = icmp eq %shr, 0
   //  %inc = add nsw %i.0, 1
   //  br i1 %tobool
-  size_t IdiomCanonicalSize = 6;
-  if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
-    return false;
-
-  transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
-                           DefX->getDebugLoc(), ZeroCheck,
-                           IsCntPhiUsedOutsideLoop);
-  return true;
-}
-
-/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
-/// trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertFFS() {
-  // Give up if the loop has multiple blocks or multiple backedges.
-  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
-    return false;
-
-  Intrinsic::ID IntrinID;
-  Value *InitX;
-  Instruction *DefX = nullptr;
-  PHINode *CntPhi = nullptr;
-  Instruction *CntInst = nullptr;
-
-  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
-                                 DefX))
-    return false;
 
-  return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
-}
-
-bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
-  // Give up if the loop has multiple blocks or multiple backedges.
-  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
-    return false;
-
-  Intrinsic::ID IntrinID;
-  Value *InitX;
-  Instruction *DefX = nullptr;
-  PHINode *CntPhi = nullptr;
-  Instruction *CntInst = nullptr;
-
-  uint64_t LoopThreshold;
-  if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
-                                     CntPhi, DefX, LoopThreshold))
-    return false;
-
-  if (LoopThreshold == 2) {
-    // Treat as regular FFS.
-    return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
-  }
-
-  // Look for Floor Log2 Idiom.
-  if (LoopThreshold != 4)
-    return false;
-
-  // Abort if CntPhi is used outside of the loop.
-  for (User *U : CntPhi->users())
-    if (!CurLoop->contains(cast<Instruction>(U)))
-      return false;
-
-  // It is safe to assume Preheader exist as it was checked in
-  // parent function RunOnLoop.
-  BasicBlock *PH = CurLoop->getLoopPreheader();
-  auto *PreCondBB = PH->getSinglePredecessor();
-  if (!PreCondBB)
-    return false;
-  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
-  if (!PreCondBI)
-    return false;
-
-  uint64_t PreLoopThreshold;
-  if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
-      PreLoopThreshold != 2)
-    return false;
+  const Value *Args[] = {InitX,
+                         ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
 
-  bool ZeroCheck = true;
+  // @llvm.dbg doesn't count as they have no semantic effect.
+  auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
+  uint32_t HeaderSize =
+      std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
 
-  // the loop has only 6 instructions:
-  //  %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
-  //  %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
-  //  %shr = ashr %n.addr.0, 1
-  //  %tobool = icmp ult %n.addr.0, C
-  //  %inc = add nsw %i.0, 1
-  //  br i1 %tobool
-  size_t IdiomCanonicalSize = 6;
-  if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
+  IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
+  InstructionCost Cost =
+    TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
+  if (HeaderSize != IdiomCanonicalSize &&
+      Cost > TargetTransformInfo::TCC_Basic)
     return false;
 
-  // log2(x) = w − 1 − clz(x)
   transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
                            DefX->getDebugLoc(), ZeroCheck,
-                           /*IsCntPhiUsedOutsideLoop=*/false,
-                           /*InsertSub=*/true);
+                           IsCntPhiUsedOutsideLoop);
   return true;
 }
 
@@ -2178,7 +1961,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
 void LoopIdiomRecognize::transformLoopToCountable(
     Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
     PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
-    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
+    bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
   BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
 
   // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -2208,8 +1991,6 @@ void LoopIdiomRecognize::transformLoopToCountable(
   Type *CountTy = Count->getType();
   Count = Builder.CreateSub(
       ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
-  if (InsertSub)
-    Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
   Value *NewCount = Count;
   if (IsCntPhiUsedOutsideLoop)
     Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll b/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
deleted file mode 100644
index 47ae4fd5b66a7c..00000000000000
--- a/llvm/test/Transforms/LoopIdiom/AArch64/ctlz.ll
+++ /dev/null
@@ -1,778 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -passes=loop-idiom -mtriple=aarch64 < %s -S | FileCheck %s
-
-; Recognize CTLZ builtin pattern.
-; Here we'll just convert loop to countable,
-; so do not insert builtin if CPU do not support CTLZ
-;
-; int ctlz_and_other(int n, char *a)
-; {
-;   n = n >= 0 ? n : -n;
-;   int i = 0, n0 = n;
-;   while(n >>= 1) {
-;     a[i] = (n0 & (1 << i)) ? 1 : 0;
-;     i++;
-;   }
-;   return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind uwtable
-define i32 @ctlz_and_other(i32 %n, ptr nocapture %a) {
-; CHECK-LABEL: define i32 @ctlz_and_other(
-; CHECK-SAME: i32 [[N:%.*]], ptr nocapture [[A:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT:    [[SHR8:%.*]] = lshr i32 [[ABS_N]], 1
-; CHECK-NEXT:    [[TOBOOL9:%.*]] = icmp eq i32 [[SHR8]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHR8]], i1 true)
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[SHR11:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[SHR8]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[TMP3]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], [[ABS_N]]
-; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[TOBOOL1]] to i8
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[SHR]] = ashr i32 [[SHR11]], 1
-; CHECK-NEXT:    [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
-; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[TMP2]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_LCSSA]] to i32
-; CHECK-NEXT:    br label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_END_LOOPEXIT]] ]
-; CHECK-NEXT:    ret i32 [[I_0_LCSSA]]
-;
-entry:
-  %abs_n = call i32 @llvm.abs.i32(i32 %n, i1 true)
-  %shr8 = lshr i32 %abs_n, 1
-  %tobool9 = icmp eq i32 %shr8, 0
-  br i1 %tobool9, label %while.end, label %while.body.preheader
-
-while.body.preheader:                             ; preds = %entry
-  br label %while.body
-
-while.body:                                       ; preds = %while.body.preheader, %while.body
-  %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
-  %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
-  %0 = trunc i64 %indvars.iv to i32
-  %shl = shl i32 1, %0
-  %and = and i32 %shl, %abs_n
-  %tobool1 = icmp ne i32 %and, 0
-  %conv = zext i1 %tobool1 to i8
-  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
-  store i8 %conv, ptr %arrayidx, align 1
-  %indvars.iv.next = add nuw i64 %indvars.iv, 1
-  %shr = ashr i32 %shr11, 1
-  %tobool = icmp eq i32 %shr, 0
-  br i1 %tobool, label %while.end.loopexit, label %while.body
-
-while.end.loopexit:                               ; preds = %while.body
-  %1 = trunc i64 %indvars.iv.next to i32
-  br label %while.end
-
-while.end:                                        ; preds = %while.end.loopexit, %entry
-  %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
-  ret i32 %i.0.lcssa
-}
-
-; Recognize CTLZ builtin pattern.
-; Here it will replace the loop -
-; assume builtin is always profitable.
-;
-; int ctlz_zero_check(int n)
-; {
-;   n = n >= 0 ? n : -n;
-;   int i = 0;
-;   while(n) {
-;     n >>= 1;
-;     i++;
-;   }
-;   return i;
-; }
-;
-
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @ctlz_zero_check(i32 %n) {
-; CHECK-LABEL: define i32 @ctlz_zero_check(
-; CHECK-SAME: i32 [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ABS_N:%.*]] = call i32 @llvm.abs.i32(i32 [[N]], i1 true)
-; CHECK-NEXT:    [[TOBOOL4:%.*]] = icmp eq i32 [[ABS_N]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
-; CHECK:       while.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[ABS_N]], i1 true)
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 32, [[TMP0]]
-; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT:    [[I_06:%.*]] ...
[truncated]

@hazzlim hazzlim merged commit ea39f97 into llvm:main Jul 8, 2024
5 of 8 checks passed
Copy link

github-actions bot commented Jul 8, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 87e914db85572c4d9fe924616141dcc9a5d66177 9a46c4662bdbb8f74e95c377b82c1f3b53a4e86a -- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 635bd12361..c5e24e0516 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -231,7 +231,7 @@ private:
   bool recognizePopcount();
   void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
                                PHINode *CntPhi, Value *Var);
-  bool recognizeAndInsertFFS();  /// Find First Set: ctlz or cttz
+  bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
   void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
                                 Instruction *CntInst, PHINode *CntPhi,
                                 Value *Var, Instruction *DefX,
@@ -1773,8 +1773,8 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
   // this is always 6.
   size_t IdiomCanonicalSize = 6;
 
-  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
-                                 CntInst, CntPhi, DefX))
+  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
+                                 DefX))
     return false;
 
   bool IsCntPhiUsedOutsideLoop = false;
@@ -1838,10 +1838,9 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
       std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
 
   IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
-  InstructionCost Cost =
-    TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
-  if (HeaderSize != IdiomCanonicalSize &&
-      Cost > TargetTransformInfo::TCC_Basic)
+  InstructionCost Cost = TTI->getIntrinsicInstrCost(
+      Attrs, TargetTransformInfo::TCK_SizeAndLatency);
+  if (HeaderSize != IdiomCanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
     return false;
 
   transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants