Skip to content

Commit 25067f1

Browse files
committed
[LoopIdiomRecognize] Teach detectShiftUntilZeroIdiom to recognize loops where the counter is decrementing.
This adds support for loops like

    unsigned clz(unsigned x) {
      unsigned w = sizeof (x) * CHAR_BIT;
      while (x) {
        w--;
        x >>= 1;
      }
      return w;
    }

and

    unsigned clz(unsigned x) {
      unsigned w = sizeof (x) * CHAR_BIT - 1;
      while (x >>= 1) {
        w--;
      }
      return w;
    }

To support these we look for `add x, -1` as well as the `add x, 1` that we already matched. If the value was -1, we need to subtract the computed count from the initial counter value instead of adding it.

Fixes PR48404.

Differential Revision: https://reviews.llvm.org/D92745
1 parent b3ee7f1 commit 25067f1

File tree

3 files changed

+47
-15
lines changed

3 files changed

+47
-15
lines changed

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
14751475
return false;
14761476

14771477
// step 4: Find the instruction that counts the CTLZ: cnt.next = cnt + 1
1478+
// or cnt.next = cnt + -1.
14781479
// TODO: We can skip the step. If loop trip count is known (CTLZ),
14791480
// then all uses of "cnt.next" could be optimized to the trip count
14801481
// plus "cnt0". Currently it is not optimized.
@@ -1488,7 +1489,7 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
14881489
continue;
14891490

14901491
ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1491-
if (!Inc || !Inc->isOne())
1492+
if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
14921493
continue;
14931494

14941495
PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
@@ -1751,11 +1752,18 @@ void LoopIdiomRecognize::transformLoopToCountable(
17511752
NewCount = Builder.CreateZExtOrTrunc(NewCount,
17521753
cast<IntegerType>(CntInst->getType()));
17531754

1754-
// If the counter's initial value is not zero, insert Add Inst.
17551755
Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
1756-
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1757-
if (!InitConst || !InitConst->isZero())
1758-
NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1756+
if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {
1757+
// If the counter was being incremented in the loop, add NewCount to the
1758+
// counter's initial value, but only if the initial value is not zero.
1759+
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1760+
if (!InitConst || !InitConst->isZero())
1761+
NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1762+
} else {
1763+
// If the counter was being decremented in the loop, subtract NewCount from
1764+
// the counter's initial value.
1765+
NewCount = Builder.CreateSub(CntInitVal, NewCount);
1766+
}
17591767

17601768
// Step 2: Insert new IV and loop condition:
17611769
// loop:

llvm/test/Transforms/LoopIdiom/X86/ctlz.ll

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -693,16 +693,21 @@ define i32 @ctlz_decrement(i32 %n) {
693693
; ALL-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[N:%.*]], 0
694694
; ALL-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
695695
; ALL: while.body.preheader:
696+
; ALL-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[N]], i1 true)
697+
; ALL-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
698+
; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
696699
; ALL-NEXT: br label [[WHILE_BODY:%.*]]
697700
; ALL: while.body:
701+
; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
698702
; ALL-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 32, [[WHILE_BODY_PREHEADER]] ]
699703
; ALL-NEXT: [[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
700704
; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_05]], 1
701705
; ALL-NEXT: [[INC]] = add nsw i32 [[I_06]], -1
702-
; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
706+
; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
707+
; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
703708
; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
704709
; ALL: while.end.loopexit:
705-
; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
710+
; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_BODY]] ]
706711
; ALL-NEXT: br label [[WHILE_END]]
707712
; ALL: while.end:
708713
; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 32, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
@@ -747,16 +752,23 @@ while.end: ; preds = %while.end.loopexit,
747752
define i32 @ctlz_lshr_decrement(i32 %n) {
748753
; ALL-LABEL: @ctlz_lshr_decrement(
749754
; ALL-NEXT: entry:
755+
; ALL-NEXT: [[TMP0:%.*]] = lshr i32 [[N:%.*]], 1
756+
; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 false)
757+
; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
758+
; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
759+
; ALL-NEXT: [[TMP4:%.*]] = sub i32 31, [[TMP2]]
750760
; ALL-NEXT: br label [[WHILE_COND:%.*]]
751761
; ALL: while.cond:
752-
; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
762+
; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
763+
; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHR:%.*]], [[WHILE_COND]] ]
753764
; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 31, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
754765
; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1
755-
; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
766+
; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
767+
; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
756768
; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], -1
757769
; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
758770
; ALL: while.end:
759-
; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[I_0]], [[WHILE_COND]] ]
771+
; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
760772
; ALL-NEXT: ret i32 [[I_0_LCSSA]]
761773
;
762774
entry:

llvm/test/Transforms/LoopIdiom/X86/cttz.ll

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,16 +133,21 @@ define i32 @cttz_decrement(i32 %n) {
133133
; ALL-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[N:%.*]], 0
134134
; ALL-NEXT: br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
135135
; ALL: while.body.preheader:
136+
; ALL-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[N]], i1 true)
137+
; ALL-NEXT: [[TMP1:%.*]] = sub i32 32, [[TMP0]]
138+
; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
136139
; ALL-NEXT: br label [[WHILE_BODY:%.*]]
137140
; ALL: while.body:
141+
; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TCDEC:%.*]], [[WHILE_BODY]] ]
138142
; ALL-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 32, [[WHILE_BODY_PREHEADER]] ]
139143
; ALL-NEXT: [[N_ADDR_05:%.*]] = phi i32 [ [[SHL:%.*]], [[WHILE_BODY]] ], [ [[N]], [[WHILE_BODY_PREHEADER]] ]
140144
; ALL-NEXT: [[SHL]] = shl i32 [[N_ADDR_05]], 1
141145
; ALL-NEXT: [[INC]] = add nsw i32 [[I_06]], -1
142-
; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHL]], 0
146+
; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
147+
; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
143148
; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
144149
; ALL: while.end.loopexit:
145-
; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
150+
; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[TMP2]], [[WHILE_BODY]] ]
146151
; ALL-NEXT: br label [[WHILE_END]]
147152
; ALL: while.end:
148153
; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 32, [[ENTRY:%.*]] ], [ [[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
@@ -187,16 +192,23 @@ while.end: ; preds = %while.end.loopexit,
187192
define i32 @cttz_shl_decrement(i32 %n) {
188193
; ALL-LABEL: @cttz_shl_decrement(
189194
; ALL-NEXT: entry:
195+
; ALL-NEXT: [[TMP0:%.*]] = shl i32 [[N:%.*]], 1
196+
; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false)
197+
; ALL-NEXT: [[TMP2:%.*]] = sub i32 32, [[TMP1]]
198+
; ALL-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
199+
; ALL-NEXT: [[TMP4:%.*]] = sub i32 31, [[TMP2]]
190200
; ALL-NEXT: br label [[WHILE_COND:%.*]]
191201
; ALL: while.cond:
192-
; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHL:%.*]], [[WHILE_COND]] ]
202+
; ALL-NEXT: [[TCPHI:%.*]] = phi i32 [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TCDEC:%.*]], [[WHILE_COND]] ]
203+
; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[SHL:%.*]], [[WHILE_COND]] ]
193204
; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 31, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
194205
; ALL-NEXT: [[SHL]] = shl i32 [[N_ADDR_0]], 1
195-
; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHL]], 0
206+
; ALL-NEXT: [[TCDEC]] = sub nsw i32 [[TCPHI]], 1
207+
; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TCDEC]], 0
196208
; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], -1
197209
; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
198210
; ALL: while.end:
199-
; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[I_0]], [[WHILE_COND]] ]
211+
; ALL-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[WHILE_COND]] ]
200212
; ALL-NEXT: ret i32 [[I_0_LCSSA]]
201213
;
202214
entry:

0 commit comments

Comments
 (0)