Skip to content

Commit 0633d5c

Browse files
committed
[LoopIdiom] 'logical right-shift until zero' ('count active bits') "on steroids" idiom recognition.
I think i've added exhaustive test coverage, and i have verified that alive2 is happy with all the tests, so in principle i'm fine with landing this without review, but just in case.. This adds support for the "count active bits" pattern, i.e.: ``` int countActiveBits(unsigned val) { int cnt = 0; for( ; (val >> cnt) != 0; ++cnt) ; return cnt; } ``` but a somewhat more general one, since that is what i need: ``` int countActiveBits(unsigned val, int start, int off) { int cnt; for (cnt = start; val >> (cnt + off); cnt++) ; return cnt; } ``` I've followed in footstep of 'left-shift until bittest' idiom (D91038), in the sense that iff the `ctlz` intrinsic is cheap, we'll transform, regardless of all other factors. This can have a shocking effect on certain benchmarks: ``` raw.pixls.us-unique/Olympus/XZ-1$ /repositories/googlebenchmark/tools/compare.py -a benchmarks ~/rawspeed/build-{old,new}/src/utilities/rsbench/rsbench --benchmark_counters_tabular=true --benchmark_min_time=0.00000001 --benchmark_repetitions=128 p1319978.orf RUNNING: /home/lebedevri/rawspeed/build-old/src/utilities/rsbench/rsbench --benchmark_counters_tabular=true --benchmark_min_time=0.00000001 --benchmark_repetitions=128 p1319978.orf --benchmark_display_aggregates_only=true --benchmark_out=/tmp/tmp49_28zcm 2021-05-09T01:06:05+03:00 Running /home/lebedevri/rawspeed/build-old/src/utilities/rsbench/rsbench Run on (32 X 3600.24 MHz CPU s) CPU Caches: L1 Data 32 KiB (x16) L1 Instruction 32 KiB (x16) L2 Unified 512 KiB (x16) L3 Unified 32768 KiB (x2) Load Average: 5.26, 6.29, 3.49 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations CPUTime,s CPUTime/WallTime Pixels Pixels/CPUTime Pixels/WallTime Raws/CPUTime Raws/WallTime WallTime,s ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ p1319978.orf/threads:32/process_time/real_time_mean 145 ms 145 ms 128 0.145319 0.999981 10.1568M 69.8949M 69.8936M 6.88159 6.88146 0.145322 p1319978.orf/threads:32/process_time/real_time_median 145 ms 145 ms 128 0.145317 0.999986 10.1568M 69.8941M 69.8931M 6.88151 6.88141 0.145319 p1319978.orf/threads:32/process_time/real_time_stddev 0.766 ms 0.766 ms 128 766.586u 15.1302u 0 354.167k 354.098k 0.0348699 0.0348631 766.469u RUNNING: /home/lebedevri/rawspeed/build-new/src/utilities/rsbench/rsbench --benchmark_counters_tabular=true --benchmark_min_time=0.00000001 --benchmark_repetitions=128 p1319978.orf --benchmark_display_aggregates_only=true --benchmark_out=/tmp/tmpwb9sw2x0 2021-05-09T01:06:24+03:00 Running /home/lebedevri/rawspeed/build-new/src/utilities/rsbench/rsbench Run on (32 X 3599.95 MHz CPU s) CPU Caches: L1 Data 32 KiB (x16) L1 Instruction 32 KiB (x16) L2 Unified 512 KiB (x16) L3 Unified 32768 KiB (x2) Load Average: 4.05, 5.95, 3.43 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations CPUTime,s CPUTime/WallTime Pixels Pixels/CPUTime Pixels/WallTime Raws/CPUTime Raws/WallTime WallTime,s ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ p1319978.orf/threads:32/process_time/real_time_mean 99.8 ms 99.8 ms 128 0.0997758 0.999972 10.1568M 101.797M 101.794M 10.0225 10.0222 0.0997786 p1319978.orf/threads:32/process_time/real_time_median 99.7 ms 99.7 ms 128 0.0997165 0.999985 10.1568M 101.857M 101.854M 10.0284 10.0281 0.0997195 p1319978.orf/threads:32/process_time/real_time_stddev 0.224 ms 0.224 ms 128 224.166u 34.345u 0 226.81k 227.231k 0.0223309 0.0223723 224.586u Comparing /home/lebedevri/rawspeed/build-old/src/utilities/rsbench/rsbench to /home/lebedevri/rawspeed/build-new/src/utilities/rsbench/rsbench Benchmark Time CPU Time Old Time New CPU Old CPU New ---------------------------------------------------------------------------------------------------------------------------------------------------- p1319978.orf/threads:32/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 128 vs 128 p1319978.orf/threads:32/process_time/real_time_mean -0.3134 -0.3134 145 100 145 100 p1319978.orf/threads:32/process_time/real_time_median -0.3138 -0.3138 145 100 145 100 p1319978.orf/threads:32/process_time/real_time_stddev -0.7073 -0.7078 1 0 1 0 ``` Reviewed By: craig.topper, zhuhan0 Differential Revision: https://reviews.llvm.org/D102116
1 parent d97bab6 commit 0633d5c

File tree

3 files changed

+697
-188
lines changed

3 files changed

+697
-188
lines changed

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 331 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
112112
STATISTIC(
113113
NumShiftUntilBitTest,
114114
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
115+
STATISTIC(NumShiftUntilZero,
116+
"Number of uncountable loops recognized as 'shift until zero' idiom");
115117

116118
bool DisableLIRP::All;
117119
static cl::opt<bool, true>
@@ -248,6 +250,7 @@ class LoopIdiomRecognize {
248250
bool IsCntPhiUsedOutsideLoop);
249251

250252
bool recognizeShiftUntilBitTest();
253+
bool recognizeShiftUntilZero();
251254

252255
/// @}
253256
};
@@ -1373,7 +1376,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
13731376
<< CurLoop->getHeader()->getName() << "\n");
13741377

13751378
return recognizePopcount() || recognizeAndInsertFFS() ||
1376-
recognizeShiftUntilBitTest();
1379+
recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
13771380
}
13781381

13791382
/// Check if the given conditional branch is based on the comparison between
@@ -2262,7 +2265,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
22622265
assert(LoopPreheaderBB && "There is always a loop preheader.");
22632266

22642267
BasicBlock *SuccessorBB = CurLoop->getExitBlock();
2265-
assert(LoopPreheaderBB && "There is only a single successor.");
2268+
assert(SuccessorBB && "There is only a single successor.");
22662269

22672270
IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
22682271
Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());
@@ -2395,3 +2398,329 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
23952398
++NumShiftUntilBitTest;
23962399
return MadeChange;
23972400
}
2401+
2402+
/// Return true if the idiom is detected in the loop.
2403+
///
2404+
/// The core idiom we are trying to detect is:
2405+
/// \code
2406+
/// entry:
2407+
/// <...>
2408+
/// %start = <...>
2409+
/// %extraoffset = <...>
2410+
/// <...>
2411+
/// br label %for.cond
2412+
///
2413+
/// loop:
2414+
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
2415+
/// %nbits = add nsw i8 %iv, %extraoffset
2416+
/// %val.shifted = lshr i8 %val, %nbits
2417+
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
2418+
/// %iv.next = add i8 %iv, 1
2419+
/// <...>
2420+
/// br i1 %val.shifted.iszero, label %end, label %loop
2421+
///
2422+
/// end:
2423+
/// %iv.res = phi i8 [ %iv, %loop ] <...>
2424+
/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
2425+
/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
2426+
/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
2427+
/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
2428+
/// <...>
2429+
/// \endcode
2430+
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
2431+
Instruction *&ValShiftedIsZero,
2432+
Instruction *&IV, Value *&Start,
2433+
Value *&Val, const SCEV *&ExtraOffsetExpr,
2434+
bool &InvertedCond) {
2435+
LLVM_DEBUG(dbgs() << DEBUG_TYPE
2436+
" Performing shift-until-zero idiom detection.\n");
2437+
2438+
// Give up if the loop has multiple blocks or multiple backedges.
2439+
if (CurLoop->getNumBlocks() != 1 || CurLoop->getNumBackEdges() != 1) {
2440+
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad block/backedge count.\n");
2441+
return false;
2442+
}
2443+
2444+
Instruction *ValShifted, *NBits, *IVNext;
2445+
Value *ExtraOffset;
2446+
2447+
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2448+
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2449+
assert(LoopPreheaderBB && "There is always a loop preheader.");
2450+
2451+
using namespace PatternMatch;
2452+
2453+
// Step 1: Check if the loop backedge, condition is in desirable form.
2454+
2455+
ICmpInst::Predicate Pred;
2456+
BasicBlock *TrueBB, *FalseBB;
2457+
if (!match(LoopHeaderBB->getTerminator(),
2458+
m_Br(m_Instruction(ValShiftedIsZero), m_BasicBlock(TrueBB),
2459+
m_BasicBlock(FalseBB))) ||
2460+
!match(ValShiftedIsZero,
2461+
m_ICmp(Pred, m_Instruction(ValShifted), m_Zero())) ||
2462+
!ICmpInst::isEquality(Pred)) {
2463+
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
2464+
return false;
2465+
}
2466+
2467+
// Step 2: Check if the comparison's operand is in desirable form.
2468+
2469+
if (!match(ValShifted, m_LShr(m_Value(Val), m_Instruction(NBits)))) {
2470+
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad comparisons value computation.\n");
2471+
return false;
2472+
}
2473+
2474+
// Step 3: Check if the shift amount is in desirable form.
2475+
2476+
if (match(NBits, m_c_Add(m_Instruction(IV),
2477+
m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
2478+
(NBits->hasNoSignedWrap() || NBits->hasNoUnsignedWrap()))
2479+
ExtraOffsetExpr = SE->getNegativeSCEV(SE->getSCEV(ExtraOffset));
2480+
else if (match(NBits,
2481+
m_Sub(m_Instruction(IV),
2482+
m_LoopInvariant(m_Value(ExtraOffset), CurLoop))) &&
2483+
NBits->hasNoSignedWrap())
2484+
ExtraOffsetExpr = SE->getSCEV(ExtraOffset);
2485+
else {
2486+
IV = NBits;
2487+
ExtraOffsetExpr = SE->getZero(NBits->getType());
2488+
}
2489+
2490+
// Step 4: Check if the recurrence is in desirable form.
2491+
auto *IVPN = dyn_cast<PHINode>(IV);
2492+
if (!IVPN || IVPN->getParent() != LoopHeaderBB) {
2493+
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Not an expected PHI node.\n");
2494+
return false;
2495+
}
2496+
2497+
Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);
2498+
IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));
2499+
2500+
if (!IVNext || !match(IVNext, m_Add(m_Specific(IVPN), m_One()))) {
2501+
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad recurrence.\n");
2502+
return false;
2503+
}
2504+
2505+
// Step 4: Check if the backedge's destinations are in desirable form.
2506+
2507+
assert(ICmpInst::isEquality(Pred) &&
2508+
"Should only get equality predicates here.");
2509+
2510+
// cmp-br is commutative, so canonicalize to a single variant.
2511+
InvertedCond = Pred != ICmpInst::Predicate::ICMP_EQ;
2512+
if (InvertedCond) {
2513+
Pred = ICmpInst::getInversePredicate(Pred);
2514+
std::swap(TrueBB, FalseBB);
2515+
}
2516+
2517+
// We expect to exit loop when comparison yields true,
2518+
// so when it yields false we should branch back to loop header.
2519+
if (FalseBB != LoopHeaderBB) {
2520+
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge flow.\n");
2521+
return false;
2522+
}
2523+
2524+
// Okay, idiom checks out.
2525+
return true;
2526+
}
2527+
2528+
/// Look for the following loop:
2529+
/// \code
2530+
/// entry:
2531+
/// <...>
2532+
/// %start = <...>
2533+
/// %extraoffset = <...>
2534+
/// <...>
2535+
/// br label %for.cond
2536+
///
2537+
/// loop:
2538+
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
2539+
/// %nbits = add nsw i8 %iv, %extraoffset
2540+
/// %val.shifted = lshr i8 %val, %nbits
2541+
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
2542+
/// %iv.next = add i8 %iv, 1
2543+
/// <...>
2544+
/// br i1 %val.shifted.iszero, label %end, label %loop
2545+
///
2546+
/// end:
2547+
/// %iv.res = phi i8 [ %iv, %loop ] <...>
2548+
/// %nbits.res = phi i8 [ %nbits, %loop ] <...>
2549+
/// %val.shifted.res = phi i8 [ %val.shifted, %loop ] <...>
2550+
/// %val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ] <...>
2551+
/// %iv.next.res = phi i8 [ %iv.next, %loop ] <...>
2552+
/// <...>
2553+
/// \endcode
2554+
///
2555+
/// And transform it into:
2556+
/// \code
2557+
/// entry:
2558+
/// <...>
2559+
/// %start = <...>
2560+
/// %extraoffset = <...>
2561+
/// <...>
2562+
/// %val.numleadingzeros = call i8 @llvm.ctlz.i8(i8 %val, i1 0)
2563+
/// %val.numactivebits = sub i8 8, %val.numleadingzeros
2564+
/// %extraoffset.neg = sub i8 0, %extraoffset
2565+
/// %tmp = add i8 %val.numactivebits, %extraoffset.neg
2566+
/// %iv.final = call i8 @llvm.smax.i8(i8 %tmp, i8 %start)
2567+
/// %loop.tripcount = sub i8 %iv.final, %start
2568+
/// br label %loop
2569+
///
2570+
/// loop:
2571+
/// %loop.iv = phi i8 [ 0, %entry ], [ %loop.iv.next, %loop ]
2572+
/// %loop.iv.next = add i8 %loop.iv, 1
2573+
/// %loop.ivcheck = icmp eq i8 %loop.iv.next, %loop.tripcount
2574+
/// %iv = add i8 %loop.iv, %start
2575+
/// <...>
2576+
/// br i1 %loop.ivcheck, label %end, label %loop
2577+
///
2578+
/// end:
2579+
/// %iv.res = phi i8 [ %iv.final, %loop ] <...>
2580+
/// <...>
2581+
/// \endcode
2582+
bool LoopIdiomRecognize::recognizeShiftUntilZero() {
2583+
bool MadeChange = false;
2584+
2585+
Instruction *ValShiftedIsZero, *IV;
2586+
Value *Start, *Val;
2587+
const SCEV *ExtraOffsetExpr;
2588+
bool InvertedCond;
2589+
if (!detectShiftUntilZeroIdiom(CurLoop, SE, ValShiftedIsZero, IV, Start, Val,
2590+
ExtraOffsetExpr, InvertedCond)) {
2591+
LLVM_DEBUG(dbgs() << DEBUG_TYPE
2592+
" shift-until-zero idiom detection failed.\n");
2593+
return MadeChange;
2594+
}
2595+
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom detected!\n");
2596+
2597+
// Ok, it is the idiom we were looking for, we *could* transform this loop,
2598+
// but is it profitable to transform?
2599+
2600+
BasicBlock *LoopHeaderBB = CurLoop->getHeader();
2601+
BasicBlock *LoopPreheaderBB = CurLoop->getLoopPreheader();
2602+
assert(LoopPreheaderBB && "There is always a loop preheader.");
2603+
2604+
BasicBlock *SuccessorBB = CurLoop->getExitBlock();
2605+
assert(SuccessorBB && "There is only a single successor.");
2606+
2607+
IRBuilder<> Builder(LoopPreheaderBB->getTerminator());
2608+
Builder.SetCurrentDebugLocation(IV->getDebugLoc());
2609+
2610+
Intrinsic::ID IntrID = Intrinsic::ctlz;
2611+
Type *Ty = Val->getType();
2612+
unsigned Bitwidth = Ty->getScalarSizeInBits();
2613+
2614+
TargetTransformInfo::TargetCostKind CostKind =
2615+
TargetTransformInfo::TCK_SizeAndLatency;
2616+
2617+
// The rewrite is considered to be unprofitable iff and only iff the
2618+
// intrinsic we'll use are not cheap. Note that we are okay with *just*
2619+
// making the loop countable, even if nothing else changes.
2620+
IntrinsicCostAttributes Attrs(
2621+
IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getFalse()});
2622+
InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
2623+
if (Cost > TargetTransformInfo::TCC_Basic) {
2624+
LLVM_DEBUG(dbgs() << DEBUG_TYPE
2625+
" Intrinsic is too costly, not beneficial\n");
2626+
return MadeChange;
2627+
}
2628+
2629+
// Ok, transform appears worthwhile.
2630+
MadeChange = true;
2631+
2632+
bool OffsetIsZero = false;
2633+
if (auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))
2634+
OffsetIsZero = ExtraOffsetExprC->isZero();
2635+
2636+
// Step 1: Compute the loop's final IV value / trip count.
2637+
2638+
CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
2639+
IntrID, Ty, {Val, /*is_zero_undef=*/Builder.getFalse()},
2640+
/*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros");
2641+
Value *ValNumActiveBits = Builder.CreateSub(
2642+
ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros,
2643+
Val->getName() + ".numactivebits", /*HasNUW=*/true,
2644+
/*HasNSW=*/Bitwidth != 2);
2645+
2646+
SCEVExpander Expander(*SE, *DL, "loop-idiom");
2647+
Expander.setInsertPoint(&*Builder.GetInsertPoint());
2648+
Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);
2649+
2650+
Value *ValNumActiveBitsOffset = Builder.CreateAdd(
2651+
ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",
2652+
/*HasNUW=*/OffsetIsZero, /*HasNSW=*/true);
2653+
Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},
2654+
{ValNumActiveBitsOffset, Start},
2655+
/*FMFSource=*/nullptr, "iv.final");
2656+
2657+
auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(
2658+
IVFinal, Start, CurLoop->getName() + ".backedgetakencount",
2659+
/*HasNUW=*/OffsetIsZero, /*HasNSW=*/true));
2660+
// FIXME: or when the offset was `add nuw`
2661+
2662+
// We know loop's backedge-taken count, but what's loop's trip count?
2663+
Value *LoopTripCount =
2664+
Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),
2665+
CurLoop->getName() + ".tripcount", /*HasNUW=*/true,
2666+
/*HasNSW=*/Bitwidth != 2);
2667+
2668+
// Step 2: Adjust the successor basic block to recieve the original
2669+
// induction variable's final value instead of the orig. IV itself.
2670+
2671+
IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);
2672+
2673+
// Step 3: Rewrite the loop into a countable form, with canonical IV.
2674+
2675+
// The new canonical induction variable.
2676+
Builder.SetInsertPoint(&LoopHeaderBB->front());
2677+
auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");
2678+
2679+
// The induction itself.
2680+
Builder.SetInsertPoint(LoopHeaderBB->getFirstNonPHI());
2681+
auto *CIVNext =
2682+
Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",
2683+
/*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
2684+
2685+
// The loop trip count check.
2686+
auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,
2687+
CurLoop->getName() + ".ivcheck");
2688+
auto *NewIVCheck = CIVCheck;
2689+
if (InvertedCond) {
2690+
NewIVCheck = Builder.CreateNot(CIVCheck);
2691+
NewIVCheck->takeName(ValShiftedIsZero);
2692+
}
2693+
2694+
// The original IV, but rebased to be an offset to the CIV.
2695+
auto *IVDePHId = Builder.CreateAdd(CIV, Start, "", /*HasNUW=*/false,
2696+
/*HasNSW=*/true); // FIXME: what about NUW?
2697+
IVDePHId->takeName(IV);
2698+
2699+
// The loop terminator.
2700+
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
2701+
Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
2702+
LoopHeaderBB->getTerminator()->eraseFromParent();
2703+
2704+
// Populate the IV PHI.
2705+
CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);
2706+
CIV->addIncoming(CIVNext, LoopHeaderBB);
2707+
2708+
// Step 4: Forget the "non-computable" trip-count SCEV associated with the
2709+
// loop. The loop would otherwise not be deleted even if it becomes empty.
2710+
2711+
SE->forgetLoop(CurLoop);
2712+
2713+
// Step 5: Try to cleanup the loop's body somewhat.
2714+
IV->replaceAllUsesWith(IVDePHId);
2715+
IV->eraseFromParent();
2716+
2717+
ValShiftedIsZero->replaceAllUsesWith(NewIVCheck);
2718+
ValShiftedIsZero->eraseFromParent();
2719+
2720+
// Other passes will take care of actually deleting the loop if possible.
2721+
2722+
LLVM_DEBUG(dbgs() << DEBUG_TYPE " shift-until-zero idiom optimized!\n");
2723+
2724+
++NumShiftUntilZero;
2725+
return MadeChange;
2726+
}

0 commit comments

Comments
 (0)