@@ -2532,15 +2532,124 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2532
2532
return *Report;
2533
2533
}
2534
2534
2535
- bool LoopAccessInfo::isUniform (Value *V) const {
2535
+ namespace {
2536
+ // / A rewriter to build the SCEVs for each of the VF lanes in the expected
2537
+ // / vectorized loop, which can then be compared to detect their uniformity. This
2538
+ // / is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
2539
+ // / with new AddRecs where the step is multiplied by StepMultiplier and Offset *
2540
+ // / Step is added. Also checks if all sub-expressions are analyzable w.r.t.
2541
+ // / uniformity.
2542
+ class SCEVAddRecForUniformityRewriter
2543
+ : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
2544
+ // / Multiplier to be applied to the step of AddRecs in TheLoop.
2545
+ unsigned StepMultiplier;
2546
+
2547
+ // / Offset to be added to the AddRecs in TheLoop.
2548
+ unsigned Offset;
2549
+
2550
+ // / Loop for which to rewrite AddRecsFor.
2551
+ Loop *TheLoop;
2552
+
2553
+ // / Is any sub-expressions not analyzable w.r.t. uniformity?
2554
+ bool CannotAnalyze = false ;
2555
+
2556
+ bool canAnalyze () const { return !CannotAnalyze; }
2557
+
2558
+ public:
2559
+ SCEVAddRecForUniformityRewriter (ScalarEvolution &SE, unsigned StepMultiplier,
2560
+ unsigned Offset, Loop *TheLoop)
2561
+ : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
2562
+ TheLoop (TheLoop) {}
2563
+
2564
+ const SCEV *visitAddRecExpr (const SCEVAddRecExpr *Expr) {
2565
+ assert (Expr->getLoop () == TheLoop &&
2566
+ " addrec outside of TheLoop must be invariant and should have been "
2567
+ " handled earlier" );
2568
+ // Build a new AddRec by multiplying the step by StepMultiplier and
2569
+ // incrementing the start by Offset * step.
2570
+ Type *Ty = Expr->getType ();
2571
+ auto *Step = Expr->getStepRecurrence (SE);
2572
+ auto *NewStep = SE.getMulExpr (Step, SE.getConstant (Ty, StepMultiplier));
2573
+ auto *ScaledOffset = SE.getMulExpr (Step, SE.getConstant (Ty, Offset));
2574
+ auto *NewStart = SE.getAddExpr (Expr->getStart (), ScaledOffset);
2575
+ return SE.getAddRecExpr (NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
2576
+ }
2577
+
2578
+ const SCEV *visit (const SCEV *S) {
2579
+ if (CannotAnalyze || SE.isLoopInvariant (S, TheLoop))
2580
+ return S;
2581
+ return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit (S);
2582
+ }
2583
+
2584
+ const SCEV *visitUnknown (const SCEVUnknown *S) {
2585
+ if (SE.isLoopInvariant (S, TheLoop))
2586
+ return S;
2587
+ // The value could vary across iterations.
2588
+ CannotAnalyze = true ;
2589
+ return S;
2590
+ }
2591
+
2592
+ const SCEV *visitCouldNotCompute (const SCEVCouldNotCompute *S) {
2593
+ // Could not analyze the expression.
2594
+ CannotAnalyze = true ;
2595
+ return S;
2596
+ }
2597
+
2598
+ static const SCEV *rewrite (const SCEV *S, ScalarEvolution &SE,
2599
+ unsigned StepMultiplier, unsigned Offset,
2600
+ Loop *TheLoop) {
2601
+ // / Bail out if the expression does not contain an UDiv expression.
2602
+ // / Uniform values which are not loop invariant require operations to strip
2603
+ // / out the lowest bits. For now just look for UDivs and use it to avoid
2604
+ // / re-writing UDIV-free expressions for other lanes to limit compile time.
2605
+ if (!SCEVExprContains (S,
2606
+ [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
2607
+ return SE.getCouldNotCompute ();
2608
+
2609
+ SCEVAddRecForUniformityRewriter Rewriter (SE, StepMultiplier, Offset,
2610
+ TheLoop);
2611
+ const SCEV *Result = Rewriter.visit (S);
2612
+
2613
+ if (Rewriter.canAnalyze ())
2614
+ return Result;
2615
+ return SE.getCouldNotCompute ();
2616
+ }
2617
+ };
2618
+
2619
+ } // namespace
2620
+
2621
+ bool LoopAccessInfo::isUniform (Value *V, std::optional<ElementCount> VF) const {
2536
2622
auto *SE = PSE->getSE ();
2537
2623
// Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2538
2624
// never considered uniform.
2539
2625
// TODO: Is this really what we want? Even without FP SCEV, we may want some
2540
2626
// trivially loop-invariant FP values to be considered uniform.
2541
2627
if (!SE->isSCEVable (V->getType ()))
2542
2628
return false ;
2543
- return (SE->isLoopInvariant (SE->getSCEV (V), TheLoop));
2629
+ const SCEV *S = SE->getSCEV (V);
2630
+ if (SE->isLoopInvariant (S, TheLoop))
2631
+ return true ;
2632
+ if (!VF || VF->isScalable ())
2633
+ return false ;
2634
+ if (VF->isScalar ())
2635
+ return true ;
2636
+
2637
+ // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
2638
+ // lane 0 matches the expressions for all other lanes.
2639
+ unsigned FixedVF = VF->getKnownMinValue ();
2640
+ const SCEV *FirstLaneExpr =
2641
+ SCEVAddRecForUniformityRewriter::rewrite (S, *SE, FixedVF, 0 , TheLoop);
2642
+ if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
2643
+ return false ;
2644
+
2645
+ // Make sure the expressions for lanes FixedVF-1..1 match the expression for
2646
+ // lane 0. We check lanes in reverse order for compile-time, as frequently
2647
+ // checking the last lane is sufficient to rule out uniformity.
2648
+ return all_of (reverse (seq<unsigned >(1 , FixedVF)), [&](unsigned I) {
2649
+ const SCEV *IthLaneExpr =
2650
+ SCEVAddRecForUniformityRewriter::rewrite (S, *SE, FixedVF, I, TheLoop);
2651
+ return FirstLaneExpr == IthLaneExpr;
2652
+ });
2544
2653
}
2545
2654
2546
2655
// / Find the operand of the GEP that should be checked for consecutive
0 commit comments