Skip to content

Commit 551c280

Browse files
authored
[indvars] Always fall back to truncation if AddRec widening fails (#70967)
The current code structure results in cases where, if a) we can't clone the IV user (because it's not in our whitelist) or b) we can't prove the SCEV expressions are identical, we'd sometimes leave both the original unwidened IV and the partially widened IV in the code. Instead, just truncate the wide IV to the use - same as what we'd do if we couldn't find an addrec to start with. Noticed this while playing with changing how we produce addrecs. The current structure results in a very tight interlock between SCEV's internal capabilities and the indvars code.
1 parent 24060db commit 551c280

File tree

3 files changed

+71
-69
lines changed

3 files changed

+71
-69
lines changed

llvm/lib/Transforms/Utils/SimplifyIndVar.cpp

Lines changed: 56 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1805,65 +1805,70 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
18051805
return nullptr;
18061806
}
18071807

1808-
// Does this user itself evaluate to a recurrence after widening?
1809-
WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1810-
if (!WideAddRec.first)
1811-
WideAddRec = getWideRecurrence(DU);
1812-
1813-
assert((WideAddRec.first == nullptr) ==
1814-
(WideAddRec.second == ExtendKind::Unknown));
1815-
if (!WideAddRec.first) {
1816-
// If use is a loop condition, try to promote the condition instead of
1817-
// truncating the IV first.
1818-
if (widenLoopCompare(DU))
1808+
auto tryAddRecExpansion = [&]() -> Instruction* {
1809+
// Does this user itself evaluate to a recurrence after widening?
1810+
WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1811+
if (!WideAddRec.first)
1812+
WideAddRec = getWideRecurrence(DU);
1813+
assert((WideAddRec.first == nullptr) ==
1814+
(WideAddRec.second == ExtendKind::Unknown));
1815+
if (!WideAddRec.first)
18191816
return nullptr;
18201817

1821-
// We are here about to generate a truncate instruction that may hurt
1822-
// performance because the scalar evolution expression computed earlier
1823-
// in WideAddRec.first does not indicate a polynomial induction expression.
1824-
// In that case, look at the operands of the use instruction to determine
1825-
// if we can still widen the use instead of truncating its operand.
1826-
if (widenWithVariantUse(DU))
1818+
// Reuse the IV increment that SCEVExpander created as long as it dominates
1819+
// NarrowUse.
1820+
Instruction *WideUse = nullptr;
1821+
if (WideAddRec.first == WideIncExpr &&
1822+
Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1823+
WideUse = WideInc;
1824+
else {
1825+
WideUse = cloneIVUser(DU, WideAddRec.first);
1826+
if (!WideUse)
1827+
return nullptr;
1828+
}
1829+
// Evaluation of WideAddRec ensured that the narrow expression could be
1830+
// extended outside the loop without overflow. This suggests that the wide use
1831+
// evaluates to the same expression as the extended narrow use, but doesn't
1832+
// absolutely guarantee it. Hence the following failsafe check. In rare cases
1833+
// where it fails, we simply throw away the newly created wide use.
1834+
if (WideAddRec.first != SE->getSCEV(WideUse)) {
1835+
LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1836+
<< *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1837+
<< "\n");
1838+
DeadInsts.emplace_back(WideUse);
18271839
return nullptr;
1840+
};
18281841

1829-
// This user does not evaluate to a recurrence after widening, so don't
1830-
// follow it. Instead insert a Trunc to kill off the original use,
1831-
// eventually isolating the original narrow IV so it can be removed.
1832-
truncateIVUse(DU, DT, LI);
1833-
return nullptr;
1834-
}
1842+
// if we reached this point then we are going to replace
1843+
// DU.NarrowUse with WideUse. Reattach DbgValue then.
1844+
replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
18351845

1836-
// Reuse the IV increment that SCEVExpander created as long as it dominates
1837-
// NarrowUse.
1838-
Instruction *WideUse = nullptr;
1839-
if (WideAddRec.first == WideIncExpr &&
1840-
Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1841-
WideUse = WideInc;
1842-
else {
1843-
WideUse = cloneIVUser(DU, WideAddRec.first);
1844-
if (!WideUse)
1845-
return nullptr;
1846-
}
1847-
// Evaluation of WideAddRec ensured that the narrow expression could be
1848-
// extended outside the loop without overflow. This suggests that the wide use
1849-
// evaluates to the same expression as the extended narrow use, but doesn't
1850-
// absolutely guarantee it. Hence the following failsafe check. In rare cases
1851-
// where it fails, we simply throw away the newly created wide use.
1852-
if (WideAddRec.first != SE->getSCEV(WideUse)) {
1853-
LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1854-
<< *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1855-
<< "\n");
1856-
DeadInsts.emplace_back(WideUse);
1846+
ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1847+
// Returning WideUse pushes it on the worklist.
1848+
return WideUse;
1849+
};
1850+
1851+
if (auto *I = tryAddRecExpansion())
1852+
return I;
1853+
1854+
// If use is a loop condition, try to promote the condition instead of
1855+
// truncating the IV first.
1856+
if (widenLoopCompare(DU))
18571857
return nullptr;
1858-
}
18591858

1860-
// if we reached this point then we are going to replace
1861-
// DU.NarrowUse with WideUse. Reattach DbgValue then.
1862-
replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1859+
// We are here about to generate a truncate instruction that may hurt
1860+
// performance because the scalar evolution expression computed earlier
1861+
// in WideAddRec.first does not indicate a polynomial induction expression.
1862+
// In that case, look at the operands of the use instruction to determine
1863+
// if we can still widen the use instead of truncating its operand.
1864+
if (widenWithVariantUse(DU))
1865+
return nullptr;
18631866

1864-
ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1865-
// Returning WideUse pushes it on the worklist.
1866-
return WideUse;
1867+
// This user does not evaluate to a recurrence after widening, so don't
1868+
// follow it. Instead insert a Trunc to kill off the original use,
1869+
// eventually isolating the original narrow IV so it can be removed.
1870+
truncateIVUse(DU, DT, LI);
1871+
return nullptr;
18671872
}
18681873

18691874
/// Add eligible users of NarrowDef to NarrowIVUsers.

llvm/test/Transforms/IndVarSimplify/pr55925.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,13 @@ define void @test(ptr %p) personality ptr undef {
1414
; CHECK-NEXT: br label [[LOOP:%.*]]
1515
; CHECK: loop:
1616
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
17-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH]] ]
18-
; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foo(i32 returned [[IV]])
17+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32
18+
; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foo(i32 returned [[TMP0]])
1919
; CHECK-NEXT: to label [[LOOP_LATCH]] unwind label [[EXIT:%.*]]
2020
; CHECK: loop.latch:
2121
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
22-
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
23-
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32
24-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @foo(i32 [[TMP0]])
22+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
23+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i32 [[TMP1]])
2524
; CHECK-NEXT: br label [[LOOP]]
2625
; CHECK: exit:
2726
; CHECK-NEXT: [[LP:%.*]] = landingpad { ptr, i32 }
@@ -55,19 +54,18 @@ define void @test_critedge(i1 %c, ptr %p) personality ptr undef {
5554
; CHECK-NEXT: br label [[LOOP:%.*]]
5655
; CHECK: loop:
5756
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
58-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH]] ]
5957
; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP_INVOKE:%.*]], label [[LOOP_OTHER:%.*]]
6058
; CHECK: loop.invoke:
6159
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32
62-
; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foo(i32 returned [[IV]])
60+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
61+
; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foo(i32 returned [[TMP0]])
6362
; CHECK-NEXT: to label [[LOOP_LATCH]] unwind label [[EXIT:%.*]]
6463
; CHECK: loop.other:
6564
; CHECK-NEXT: br label [[LOOP_LATCH]]
6665
; CHECK: loop.latch:
67-
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], [[LOOP_INVOKE]] ], [ 0, [[LOOP_OTHER]] ]
66+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP1]], [[LOOP_INVOKE]] ], [ 0, [[LOOP_OTHER]] ]
6867
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
69-
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
70-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @foo(i32 [[PHI]])
68+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @foo(i32 [[PHI]])
7169
; CHECK-NEXT: br label [[LOOP]]
7270
; CHECK: exit:
7371
; CHECK-NEXT: [[LP:%.*]] = landingpad { ptr, i32 }

llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -660,15 +660,17 @@ define void @zext_postinc_offset_constant_minus_one(ptr %A, i32 %start) {
660660
; CHECK-NEXT: [[NONPOS:%.*]] = icmp slt i32 [[START:%.*]], 2
661661
; CHECK-NEXT: br i1 [[NONPOS]], label [[EXIT:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
662662
; CHECK: for.body.preheader:
663+
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START]] to i64
663664
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
664665
; CHECK: for.body:
666+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
665667
; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ]
666-
; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[J_016_US]], -1
667-
; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64
668-
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
668+
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
669+
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]]
669670
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
670671
; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1
671672
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[INC_US]], 6
673+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
672674
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]
673675
; CHECK: exit.loopexit:
674676
; CHECK-NEXT: br label [[EXIT]]
@@ -704,12 +706,9 @@ define void @zext_preinc_offset_constant_minus_one(ptr %A, i32 %start) {
704706
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
705707
; CHECK: for.body:
706708
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
707-
; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ]
708-
; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[J_016_US]], -1
709-
; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64
710-
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
709+
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
710+
; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]]
711711
; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]])
712-
; CHECK-NEXT: [[INC_US]] = add nsw i32 [[J_016_US]], -1
713712
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i64 [[INDVARS_IV]], 6
714713
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
715714
; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]]

0 commit comments

Comments
 (0)