Skip to content

Commit 8d26059

Browse files
pawelflisikowski authored and
igcbot committed
Make getPreviousDefRecursive iterative
1 parent e18110c commit 8d26059

File tree

4 files changed

+1464
-0
lines changed

4 files changed

+1464
-0
lines changed
Lines changed: 366 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,366 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
/*========================== begin_copyright_notice ============================
10+
11+
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
12+
See https://llvm.org/LICENSE.txt for license information.
13+
SPDX-License-Identifier: Apache-2.0 with LLVM-exception
14+
15+
============================= end_copyright_notice ===========================*/
16+
17+
From c522ebaa9d67809e7b3e2660321b12b999da24b3 Mon Sep 17 00:00:00 2001
18+
From: pawelflisikowski <[email protected]>
19+
Date: Tue, 28 Jan 2025 03:19:57 -0800
20+
Subject: [PATCH] [IGC LLVM] Make getPreviousDefRecursive iterative
21+
22+
Description:
23+
Large kernels with long use-def chains can make the recursive calls in the
24+
MemorySSA updater used by the LICM pass exceed the available stack space.
25+
26+
This was observed with Blender on LNL (OGLVK).
27+
28+
Replace the recursive MemorySSAUpdater::getPreviousDefRecursive(...) with an
29+
iterative equivalent, getPreviousDefIterative(...), that uses an explicit work stack.
30+
31+
Platforms:
32+
All
33+
---
34+
llvm/include/llvm/Analysis/MemorySSAUpdater.h | 2 +-
35+
llvm/lib/Analysis/MemorySSAUpdater.cpp | 267 ++++++++++++------
36+
2 files changed, 185 insertions(+), 84 deletions(-)
37+
38+
diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
39+
index 3e5ebe9cb..013994d86 100644
40+
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
41+
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
42+
@@ -255,7 +255,7 @@ private:
43+
getPreviousDefFromEnd(BasicBlock *,
44+
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
45+
MemoryAccess *
46+
- getPreviousDefRecursive(BasicBlock *,
47+
+ getPreviousDefIterative(BasicBlock *,
48+
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
49+
MemoryAccess *recursePhi(MemoryAccess *Phi);
50+
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi);
51+
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
52+
index 9c841883d..800e9812c 100644
53+
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
54+
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
55+
@@ -27,6 +27,7 @@
56+
#include "llvm/Support/Debug.h"
57+
#include "llvm/Support/FormattedStream.h"
58+
#include <algorithm>
59+
+#include <stack>
60+
61+
#define DEBUG_TYPE "memoryssa"
62+
using namespace llvm;
63+
@@ -40,101 +41,201 @@ using namespace llvm;
64+
// that there are two or more definitions needing to be merged.
65+
// This still will leave non-minimal form in the case of irreducible control
66+
// flow, where phi nodes may be in cycles with themselves, but unnecessary.
67+
-MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
68+
+MemoryAccess *MemorySSAUpdater::getPreviousDefIterative(
69+
BasicBlock *BB,
70+
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
71+
- // First, do a cache lookup. Without this cache, certain CFG structures
72+
- // (like a series of if statements) take exponential time to visit.
73+
- auto Cached = CachedPreviousDef.find(BB);
74+
- if (Cached != CachedPreviousDef.end())
75+
- return Cached->second;
76+
-
77+
- // If this method is called from an unreachable block, return LoE.
78+
- if (!MSSA->DT->isReachableFromEntry(BB))
79+
- return MSSA->getLiveOnEntryDef();
80+
+ enum ResumePoint {
81+
+ START,
82+
+ COLLECT_VALUE_FROM_UNIQUE_PREDECESSOR,
83+
+ COLLECT_VALUES_FROM_PREDECESSORS,
84+
+ PROCESS_PREDECESSOR,
85+
+ GET_PREVIOUS_DEF_FROM_END,
86+
+ SIMPLIFY_OPS
87+
+ };
88+
89+
- if (BasicBlock *Pred = BB->getUniquePredecessor()) {
90+
- VisitedBlocks.insert(BB);
91+
- // Single predecessor case, just recurse, we can only have one definition.
92+
- MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef);
93+
- CachedPreviousDef.insert({BB, Result});
94+
- return Result;
95+
- }
96+
+ class StackFrame {
97+
+ public:
98+
+ BasicBlock *BB;
99+
+ ResumePoint ResumeAt;
100+
+ bool UniqueIncomingAccess;
101+
+ MemoryAccess *SingleAccess;
102+
+ SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps;
103+
+ // Iterators for keeping track of predecessor blocks that are already
104+
+ // processed.
105+
+ pred_iterator PredIt;
106+
+ pred_iterator PredEnd;
107+
+ StackFrame(BasicBlock *BB, ResumePoint resumePoint)
108+
+ : BB(BB), ResumeAt(resumePoint), UniqueIncomingAccess(true),
109+
+ SingleAccess(nullptr), PredIt(pred_begin(BB)), PredEnd(pred_end(BB)) {
110+
+ }
111+
+ };
112+
113+
- if (VisitedBlocks.count(BB)) {
114+
- // We hit our node again, meaning we had a cycle, we must insert a phi
115+
- // node to break it so we have an operand. The only case this will
116+
- // insert useless phis is if we have irreducible control flow.
117+
- MemoryAccess *Result = MSSA->createMemoryPhi(BB);
118+
- CachedPreviousDef.insert({BB, Result});
119+
- return Result;
120+
- }
121+
+ std::stack<StackFrame> WorkStack;
122+
+ std::stack<MemoryAccess *> ReturnStack;
123+
+ WorkStack.push(StackFrame(BB, START));
124+
+
125+
+ while (!WorkStack.empty()) {
126+
+ StackFrame &CurrentFrame = WorkStack.top();
127+
+ BasicBlock *CurrentBB = CurrentFrame.BB;
128+
+
129+
+ switch (CurrentFrame.ResumeAt) {
130+
+ case START: {
131+
+ // First, do a cache lookup. Without this cache, certain CFG structures
132+
+ // (like a series of if statements) take exponential time to visit.
133+
+ auto Cached = CachedPreviousDef.find(CurrentBB);
134+
+ if (Cached != CachedPreviousDef.end()) {
135+
+ ReturnStack.push(Cached->second);
136+
+ WorkStack.pop();
137+
+ break;
138+
+ }
139+
140+
- if (VisitedBlocks.insert(BB).second) {
141+
- // Mark us visited so we can detect a cycle
142+
- SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps;
143+
+ // If this method is called from an unreachable block, return LoE.
144+
+ if (!MSSA->DT->isReachableFromEntry(CurrentBB)) {
145+
+ MemoryAccess *LoE = MSSA->getLiveOnEntryDef();
146+
+ ReturnStack.push(LoE);
147+
+ WorkStack.pop();
148+
+ break;
149+
+ }
150+
151+
- // Recurse to get the values in our predecessors for placement of a
152+
- // potential phi node. This will insert phi nodes if we cycle in order to
153+
- // break the cycle and have an operand.
154+
- bool UniqueIncomingAccess = true;
155+
- MemoryAccess *SingleAccess = nullptr;
156+
- for (auto *Pred : predecessors(BB)) {
157+
- if (MSSA->DT->isReachableFromEntry(Pred)) {
158+
- auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef);
159+
- if (!SingleAccess)
160+
- SingleAccess = IncomingAccess;
161+
- else if (IncomingAccess != SingleAccess)
162+
- UniqueIncomingAccess = false;
163+
- PhiOps.push_back(IncomingAccess);
164+
- } else
165+
- PhiOps.push_back(MSSA->getLiveOnEntryDef());
166+
- }
167+
+ if (BasicBlock *Pred = CurrentBB->getUniquePredecessor()) {
168+
+ VisitedBlocks.insert(CurrentBB);
169+
+ // Single predecessor case, just recurse, we can only have one
170+
+ // definition.
171+
+ CurrentFrame.ResumeAt = COLLECT_VALUE_FROM_UNIQUE_PREDECESSOR;
172+
+ WorkStack.push(StackFrame(Pred, GET_PREVIOUS_DEF_FROM_END));
173+
+ break;
174+
+ }
175+
176+
- // Now try to simplify the ops to avoid placing a phi.
177+
- // This may return null if we never created a phi yet, that's okay
178+
- MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB));
179+
-
180+
- // See if we can avoid the phi by simplifying it.
181+
- auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
182+
- // If we couldn't simplify, we may have to create a phi
183+
- if (Result == Phi && UniqueIncomingAccess && SingleAccess) {
184+
- // A concrete Phi only exists if we created an empty one to break a cycle.
185+
- if (Phi) {
186+
- assert(Phi->operands().empty() && "Expected empty Phi");
187+
- Phi->replaceAllUsesWith(SingleAccess);
188+
- removeMemoryAccess(Phi);
189+
+ // If this block has been already visited
190+
+ if (VisitedBlocks.count(CurrentBB)) {
191+
+ // We hit our node again, meaning we had a cycle, we must insert a phi
192+
+ // node to break it so we have an operand. The only case this will
193+
+ // insert useless phis is if we have irreducible control flow.
194+
+ MemoryAccess *Result = MSSA->createMemoryPhi(CurrentBB);
195+
+ CachedPreviousDef.insert({CurrentBB, Result});
196+
+ ReturnStack.push(Result);
197+
+ WorkStack.pop();
198+
+ break;
199+
}
200+
- Result = SingleAccess;
201+
- } else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) {
202+
- if (!Phi)
203+
- Phi = MSSA->createMemoryPhi(BB);
204+
-
205+
- // See if the existing phi operands match what we need.
206+
- // Unlike normal SSA, we only allow one phi node per block, so we can't just
207+
- // create a new one.
208+
- if (Phi->getNumOperands() != 0) {
209+
- // FIXME: Figure out whether this is dead code and if so remove it.
210+
- if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
211+
- // These will have been filled in by the recursive read we did above.
212+
- llvm::copy(PhiOps, Phi->op_begin());
213+
- std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
214+
+
215+
+ // If block hasn't been visited (true if the element was successfully
216+
+ // inserted, i.e., it was not already present in the set).
217+
+ if (VisitedBlocks.insert(CurrentBB).second) {
218+
+ if (CurrentFrame.PredIt != CurrentFrame.PredEnd) {
219+
+ CurrentFrame.ResumeAt = COLLECT_VALUES_FROM_PREDECESSORS;
220+
+ WorkStack.push(StackFrame(*CurrentFrame.PredIt, PROCESS_PREDECESSOR));
221+
+ break;
222+
}
223+
+ }
224+
+ }
225+
+ case GET_PREVIOUS_DEF_FROM_END: {
226+
+ auto *Defs = MSSA->getWritableBlockDefs(CurrentBB);
227+
+
228+
+ if (Defs) {
229+
+ CachedPreviousDef.insert({CurrentBB, &*Defs->rbegin()});
230+
+ ReturnStack.push(&*Defs->rbegin());
231+
+ WorkStack.pop();
232+
} else {
233+
- unsigned i = 0;
234+
- for (auto *Pred : predecessors(BB))
235+
- Phi->addIncoming(&*PhiOps[i++], Pred);
236+
- InsertedPHIs.push_back(Phi);
237+
+ // Start normal search from the beginning.
238+
+ CurrentFrame.ResumeAt = START;
239+
+ }
240+
+ break;
241+
+ }
242+
+ case COLLECT_VALUE_FROM_UNIQUE_PREDECESSOR: {
243+
+ WorkStack.pop();
244+
+ MemoryAccess *Result = ReturnStack.top();
245+
+ CachedPreviousDef.insert({CurrentBB, Result});
246+
+ break;
247+
+ }
248+
+ case PROCESS_PREDECESSOR: {
249+
+ if (MSSA->DT->isReachableFromEntry(CurrentFrame.BB)) {
250+
+ CurrentFrame.ResumeAt = GET_PREVIOUS_DEF_FROM_END;
251+
+ } else {
252+
+ ReturnStack.push(nullptr);
253+
+ WorkStack.pop();
254+
+ }
255+
+ break;
256+
+ }
257+
+ case COLLECT_VALUES_FROM_PREDECESSORS: {
258+
+ MemoryAccess *IncomingAccess = ReturnStack.top();
259+
+ if (IncomingAccess) {
260+
+ if (!CurrentFrame.SingleAccess) {
261+
+ CurrentFrame.SingleAccess = IncomingAccess;
262+
+ } else if (IncomingAccess != CurrentFrame.SingleAccess)
263+
+ CurrentFrame.UniqueIncomingAccess = false;
264+
+ CurrentFrame.PhiOps.push_back(IncomingAccess);
265+
+ } else {
266+
+ CurrentFrame.PhiOps.push_back(MSSA->getLiveOnEntryDef());
267+
+ }
268+
+ ReturnStack.pop();
269+
+ // Process remaining predecessors.
270+
+ ++CurrentFrame.PredIt;
271+
+ if (CurrentFrame.PredIt != CurrentFrame.PredEnd) {
272+
+ WorkStack.push(StackFrame(*CurrentFrame.PredIt, PROCESS_PREDECESSOR));
273+
+ } else {
274+
+ CurrentFrame.ResumeAt = SIMPLIFY_OPS;
275+
+ break;
276+
+ }
277+
+ }
278+
+ case SIMPLIFY_OPS: {
279+
+ // Now try to simplify the ops to avoid placing a phi.
280+
+ // This may return null if we never created a phi yet, that's okay
281+
+ MemoryPhi *Phi =
282+
+ dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(CurrentBB));
283+
+
284+
+ // See if we can avoid the phi by simplifying it.
285+
+ auto *Result = tryRemoveTrivialPhi(Phi, CurrentFrame.PhiOps);
286+
+ // If we couldn't simplify, we may have to create a phi
287+
+ if (Result == Phi && CurrentFrame.UniqueIncomingAccess &&
288+
+ CurrentFrame.SingleAccess) {
289+
+ // A concrete Phi only exists if we created an empty one to break a
290+
+ // cycle.
291+
+ if (Phi) {
292+
+ assert(Phi->operands().empty() && "Expected empty Phi");
293+
+ Phi->replaceAllUsesWith(CurrentFrame.SingleAccess);
294+
+ removeMemoryAccess(Phi);
295+
+ }
296+
+ Result = CurrentFrame.SingleAccess;
297+
+ } else if (Result == Phi && !(CurrentFrame.UniqueIncomingAccess &&
298+
+ CurrentFrame.SingleAccess)) {
299+
+ if (!Phi)
300+
+ Phi = MSSA->createMemoryPhi(CurrentBB);
301+
+
302+
+ // See if the existing phi operands match what we need.
303+
+ // Unlike normal SSA, we only allow one phi node per block, so we
304+
+ // can't just create a new one.
305+
+ if (Phi->getNumOperands() != 0) {
306+
+ // FIXME: Figure out whether this is dead code and if so remove
307+
+ // it.
308+
+ if (!std::equal(Phi->op_begin(), Phi->op_end(),
309+
+ CurrentFrame.PhiOps.begin())) {
310+
+ // These will have been filled in by the recursive read we did
311+
+ // above.
312+
+ llvm::copy(CurrentFrame.PhiOps, Phi->op_begin());
313+
+ std::copy(pred_begin(CurrentBB), pred_end(CurrentBB),
314+
+ Phi->block_begin());
315+
+ }
316+
+ } else {
317+
+ unsigned i = 0;
318+
+ for (auto *Pred : predecessors(CurrentBB))
319+
+ Phi->addIncoming(&*CurrentFrame.PhiOps[i++], Pred);
320+
+ InsertedPHIs.push_back(Phi);
321+
+ }
322+
+ Result = Phi;
323+
}
324+
- Result = Phi;
325+
+ // Set ourselves up for the next variable by resetting visited state.
326+
+ VisitedBlocks.erase(CurrentBB);
327+
+ CachedPreviousDef.insert({CurrentBB, Result});
328+
+ ReturnStack.push(Result);
329+
+ WorkStack.pop();
330+
}
331+
+ } // end of 'switch' statement
332+
+ } // end of 'while' loop
333+
334+
- // Set ourselves up for the next variable by resetting visited state.
335+
- VisitedBlocks.erase(BB);
336+
- CachedPreviousDef.insert({BB, Result});
337+
- return Result;
338+
+ if (ReturnStack.size() != 1) {
339+
+ llvm_unreachable("There should be only one return value");
340+
}
341+
- llvm_unreachable("Should have hit one of the three cases above");
342+
+ return ReturnStack.top();
343+
}
344+
345+
// This starts at the memory access, and goes backwards in the block to find the
346+
@@ -145,7 +246,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) {
347+
if (auto *LocalResult = getPreviousDefInBlock(MA))
348+
return LocalResult;
349+
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
350+
- return getPreviousDefRecursive(MA->getBlock(), CachedPreviousDef);
351+
+ return getPreviousDefIterative(MA->getBlock(), CachedPreviousDef);
352+
}
353+
354+
// This starts at the memory access, and goes backwards in the block to the find
355+
@@ -186,7 +287,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(
356+
return &*Defs->rbegin();
357+
}
358+
359+
- return getPreviousDefRecursive(BB, CachedPreviousDef);
360+
+ return getPreviousDefIterative(BB, CachedPreviousDef);
361+
}
362+
// Recurse over a set of phi uses to eliminate the trivial ones
363+
MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
364+
--
365+
2.46.0.windows.1
366+

0 commit comments

Comments
 (0)