Skip to content

Commit f86efad

Browse files
committed
[InstCombine] Iterative replacement in PtrReplacer
This patch enhances the PtrReplacer as follows: 1. Users are now collected iteratively to be generous on the stack. In the case of PHIs with incoming values which have not yet been visited, they are pushed back into the stack for reconsideration. 2. Replace users of the pointer root in a reverse-postorder traversal, instead of a simple traversal over the collected users. This reordering ensures that the uses of an instruction are replaced before replacing the instruction itself. 3. During the replacement of PHI, use the same incoming value if it does not have a replacement. This patch specifically fixes the case when an incoming value of a PHI is addrspacecasted.
1 parent 98194fd commit f86efad

File tree

2 files changed

+150
-67
lines changed

2 files changed

+150
-67
lines changed

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 112 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,10 @@ class PointerReplacer {
244244
void replacePointer(Value *V);
245245

246246
private:
247-
bool collectUsersRecursive(Instruction &I);
248247
void replace(Instruction *I);
249-
Value *getReplacement(Value *I);
248+
Value *getReplacement(Value *V) const { return WorkMap.lookup(V); }
250249
bool isAvailable(Instruction *I) const {
251-
return I == &Root || Worklist.contains(I);
250+
return I == &Root || UsersToReplace.contains(I);
252251
}
253252

254253
bool isEqualOrValidAddrSpaceCast(const Instruction *I,
@@ -260,8 +259,7 @@ class PointerReplacer {
260259
return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
261260
}
262261

263-
SmallPtrSet<Instruction *, 32> ValuesToRevisit;
264-
SmallSetVector<Instruction *, 4> Worklist;
262+
SmallSetVector<Instruction *, 4> UsersToReplace;
265263
MapVector<Value *, Value *> WorkMap;
266264
InstCombinerImpl &IC;
267265
Instruction &Root;
@@ -270,80 +268,132 @@ class PointerReplacer {
270268
} // end anonymous namespace
271269

272270
bool PointerReplacer::collectUsers() {
273-
if (!collectUsersRecursive(Root))
274-
return false;
275-
276-
// Ensure that all outstanding (indirect) users of I
277-
// are inserted into the Worklist. Return false
278-
// otherwise.
279-
return llvm::set_is_subset(ValuesToRevisit, Worklist);
280-
}
271+
SmallVector<Instruction *> Worklist;
272+
SmallSetVector<Instruction *, 4> ValuesToRevisit;
273+
274+
auto PushUsersToWorklist = [&](Instruction *Inst) {
275+
for (auto *U : Inst->users()) {
276+
if (auto *I = dyn_cast<Instruction>(U)) {
277+
if (!isAvailable(I) && !ValuesToRevisit.contains(I))
278+
Worklist.emplace_back(I);
279+
}
280+
}
281+
};
281282

282-
bool PointerReplacer::collectUsersRecursive(Instruction &I) {
283-
for (auto *U : I.users()) {
284-
auto *Inst = cast<Instruction>(&*U);
283+
PushUsersToWorklist(&Root);
284+
while (!Worklist.empty()) {
285+
auto *Inst = Worklist.pop_back_val();
286+
if (!Inst)
287+
return false;
288+
if (isAvailable(Inst))
289+
continue;
285290
if (auto *Load = dyn_cast<LoadInst>(Inst)) {
286291
if (Load->isVolatile())
287292
return false;
288-
Worklist.insert(Load);
293+
UsersToReplace.insert(Load);
289294
} else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
290295
// All incoming values must be instructions for replacability
291296
if (any_of(PHI->incoming_values(),
292297
[](Value *V) { return !isa<Instruction>(V); }))
293298
return false;
294299

295-
// If at least one incoming value of the PHI is not in Worklist,
296-
// store the PHI for revisiting and skip this iteration of the
297-
// loop.
298-
if (any_of(PHI->incoming_values(), [this](Value *V) {
299-
return !isAvailable(cast<Instruction>(V));
300-
})) {
301-
ValuesToRevisit.insert(Inst);
300+
// If all incoming values are available, mark this PHI as
301+
// replacable and push it's users into the worklist.
302+
if (all_of(PHI->incoming_values(),
303+
[&](Value *V) { return isAvailable(cast<Instruction>(V)); })) {
304+
UsersToReplace.insert(PHI);
305+
PushUsersToWorklist(PHI);
302306
continue;
303307
}
304308

305-
Worklist.insert(PHI);
306-
if (!collectUsersRecursive(*PHI))
307-
return false;
308-
} else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
309-
if (!isa<Instruction>(SI->getTrueValue()) ||
310-
!isa<Instruction>(SI->getFalseValue()))
309+
// Not all incoming values are available. If this PHI was already
310+
// visited prior to this iteration, return false.
311+
if (!ValuesToRevisit.insert(PHI))
311312
return false;
312313

313-
if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
314-
!isAvailable(cast<Instruction>(SI->getFalseValue()))) {
315-
ValuesToRevisit.insert(Inst);
316-
continue;
314+
// Push PHI back into the stack, followed by unavailable
315+
// incoming values.
316+
Worklist.emplace_back(PHI);
317+
for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) {
318+
auto *IncomingValue = cast<Instruction>(PHI->getIncomingValue(Idx));
319+
if (UsersToReplace.contains(IncomingValue))
320+
continue;
321+
if (!ValuesToRevisit.insert(IncomingValue))
322+
return false;
323+
Worklist.emplace_back(IncomingValue);
317324
}
318-
Worklist.insert(SI);
319-
if (!collectUsersRecursive(*SI))
320-
return false;
321-
} else if (isa<GetElementPtrInst>(Inst)) {
322-
Worklist.insert(Inst);
323-
if (!collectUsersRecursive(*Inst))
325+
} else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
326+
if (isa<Constant>(SI->getTrueValue()) &&
327+
isa<Constant>(SI->getFalseValue()))
328+
continue;
329+
330+
auto *TrueInst = dyn_cast<Instruction>(SI->getTrueValue());
331+
auto *FalseInst = dyn_cast<Instruction>(SI->getFalseValue());
332+
if (!TrueInst || !FalseInst)
324333
return false;
334+
335+
UsersToReplace.insert(SI);
336+
PushUsersToWorklist(SI);
337+
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
338+
UsersToReplace.insert(GEP);
339+
PushUsersToWorklist(GEP);
325340
} else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
326341
if (MI->isVolatile())
327342
return false;
328-
Worklist.insert(Inst);
343+
UsersToReplace.insert(Inst);
329344
} else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
330-
Worklist.insert(Inst);
331-
if (!collectUsersRecursive(*Inst))
332-
return false;
345+
UsersToReplace.insert(Inst);
346+
PushUsersToWorklist(Inst);
333347
} else if (Inst->isLifetimeStartOrEnd()) {
334348
continue;
335349
} else {
336350
// TODO: For arbitrary uses with address space mismatches, should we check
337351
// if we can introduce a valid addrspacecast?
338-
LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
352+
LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *Inst << '\n');
339353
return false;
340354
}
341355
}
342356

343-
return true;
357+
return llvm::set_is_subset(ValuesToRevisit, UsersToReplace);
344358
}
345359

346-
Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
360+
void PointerReplacer::replacePointer(Value *V) {
361+
#ifndef NDEBUG
362+
auto *PT = cast<PointerType>(Root.getType());
363+
auto *NT = cast<PointerType>(V->getType());
364+
assert(PT != NT && "Invalid usage");
365+
#endif
366+
367+
WorkMap[&Root] = V;
368+
SmallVector<Instruction *> Worklist;
369+
SetVector<Instruction *> PostOrderWorklist;
370+
SmallPtrSet<Instruction *, 4> Visited;
371+
372+
// Perform a postorder traversal of the users of Root.
373+
Worklist.push_back(&Root);
374+
while (!Worklist.empty()) {
375+
Instruction *I = Worklist.back();
376+
377+
// If I has not been processed before, push each of its
378+
// replacable users into the worklist.
379+
if (Visited.insert(I).second) {
380+
for (auto *U : I->users()) {
381+
auto *UserInst = cast<Instruction>(U);
382+
if (UsersToReplace.contains(UserInst))
383+
Worklist.push_back(UserInst);
384+
}
385+
// Otherwise, users of I have already been pushed into
386+
// the PostOrderWorklist. Push I as well.
387+
} else {
388+
PostOrderWorklist.insert(I);
389+
Worklist.pop_back();
390+
}
391+
}
392+
393+
// Replace pointers in reverse-postorder.
394+
for (Instruction *I : reverse(PostOrderWorklist))
395+
replace(I);
396+
}
347397

348398
void PointerReplacer::replace(Instruction *I) {
349399
if (getReplacement(I))
@@ -365,12 +415,20 @@ void PointerReplacer::replace(Instruction *I) {
365415
// replacement (new value).
366416
WorkMap[NewI] = NewI;
367417
} else if (auto *PHI = dyn_cast<PHINode>(I)) {
368-
Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
369-
auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
370-
PHI->getName(), PHI->getIterator());
418+
// Create a new PHI by replacing any incoming value that is a user of the
419+
// root pointer and has a replacement.
420+
SmallVector<Value *, 4> IncomingValues;
421+
for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) {
422+
Value *V = getReplacement(PHI->getIncomingValue(I));
423+
if (!V)
424+
V = PHI->getIncomingValue(I);
425+
IncomingValues.push_back(V);
426+
}
427+
auto *NewPHI = PHINode::Create(IncomingValues[0]->getType(),
428+
PHI->getNumIncomingValues(),
429+
PHI->getName() + ".r", PHI->getIterator());
371430
for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
372-
NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
373-
PHI->getIncomingBlock(I));
431+
NewPHI->addIncoming(IncomingValues[I], PHI->getIncomingBlock(I));
374432
WorkMap[PHI] = NewPHI;
375433
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
376434
auto *V = getReplacement(GEP->getPointerOperand());
@@ -431,22 +489,12 @@ void PointerReplacer::replace(Instruction *I) {
431489
}
432490

433491
} else {
492+
dbgs() << "Instruction " << *I
493+
<< " is not supported in PointerReplacer::replace\n";
434494
llvm_unreachable("should never reach here");
435495
}
436496
}
437497

438-
void PointerReplacer::replacePointer(Value *V) {
439-
#ifndef NDEBUG
440-
auto *PT = cast<PointerType>(Root.getType());
441-
auto *NT = cast<PointerType>(V->getType());
442-
assert(PT != NT && "Invalid usage");
443-
#endif
444-
WorkMap[&Root] = V;
445-
446-
for (Instruction *Workitem : Worklist)
447-
replace(Workitem);
448-
}
449-
450498
Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
451499
if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
452500
return I;

llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S < %s | FileCheck %s
33

4+
; REQUIRES: asserts
5+
46
%struct.type = type { [256 x <2 x i64>] }
57
@g1 = external hidden addrspace(3) global %struct.type, align 16
68

9+
; This test requires the PtrReplacer to replace users in an RPO traversal.
10+
; Furthermore, %ptr.else need not to be replaced so it must be retained in
11+
; %ptr.sink.
712
define <2 x i64> @func(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
813
; CHECK-LABEL: define <2 x i64> @func(
914
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
1015
; CHECK-NEXT: [[ENTRY:.*:]]
11-
; CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_TYPE]], align 16, addrspace(5)
12-
; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 16 dereferenceable(4096) [[COERCE]], ptr addrspace(4) noundef align 16 dereferenceable(4096) [[TMP0]], i64 4096, i1 false)
1316
; CHECK-NEXT: br i1 [[CMP_0]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
1417
; CHECK: [[IF_THEN]]:
15-
; CHECK-NEXT: [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
18+
; CHECK-NEXT: [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
1619
; CHECK-NEXT: br label %[[SINK:.*]]
1720
; CHECK: [[IF_ELSE]]:
1821
; CHECK-NEXT: [[PTR_ELSE:%.*]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
@@ -43,5 +46,37 @@ sink:
4346
ret <2 x i64> %val.sink
4447
}
4548

49+
define <2 x i64> @func_phi_loop(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
50+
; CHECK-LABEL: define <2 x i64> @func_phi_loop(
51+
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
52+
; CHECK-NEXT: [[ENTRY:.*]]:
53+
; CHECK-NEXT: [[VAL_0:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
54+
; CHECK-NEXT: br label %[[LOOP:.*]]
55+
; CHECK: [[LOOP]]:
56+
; CHECK-NEXT: [[PTR_PHI_R:%.*]] = phi ptr [ [[PTR_1:%.*]], %[[LOOP]] ], [ [[VAL_0]], %[[ENTRY]] ]
57+
; CHECK-NEXT: [[PTR_1]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
58+
; CHECK-NEXT: br i1 [[CMP_0]], label %[[LOOP]], label %[[SINK:.*]]
59+
; CHECK: [[SINK]]:
60+
; CHECK-NEXT: [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_PHI_R]], align 16
61+
; CHECK-NEXT: ret <2 x i64> [[VAL_SINK]]
62+
;
63+
entry:
64+
%coerce = alloca %struct.type, align 16, addrspace(5)
65+
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
66+
%ptr.0 = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
67+
%val.0 = addrspacecast ptr addrspace(5) %ptr.0 to ptr
68+
br label %loop
69+
70+
loop:
71+
%ptr.phi = phi ptr [ %val.1, %loop ], [ %val.0, %entry ]
72+
%ptr.1 = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
73+
%val.1 = getelementptr inbounds nuw i8, ptr %ptr.1, i64 0
74+
br i1 %cmp.0, label %loop, label %sink
75+
76+
sink:
77+
%val.sink = load <2 x i64>, ptr %ptr.phi, align 16
78+
ret <2 x i64> %val.sink
79+
}
80+
4681
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
4782
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0

0 commit comments

Comments
 (0)