Skip to content

Commit 2d3f74c

Browse files
committed
[InstCombine] Iterative replacement in PtrReplacer
This patch enhances the PtrReplacer as follows: 1. Users are now collected iteratively to be generous on the stack. In the case of PHIs with incoming values which have not yet been visited, they are pushed back into the stack for reconsideration. 2. Replace users of the pointer root in a reverse-postorder traversal, instead of a simple traversal over the collected users. This reordering ensures that the uses of an instruction are replaced before replacing the instruction itself. 3. During the replacement of PHI, use the same incoming value if it does not have a replacement. This patch specifically fixes the case when an incoming value of a PHI is addrspacecasted.
1 parent 98194fd commit 2d3f74c

File tree

2 files changed

+141
-73
lines changed

2 files changed

+141
-73
lines changed

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 105 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,10 @@ class PointerReplacer {
244244
void replacePointer(Value *V);
245245

246246
private:
247-
bool collectUsersRecursive(Instruction &I);
248247
void replace(Instruction *I);
249-
Value *getReplacement(Value *I);
248+
Value *getReplacement(Value *V) const { return WorkMap.lookup(V); }
250249
bool isAvailable(Instruction *I) const {
251-
return I == &Root || Worklist.contains(I);
250+
return I == &Root || UsersToReplace.contains(I);
252251
}
253252

254253
bool isEqualOrValidAddrSpaceCast(const Instruction *I,
@@ -260,8 +259,7 @@ class PointerReplacer {
260259
return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
261260
}
262261

263-
SmallPtrSet<Instruction *, 32> ValuesToRevisit;
264-
SmallSetVector<Instruction *, 4> Worklist;
262+
SmallSetVector<Instruction *, 4> UsersToReplace;
265263
MapVector<Value *, Value *> WorkMap;
266264
InstCombinerImpl &IC;
267265
Instruction &Root;
@@ -270,80 +268,125 @@ class PointerReplacer {
270268
} // end anonymous namespace
271269

272270
bool PointerReplacer::collectUsers() {
273-
if (!collectUsersRecursive(Root))
274-
return false;
275-
276-
// Ensure that all outstanding (indirect) users of I
277-
// are inserted into the Worklist. Return false
278-
// otherwise.
279-
return llvm::set_is_subset(ValuesToRevisit, Worklist);
280-
}
271+
SmallVector<Instruction *> Worklist;
272+
SmallSetVector<Instruction *, 4> ValuesToRevisit;
273+
274+
auto PushUsersToWorklist = [&](Instruction *Inst) {
275+
for (auto *U : Inst->users()) {
276+
if (auto *I = dyn_cast<Instruction>(U)) {
277+
if (!isAvailable(I) && !ValuesToRevisit.contains(I))
278+
Worklist.emplace_back(I);
279+
}
280+
}
281+
};
281282

282-
bool PointerReplacer::collectUsersRecursive(Instruction &I) {
283-
for (auto *U : I.users()) {
284-
auto *Inst = cast<Instruction>(&*U);
283+
PushUsersToWorklist(&Root);
284+
while (!Worklist.empty()) {
285+
Instruction *Inst = Worklist.pop_back_val();
286+
if (!Inst)
287+
return false;
288+
if (isAvailable(Inst))
289+
continue;
285290
if (auto *Load = dyn_cast<LoadInst>(Inst)) {
286291
if (Load->isVolatile())
287292
return false;
288-
Worklist.insert(Load);
293+
/// FIXME: Handle poison and null pointers.
294+
UsersToReplace.insert(Load);
289295
} else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
290-
// All incoming values must be instructions for replacability
291-
if (any_of(PHI->incoming_values(),
292-
[](Value *V) { return !isa<Instruction>(V); }))
293-
return false;
294-
295-
// If at least one incoming value of the PHI is not in Worklist,
296-
// store the PHI for revisiting and skip this iteration of the
297-
// loop.
298-
if (any_of(PHI->incoming_values(), [this](Value *V) {
299-
return !isAvailable(cast<Instruction>(V));
296+
// If all incoming values are available, mark this PHI as
297+
// replacable and push it's users into the worklist.
298+
bool IsReplacable = true;
299+
if (all_of(PHI->incoming_values(), [&](Value *V) {
300+
if (!isa<Instruction>(V))
301+
return IsReplacable = false;
302+
return isAvailable(cast<Instruction>(V));
300303
})) {
301-
ValuesToRevisit.insert(Inst);
304+
UsersToReplace.insert(PHI);
305+
PushUsersToWorklist(PHI);
302306
continue;
303307
}
304308

305-
Worklist.insert(PHI);
306-
if (!collectUsersRecursive(*PHI))
307-
return false;
308-
} else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
309-
if (!isa<Instruction>(SI->getTrueValue()) ||
310-
!isa<Instruction>(SI->getFalseValue()))
309+
// Either an incoming value is not an instruction or not all
310+
// incoming values are available. If this PHI was already
311+
// visited prior to this iteration, return false.
312+
if (!IsReplacable || !ValuesToRevisit.insert(PHI))
311313
return false;
312314

313-
if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
314-
!isAvailable(cast<Instruction>(SI->getFalseValue()))) {
315-
ValuesToRevisit.insert(Inst);
316-
continue;
315+
// Push PHI back into the stack, followed by unavailable
316+
// incoming values.
317+
Worklist.emplace_back(PHI);
318+
for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) {
319+
auto *IncomingValue = cast<Instruction>(PHI->getIncomingValue(Idx));
320+
if (UsersToReplace.contains(IncomingValue))
321+
continue;
322+
if (!ValuesToRevisit.insert(IncomingValue))
323+
return false;
324+
Worklist.emplace_back(IncomingValue);
317325
}
318-
Worklist.insert(SI);
319-
if (!collectUsersRecursive(*SI))
320-
return false;
321-
} else if (isa<GetElementPtrInst>(Inst)) {
322-
Worklist.insert(Inst);
323-
if (!collectUsersRecursive(*Inst))
326+
} else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
327+
auto *TrueInst = dyn_cast<Instruction>(SI->getTrueValue());
328+
auto *FalseInst = dyn_cast<Instruction>(SI->getFalseValue());
329+
if (!TrueInst || !FalseInst)
324330
return false;
331+
332+
UsersToReplace.insert(SI);
333+
PushUsersToWorklist(SI);
334+
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
335+
UsersToReplace.insert(GEP);
336+
PushUsersToWorklist(GEP);
325337
} else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
326338
if (MI->isVolatile())
327339
return false;
328-
Worklist.insert(Inst);
340+
UsersToReplace.insert(Inst);
329341
} else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
330-
Worklist.insert(Inst);
331-
if (!collectUsersRecursive(*Inst))
332-
return false;
342+
UsersToReplace.insert(Inst);
343+
PushUsersToWorklist(Inst);
333344
} else if (Inst->isLifetimeStartOrEnd()) {
334345
continue;
335346
} else {
336347
// TODO: For arbitrary uses with address space mismatches, should we check
337348
// if we can introduce a valid addrspacecast?
338-
LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
349+
LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *Inst << '\n');
339350
return false;
340351
}
341352
}
342353

343-
return true;
354+
return llvm::set_is_subset(ValuesToRevisit, UsersToReplace);
344355
}
345356

346-
Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
357+
void PointerReplacer::replacePointer(Value *V) {
358+
assert(cast<PointerType>(Root.getType()) != cast<PointerType>(V->getType()) &&
359+
"Invalid usage");
360+
WorkMap[&Root] = V;
361+
SmallVector<Instruction *> Worklist;
362+
SetVector<Instruction *> PostOrderWorklist;
363+
SmallPtrSet<Instruction *, 4> Visited;
364+
365+
// Perform a postorder traversal of the users of Root.
366+
Worklist.push_back(&Root);
367+
while (!Worklist.empty()) {
368+
Instruction *I = Worklist.back();
369+
370+
// If I has not been processed before, push each of its
371+
// replacable users into the worklist.
372+
if (Visited.insert(I).second) {
373+
for (auto *U : I->users()) {
374+
auto *UserInst = cast<Instruction>(U);
375+
if (UsersToReplace.contains(UserInst))
376+
Worklist.push_back(UserInst);
377+
}
378+
// Otherwise, users of I have already been pushed into
379+
// the PostOrderWorklist. Push I as well.
380+
} else {
381+
PostOrderWorklist.insert(I);
382+
Worklist.pop_back();
383+
}
384+
}
385+
386+
// Replace pointers in reverse-postorder.
387+
for (Instruction *I : reverse(PostOrderWorklist))
388+
replace(I);
389+
}
347390

348391
void PointerReplacer::replace(Instruction *I) {
349392
if (getReplacement(I))
@@ -365,13 +408,18 @@ void PointerReplacer::replace(Instruction *I) {
365408
// replacement (new value).
366409
WorkMap[NewI] = NewI;
367410
} else if (auto *PHI = dyn_cast<PHINode>(I)) {
368-
Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
369-
auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
370-
PHI->getName(), PHI->getIterator());
411+
// Create a new PHI by replacing any incoming value that is a user of the
412+
// root pointer and has a replacement.
413+
auto GetReplacementForInValue = [&](Value *V) {
414+
return WorkMap[V] ? WorkMap[V] : V;
415+
};
416+
417+
PHI->mutateType(
418+
GetReplacementForInValue(PHI->getIncomingValue(0))->getType());
371419
for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
372-
NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
373-
PHI->getIncomingBlock(I));
374-
WorkMap[PHI] = NewPHI;
420+
PHI->setIncomingValue(I,
421+
GetReplacementForInValue(PHI->getIncomingValue(I)));
422+
WorkMap[PHI] = PHI;
375423
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
376424
auto *V = getReplacement(GEP->getPointerOperand());
377425
assert(V && "Operand not replaced");
@@ -435,18 +483,6 @@ void PointerReplacer::replace(Instruction *I) {
435483
}
436484
}
437485

438-
void PointerReplacer::replacePointer(Value *V) {
439-
#ifndef NDEBUG
440-
auto *PT = cast<PointerType>(Root.getType());
441-
auto *NT = cast<PointerType>(V->getType());
442-
assert(PT != NT && "Invalid usage");
443-
#endif
444-
WorkMap[&Root] = V;
445-
446-
for (Instruction *Workitem : Worklist)
447-
replace(Workitem);
448-
}
449-
450486
Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
451487
if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
452488
return I;

llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
%struct.type = type { [256 x <2 x i64>] }
55
@g1 = external hidden addrspace(3) global %struct.type, align 16
66

7+
; This test requires the PtrReplacer to replace users in an RPO traversal.
8+
; Furthermore, %ptr.else need not to be replaced so it must be retained in
9+
; %ptr.sink.
710
define <2 x i64> @func(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
811
; CHECK-LABEL: define <2 x i64> @func(
912
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
1013
; CHECK-NEXT: [[ENTRY:.*:]]
11-
; CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_TYPE]], align 16, addrspace(5)
12-
; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 16 dereferenceable(4096) [[COERCE]], ptr addrspace(4) noundef align 16 dereferenceable(4096) [[TMP0]], i64 4096, i1 false)
1314
; CHECK-NEXT: br i1 [[CMP_0]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
1415
; CHECK: [[IF_THEN]]:
15-
; CHECK-NEXT: [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
16+
; CHECK-NEXT: [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
1617
; CHECK-NEXT: br label %[[SINK:.*]]
1718
; CHECK: [[IF_ELSE]]:
1819
; CHECK-NEXT: [[PTR_ELSE:%.*]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
@@ -43,5 +44,36 @@ sink:
4344
ret <2 x i64> %val.sink
4445
}
4546

46-
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
47+
define <2 x i64> @func_phi_loop(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
48+
; CHECK-LABEL: define <2 x i64> @func_phi_loop(
49+
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
50+
; CHECK-NEXT: [[ENTRY:.*]]:
51+
; CHECK-NEXT: [[VAL_0:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
52+
; CHECK-NEXT: br label %[[LOOP:.*]]
53+
; CHECK: [[LOOP]]:
54+
; CHECK-NEXT: [[PTR_PHI_R:%.*]] = phi ptr [ [[PTR_1:%.*]], %[[LOOP]] ], [ [[VAL_0]], %[[ENTRY]] ]
55+
; CHECK-NEXT: [[PTR_1]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
56+
; CHECK-NEXT: br i1 [[CMP_0]], label %[[LOOP]], label %[[SINK:.*]]
57+
; CHECK: [[SINK]]:
58+
; CHECK-NEXT: [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_PHI_R]], align 16
59+
; CHECK-NEXT: ret <2 x i64> [[VAL_SINK]]
60+
;
61+
entry:
62+
%coerce = alloca %struct.type, align 16, addrspace(5)
63+
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
64+
%ptr.0 = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
65+
%val.0 = addrspacecast ptr addrspace(5) %ptr.0 to ptr
66+
br label %loop
67+
68+
loop:
69+
%ptr.phi = phi ptr [ %val.1, %loop ], [ %val.0, %entry ]
70+
%ptr.1 = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
71+
%val.1 = getelementptr inbounds nuw i8, ptr %ptr.1, i64 0
72+
br i1 %cmp.0, label %loop, label %sink
73+
74+
sink:
75+
%val.sink = load <2 x i64>, ptr %ptr.phi, align 16
76+
ret <2 x i64> %val.sink
77+
}
78+
4779
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0

0 commit comments

Comments
 (0)