Skip to content

Commit a61f9fe

Browse files
[Inline][Cloning] Defer simplification after phi-nodes resolution
A logic issue arose when inlining via `CloneAndPruneFunctionInto`, which, besides cloning, performs instruction simplification as well. By the time a new cloned instruction is being simplified, phi-nodes are not remapped yet as the whole CFG needs to be processed first. As `VMap` state at this stage is incomplete, `threadCmpOverPHI` and variants could lead to unsound optimizations. This issue has been addressed by performing basic constant folding while cloning, and postponing instruction simplification once phi-nodes are revisited. Fixes: #87534.
1 parent c1d0051 commit a61f9fe

File tree

4 files changed

+39
-61
lines changed

4 files changed

+39
-61
lines changed

llvm/lib/Transforms/Utils/CloneFunction.cpp

Lines changed: 35 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "llvm/ADT/SetVector.h"
1616
#include "llvm/ADT/SmallVector.h"
17+
#include "llvm/Analysis/ConstantFolding.h"
1718
#include "llvm/Analysis/DomTreeUpdater.h"
1819
#include "llvm/Analysis/InstructionSimplify.h"
1920
#include "llvm/Analysis/LoopInfo.h"
@@ -540,18 +541,13 @@ void PruningFunctionCloner::CloneBlock(
540541
RemapInstruction(NewInst, VMap,
541542
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
542543

543-
// If we can simplify this instruction to some other value, simply add
544-
// a mapping to that value rather than inserting a new instruction into
545-
// the basic block.
546-
if (Value *V =
547-
simplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
548-
// On the off-chance that this simplifies to an instruction in the old
549-
// function, map it back into the new function.
550-
if (NewFunc != OldFunc)
551-
if (Value *MappedV = VMap.lookup(V))
552-
V = MappedV;
553-
554-
if (!NewInst->mayHaveSideEffects()) {
544+
// Eagerly constant fold the newly cloned instruction. If successful, add
545+
// a mapping to the new value. Non-constant operands may be incomplete at
546+
// this stage, thus instruction simplification is performed after
547+
// processing phi-nodes.
548+
if (Value *V = ConstantFoldInstruction(
549+
NewInst, BB->getModule()->getDataLayout())) {
550+
if (isInstructionTriviallyDead(NewInst)) {
555551
VMap[&*II] = V;
556552
NewInst->eraseFromParent();
557553
continue;
@@ -823,52 +819,34 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
823819
}
824820
}
825821

826-
// Make a second pass over the PHINodes now that all of them have been
827-
// remapped into the new function, simplifying the PHINode and performing any
828-
// recursive simplifications exposed. This will transparently update the
829-
// WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
830-
// two PHINodes, the iteration over the old PHIs remains valid, and the
831-
// mapping will just map us to the new node (which may not even be a PHI
832-
// node).
822+
// As phi-nodes have been now remapped, allow incremental simplification of
823+
// newly-cloned instructions.
833824
const DataLayout &DL = NewFunc->getParent()->getDataLayout();
834-
SmallSetVector<const Value *, 8> Worklist;
835-
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
836-
if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
837-
Worklist.insert(PHIToResolve[Idx]);
838-
839-
// Note that we must test the size on each iteration, the worklist can grow.
840-
for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
841-
const Value *OrigV = Worklist[Idx];
842-
auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
843-
if (!I)
844-
continue;
845-
846-
// Skip over non-intrinsic callsites, we don't want to remove any nodes from
847-
// the CGSCC.
848-
CallBase *CB = dyn_cast<CallBase>(I);
849-
if (CB && CB->getCalledFunction() &&
850-
!CB->getCalledFunction()->isIntrinsic())
851-
continue;
852-
853-
// See if this instruction simplifies.
854-
Value *SimpleV = simplifyInstruction(I, DL);
855-
if (!SimpleV)
856-
continue;
857-
858-
// Stash away all the uses of the old instruction so we can check them for
859-
// recursive simplifications after a RAUW. This is cheaper than checking all
860-
// uses of To on the recursive step in most cases.
861-
for (const User *U : OrigV->users())
862-
Worklist.insert(cast<Instruction>(U));
863-
864-
// Replace the instruction with its simplified value.
865-
I->replaceAllUsesWith(SimpleV);
866-
867-
// If the original instruction had no side effects, remove it.
868-
if (isInstructionTriviallyDead(I))
869-
I->eraseFromParent();
870-
else
871-
VMap[OrigV] = I;
825+
for (const auto &BB : *OldFunc) {
826+
for (const auto &I : BB) {
827+
auto *NewI = dyn_cast_or_null<Instruction>(VMap.lookup(&I));
828+
if (!NewI)
829+
continue;
830+
831+
// Skip over non-intrinsic callsites, we don't want to remove any nodes
832+
// from the CGSCC.
833+
CallBase *CB = dyn_cast<CallBase>(NewI);
834+
if (CB && CB->getCalledFunction() &&
835+
!CB->getCalledFunction()->isIntrinsic())
836+
continue;
837+
838+
if (Value *V = simplifyInstruction(NewI, DL)) {
839+
NewI->replaceAllUsesWith(V);
840+
841+
if (isInstructionTriviallyDead(NewI)) {
842+
NewI->eraseFromParent();
843+
} else {
844+
// Did not erase it? Restore the new instruction into VMap previously
845+
// dropped by `ValueIsRAUWd`.
846+
VMap[&I] = NewI;
847+
}
848+
}
849+
}
872850
}
873851

874852
// Remap debug intrinsic operands now that all values have been mapped.

llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ store_ptr_in_gvar: ; preds = %entry
3838

3939
check_pointers_are_equal: ; preds = %store_ptr_in_gvar, %entry
4040
%phi = phi ptr [ %ptr, %store_ptr_in_gvar ], [ @other_g_var, %entry ]
41-
; FIXME: While inlining, the following is miscompiled to i1 false,
42-
; as %ptr in the phi-node is not taken into account.
4341
%.not1 = icmp eq ptr %phi, %ptr
4442
br i1 %.not1, label %return, label %abort
4543

@@ -64,9 +62,13 @@ define i32 @main() {
6462
; CHECK-NEXT: br label [[CHECK_POINTERS_ARE_EQUAL_I]]
6563
; CHECK: check_pointers_are_equal.i:
6664
; CHECK-NEXT: [[PHI_I:%.*]] = phi ptr [ [[G_VAR]], [[STORE_PTR_IN_GVAR_I]] ], [ @other_g_var, [[TMP0:%.*]] ]
65+
; CHECK-NEXT: [[DOTNOT1_I:%.*]] = icmp eq ptr [[PHI_I]], [[G_VAR]]
66+
; CHECK-NEXT: br i1 [[DOTNOT1_I]], label [[CALLEE_EXIT:%.*]], label [[ABORT_I:%.*]]
67+
; CHECK: abort.i:
6768
; CHECK-NEXT: call void @abort()
6869
; CHECK-NEXT: unreachable
6970
; CHECK: callee.exit:
71+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr [[G_VAR]])
7072
; CHECK-NEXT: ret i32 0
7173
;
7274
call void @callee(ptr noundef byval(%struct.a) align 8 @g_var)

llvm/test/Transforms/Inline/prof-update-sample-alwaysinline.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ define void @caller() {
5353
!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
5454
attributes #0 = { alwaysinline }
5555
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
56-
; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i32 2000}
5756
; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200}
5857
; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}
5958
; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800}

llvm/test/Transforms/Inline/prof-update-sample.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ define void @caller() {
5252
!17 = !{!"branch_weights", i32 400}
5353
!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
5454
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
55-
; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i32 2000}
5655
; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", i32 1200}
5756
; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12}
5857
; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i32 800}

0 commit comments

Comments
 (0)