-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ObjectSizeOffsetVisitor] Bail after visiting 100 instructions #67479
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,6 +35,7 @@ | |
#include "llvm/IR/Type.h" | ||
#include "llvm/IR/Value.h" | ||
#include "llvm/Support/Casting.h" | ||
#include "llvm/Support/CommandLine.h" | ||
#include "llvm/Support/Debug.h" | ||
#include "llvm/Support/MathExtras.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
|
@@ -50,6 +51,12 @@ using namespace llvm; | |
|
||
#define DEBUG_TYPE "memory-builtins" | ||
|
||
static cl::opt<unsigned> ObjectSizeOffsetVisitorMaxVisitInstructions( | ||
"object-size-offset-visitor-max-visit-instructions", | ||
cl::desc("Maximum number of instructions for ObjectSizeOffsetVisitor to " | ||
"look at"), | ||
cl::init(100)); | ||
|
||
enum AllocType : uint8_t { | ||
OpNewLike = 1<<0, // allocates; never returns null | ||
MallocLike = 1<<1, // allocates; may return null | ||
|
@@ -694,6 +701,11 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL, | |
} | ||
|
||
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { | ||
InstructionsVisited = 0; | ||
return computeImpl(V); | ||
} | ||
|
||
SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) { | ||
unsigned InitialIntTyBits = DL.getIndexTypeSizeInBits(V->getType()); | ||
|
||
// Stripping pointer casts can strip address space casts which can change the | ||
|
@@ -710,14 +722,15 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { | |
IntTyBits = DL.getIndexTypeSizeInBits(V->getType()); | ||
Zero = APInt::getZero(IntTyBits); | ||
|
||
SizeOffsetType SOT = computeValue(V); | ||
|
||
bool IndexTypeSizeChanged = InitialIntTyBits != IntTyBits; | ||
if (!IndexTypeSizeChanged && Offset.isZero()) | ||
return computeImpl(V); | ||
return SOT; | ||
|
||
// We stripped an address space cast that changed the index type size or we | ||
// accumulated some constant offset (or both). Readjust the bit width to match | ||
// the argument index type size and apply the offset, as required. | ||
SizeOffsetType SOT = computeImpl(V); | ||
if (IndexTypeSizeChanged) { | ||
if (knownSize(SOT) && !::CheckedZextOrTrunc(SOT.first, InitialIntTyBits)) | ||
SOT.first = APInt(); | ||
|
@@ -729,13 +742,16 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { | |
SOT.second.getBitWidth() > 1 ? SOT.second + Offset : SOT.second}; | ||
} | ||
|
||
SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) { | ||
SizeOffsetType ObjectSizeOffsetVisitor::computeValue(Value *V) { | ||
if (Instruction *I = dyn_cast<Instruction>(V)) { | ||
// If we have already seen this instruction, bail out. Cycles can happen in | ||
// unreachable code after constant propagation. | ||
auto P = SeenInsts.try_emplace(I, unknown()); | ||
if (!P.second) | ||
return P.first->second; | ||
++InstructionsVisited; | ||
if (InstructionsVisited > ObjectSizeOffsetVisitorMaxVisitInstructions) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point. I was not aware that ObjectSizeOffsetVisitor is designed for reuse, but apparently AddressSanitizer does use it that way. Possibly we should be clearing SeenInsts for each call as well... |
||
return unknown(); | ||
SizeOffsetType Res = visit(*I); | ||
// Cache the result for later visits. If we happened to visit this during | ||
// the above recursion, we would consider it unknown until now. | ||
|
@@ -830,7 +846,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { | |
SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) { | ||
if (GA.isInterposable()) | ||
return unknown(); | ||
return compute(GA.getAliasee()); | ||
return computeImpl(GA.getAliasee()); | ||
} | ||
|
||
SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ | ||
|
@@ -885,7 +901,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::findLoadSizeOffset( | |
continue; | ||
case AliasResult::MustAlias: | ||
if (SI->getValueOperand()->getType()->isPointerTy()) | ||
return Known(compute(SI->getValueOperand())); | ||
return Known(computeImpl(SI->getValueOperand())); | ||
else | ||
return Unknown(); // No handling of non-pointer values by `compute`. | ||
default: | ||
|
@@ -998,15 +1014,15 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PN) { | |
return unknown(); | ||
auto IncomingValues = PN.incoming_values(); | ||
return std::accumulate(IncomingValues.begin() + 1, IncomingValues.end(), | ||
compute(*IncomingValues.begin()), | ||
computeImpl(*IncomingValues.begin()), | ||
[this](SizeOffsetType LHS, Value *VRHS) { | ||
return combineSizeOffset(LHS, compute(VRHS)); | ||
return combineSizeOffset(LHS, computeImpl(VRHS)); | ||
}); | ||
} | ||
|
||
SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { | ||
return combineSizeOffset(compute(I.getTrueValue()), | ||
compute(I.getFalseValue())); | ||
return combineSizeOffset(computeImpl(I.getTrueValue()), | ||
computeImpl(I.getFalseValue())); | ||
} | ||
|
||
SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 | ||
; RUN: opt -passes=dse -S -object-size-offset-visitor-max-visit-instructions=2 < %s | FileCheck %s --check-prefix=NO | ||
; RUN: opt -passes=dse -S -object-size-offset-visitor-max-visit-instructions=3 < %s | FileCheck %s --check-prefix=YES | ||
|
||
declare void @use(ptr) | ||
|
||
define void @f(i32 %i, i1 %c) { | ||
; NO-LABEL: define void @f( | ||
; NO-SAME: i32 [[I:%.*]], i1 [[C:%.*]]) { | ||
; NO-NEXT: b0: | ||
; NO-NEXT: [[A1:%.*]] = alloca i32, align 4 | ||
; NO-NEXT: [[A2:%.*]] = alloca i32, align 4 | ||
; NO-NEXT: br i1 [[C]], label [[B1:%.*]], label [[B2:%.*]] | ||
; NO: b1: | ||
; NO-NEXT: br label [[B2]] | ||
; NO: b2: | ||
; NO-NEXT: [[A5:%.*]] = phi ptr [ [[A1]], [[B0:%.*]] ], [ [[A2]], [[B1]] ] | ||
; NO-NEXT: [[G:%.*]] = getelementptr i8, ptr [[A5]], i32 [[I]] | ||
; NO-NEXT: store i8 1, ptr [[G]], align 1 | ||
; NO-NEXT: store i32 0, ptr [[A5]], align 4 | ||
; NO-NEXT: call void @use(ptr [[A5]]) | ||
; NO-NEXT: ret void | ||
; | ||
; YES-LABEL: define void @f( | ||
; YES-SAME: i32 [[I:%.*]], i1 [[C:%.*]]) { | ||
; YES-NEXT: b0: | ||
; YES-NEXT: [[A1:%.*]] = alloca i32, align 4 | ||
; YES-NEXT: [[A2:%.*]] = alloca i32, align 4 | ||
; YES-NEXT: br i1 [[C]], label [[B1:%.*]], label [[B2:%.*]] | ||
; YES: b1: | ||
; YES-NEXT: br label [[B2]] | ||
; YES: b2: | ||
; YES-NEXT: [[A5:%.*]] = phi ptr [ [[A1]], [[B0:%.*]] ], [ [[A2]], [[B1]] ] | ||
; YES-NEXT: store i32 0, ptr [[A5]], align 4 | ||
; YES-NEXT: call void @use(ptr [[A5]]) | ||
; YES-NEXT: ret void | ||
; | ||
b0: | ||
%a1 = alloca i32 | ||
%a2 = alloca i32 | ||
br i1 %c, label %b1, label %b2 | ||
b1: | ||
br label %b2 | ||
b2: | ||
%a5 = phi ptr [ %a1, %b0 ], [ %a2, %b1 ] | ||
%g = getelementptr i8, ptr %a5, i32 %i | ||
store i8 1, ptr %g | ||
store i32 0, ptr %a5 | ||
call void @use(ptr %a5) | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should either not be a depth (but total visited count) or be a lot smaller. Recursing 100 levels allows for a lot of fan-out. Especially as this is only counting the phis and not other instructions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lowered to 20.
I think it's unlikely that we hit huge fanout in real world code, it's typically pretty linear at least with large autogenerated code. In the case I was looking at, this dropped DSE time from >30s to <1s.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess the fact that values don't get revisited does avoid large fan-out in practice.
Huh, that sounds like a bug. We usually don't query objectsize on non-root objects, and DSE probably shouldn't do that either.
Somewhat expensive objectsize analysis is okay for actual llvm.objectsize intrinsics, but not for generic passes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've adjusted the call in DSE to check for an identified object first: 7aab12e