Skip to content

Commit 1c8add1

Browse files
authored
[analyzer] Add hack in ArrayBound to cover up missing casts (#127117)
Currently there are many casts that are not modeled (i.e. ignored) by the analyzer, which can cause paradox states (e.g. negative value stored in `unsigned` variable) and false positive reports from various checkers, e.g. from `security.ArrayBound`. Unfortunately this issue is deeply rooted in the architectural limitations of the analyzer (if we started to model the casts, it would break other things). For details see the umbrella ticket #39492 This commit adds an ugly hack in `security.ArrayBound` to silence most of the false positives caused by this shortcoming of the engine. Fixes #126884
1 parent 7c03865 commit 1c8add1

File tree

2 files changed

+87
-34
lines changed

2 files changed

+87
-34
lines changed

clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp

Lines changed: 75 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -34,24 +34,37 @@ using namespace taint;
3434
using llvm::formatv;
3535

3636
namespace {
37-
/// If `E` is a "clean" array subscript expression, return the type of the
38-
/// accessed element. If the base of the subscript expression is modified by
39-
/// pointer arithmetic (and not the beginning of a "full" memory region), this
40-
/// always returns nullopt because that's the right (or the least bad) thing to
41-
/// do for the diagnostic output that's relying on this.
42-
static std::optional<QualType> determineElementType(const Expr *E,
43-
const CheckerContext &C) {
37+
/// If `E` is an array subscript expression with a base that is "clean" (= not
38+
/// modified by pointer arithmetic = the beginning of a memory region), return
39+
/// it as a pointer to ArraySubscriptExpr; otherwise return nullptr.
40+
/// This helper function is used by two separate heuristics that are only valid
41+
/// in these "clean" cases.
42+
static const ArraySubscriptExpr *
43+
getAsCleanArraySubscriptExpr(const Expr *E, const CheckerContext &C) {
4444
const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
4545
if (!ASE)
46-
return std::nullopt;
46+
return nullptr;
4747

4848
const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();
4949
if (!SubscriptBaseReg)
50-
return std::nullopt;
50+
return nullptr;
5151

5252
// The base of the subscript expression is affected by pointer arithmetics,
53-
// so we want to report byte offsets instead of indices.
53+
// so we want to report byte offsets instead of indices and we don't want to
54+
// activate the "index is unsigned -> cannot be negative" shortcut.
5455
if (isa<ElementRegion>(SubscriptBaseReg->StripCasts()))
56+
return nullptr;
57+
58+
return ASE;
59+
}
60+
61+
/// If `E` is a "clean" array subscript expression, return the type of the
62+
/// accessed element; otherwise return std::nullopt because that's the best (or
63+
/// least bad) option for the diagnostic generation that relies on this.
64+
static std::optional<QualType> determineElementType(const Expr *E,
65+
const CheckerContext &C) {
66+
const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
67+
if (!ASE)
5568
return std::nullopt;
5669

5770
return ASE->getType();
@@ -140,7 +153,9 @@ class ArrayBoundChecker : public Checker<check::PostStmt<ArraySubscriptExpr>,
140153
ProgramStateRef ErrorState, NonLoc Val,
141154
bool MarkTaint);
142155

143-
static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
156+
static bool isFromCtypeMacro(const Expr *E, ASTContext &AC);
157+
158+
static bool isOffsetObviouslyNonnegative(const Expr *E, CheckerContext &C);
144159

145160
static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,
146161
NonLoc Offset, NonLoc Limit,
@@ -587,20 +602,48 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
587602
State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
588603

589604
if (PrecedesLowerBound) {
590-
// The offset may be invalid (negative)...
591-
if (!WithinLowerBound) {
592-
// ...and it cannot be valid (>= 0), so report an error.
593-
Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
594-
reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
595-
return;
605+
// The analyzer thinks that the offset may be invalid (negative)...
606+
607+
if (isOffsetObviouslyNonnegative(E, C)) {
608+
// ...but the offset is obviously non-negative (clear array subscript
609+
// with an unsigned index), so we're in a buggy situation.
610+
611+
// TODO: Currently the analyzer ignores many casts (e.g. signed ->
612+
// unsigned casts), so it can easily reach states where it will load a
613+
// signed (and negative) value from an unsigned variable. This sanity
614+
// check is a duct tape "solution" that silences most of the ugly false
615+
// positives that are caused by this buggy behavior. Note that this is
616+
// not a complete solution: this cannot silence reports where pointer
617+
// arithmetic complicates the picture and cannot ensure modeling of the
618+
// "unsigned index is positive with highest bit set" cases which are
619+
// "usurped" by the nonsense "unsigned index is negative" case.
620+
// For more information about this topic, see the umbrella ticket
621+
// https://github.com/llvm/llvm-project/issues/39492
622+
// TODO: Remove this hack once 'SymbolCast's are modeled properly.
623+
624+
if (!WithinLowerBound) {
625+
// The state is completely nonsense -- let's just sink it!
626+
C.addSink();
627+
return;
628+
}
629+
// Otherwise continue on the 'WithinLowerBound' branch where the
630+
// unsigned index _is_ non-negative. Don't mention this assumption as a
631+
// note tag, because it would just confuse the users!
632+
} else {
633+
if (!WithinLowerBound) {
634+
// ...and it cannot be valid (>= 0), so report an error.
635+
Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
636+
reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
637+
return;
638+
}
639+
// ...but it can be valid as well, so the checker will (optimistically)
640+
// assume that it's valid and mention this in the note tag.
641+
SUR.recordNonNegativeAssumption();
596642
}
597-
// ...but it can be valid as well, so the checker will (optimistically)
598-
// assume that it's valid and mention this in the note tag.
599-
SUR.recordNonNegativeAssumption();
600643
}
601644

602645
// Actually update the state. The "if" only fails in the extremely unlikely
603-
// case when compareValueToThreshold returns {nullptr, nullptr} becasue
646+
// case when compareValueToThreshold returns {nullptr, nullptr} because
604647
// evalBinOpNN fails to evaluate the less-than operator.
605648
if (WithinLowerBound)
606649
State = WithinLowerBound;
@@ -660,7 +703,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
660703
}
661704

662705
// Actually update the state. The "if" only fails in the extremely unlikely
663-
// case when compareValueToThreshold returns {nullptr, nullptr} becasue
706+
// case when compareValueToThreshold returns {nullptr, nullptr} because
664707
// evalBinOpNN fails to evaluate the less-than operator.
665708
if (WithinUpperBound)
666709
State = WithinUpperBound;
@@ -725,8 +768,8 @@ void ArrayBoundChecker::reportOOB(CheckerContext &C, ProgramStateRef ErrorState,
725768
C.emitReport(std::move(BR));
726769
}
727770

728-
bool ArrayBoundChecker::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
729-
SourceLocation Loc = S->getBeginLoc();
771+
bool ArrayBoundChecker::isFromCtypeMacro(const Expr *E, ASTContext &ACtx) {
772+
SourceLocation Loc = E->getBeginLoc();
730773
if (!Loc.isMacroID())
731774
return false;
732775

@@ -744,6 +787,14 @@ bool ArrayBoundChecker::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
744787
(MacroName == "isupper") || (MacroName == "isxdigit"));
745788
}
746789

790+
bool ArrayBoundChecker::isOffsetObviouslyNonnegative(const Expr *E,
791+
CheckerContext &C) {
792+
const ArraySubscriptExpr *ASE = getAsCleanArraySubscriptExpr(E, C);
793+
if (!ASE)
794+
return false;
795+
return ASE->getIdx()->getType()->isUnsignedIntegerOrEnumerationType();
796+
}
797+
747798
bool ArrayBoundChecker::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
748799
ParentMapContext &ParentCtx = ACtx.getParentMapContext();
749800
do {

clang/test/Analysis/out-of-bounds.c

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -188,29 +188,31 @@ int test_cast_to_unsigned(signed char x) {
188188
if (x >= 0)
189189
return x;
190190
// FIXME: Here the analyzer ignores the signed -> unsigned cast, and manages to
191-
// load a negative value from an unsigned variable. This causes an underflow
192-
// report, which is an ugly false positive.
191+
// load a negative value from an unsigned variable.
193192
// The underlying issue is tracked by Github ticket #39492.
194193
clang_analyzer_value(y); // expected-warning {{8s:{ [-128, -1] } }}
195-
return table[y]; // expected-warning {{Out of bound access to memory preceding}}
194+
// However, a hack in the ArrayBound checker suppresses the false positive
195+
// underflow report that would be generated here.
196+
return table[y]; // no-warning
196197
}
197198

198199
int test_cast_to_unsigned_overflow(signed char x) {
199200
unsigned char y = x;
200201
if (x >= 0)
201202
return x;
202-
// A variant of 'test_cast_to_unsigned' where the correct behavior would be
203-
// an overflow report (because the negative values are cast to `unsigned
204-
// char` values that are too large).
205-
// FIXME: See comment in 'test_cast_to_unsigned'.
203+
// FIXME: As in 'test_cast_to_unsigned', the analyzer thinks that this
204+
// unsigned variable contains a negative value.
206205
clang_analyzer_value(y); // expected-warning {{8s:{ [-128, -1] } }}
207-
return small_table[y]; // expected-warning {{Out of bound access to memory preceding}}
206+
// FIXME: The following subscript expression should produce an overflow
207+
// report (because negative signed char corresponds to unsigned char >= 128);
208+
// but the hack in ArrayBound just silences reports and cannot "restore" the
209+
// real execution paths.
210+
return small_table[y]; // no-warning
208211
}
209212

210213
int test_negative_offset_with_unsigned_idx(void) {
211214
// An example where the subscript operator uses an unsigned index, but the
212-
// underflow report is still justified. (We should try to keep this if we
213-
// silence false positives like the one in 'test_cast_to_unsigned'.)
215+
// underflow report is still justified.
214216
int *p = table - 10;
215217
unsigned idx = 2u;
216218
return p[idx]; // expected-warning {{Out of bound access to memory preceding}}

0 commit comments

Comments
 (0)