12
12
// ===----------------------------------------------------------------------===//
13
13
14
14
#include " clang/AST/CharUnits.h"
15
+ #include " clang/AST/ParentMapContext.h"
15
16
#include " clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
16
17
#include " clang/StaticAnalyzer/Checkers/Taint.h"
17
18
#include " clang/StaticAnalyzer/Core/BugReporter/BugType.h"
@@ -34,20 +35,46 @@ using llvm::formatv;
34
35
namespace {
35
36
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
36
37
37
- class ArrayBoundCheckerV2 :
38
- public Checker<check::Location> {
38
+ struct Messages {
39
+ std::string Short, Full;
40
+ };
41
+
42
+ // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
43
+ // instead of `PreStmt` because the current implementation passes the whole
44
+ // expression to `CheckerContext::getSVal()` which only works after the
45
+ // symbolic evaluation of the expression. (To turn them into `PreStmt`
46
+ // callbacks, we'd need to duplicate the logic that evaluates these
47
+ // expressions.) The `MemberExpr` callback would work as `PreStmt` but it's
48
+ // defined as `PostStmt` for the sake of consistency with the other callbacks.
49
+ class ArrayBoundCheckerV2 : public Checker <check::PostStmt<ArraySubscriptExpr>,
50
+ check::PostStmt<UnaryOperator>,
51
+ check::PostStmt<MemberExpr>> {
39
52
BugType BT{this , " Out-of-bound access" };
40
53
BugType TaintBT{this , " Out-of-bound access" , categories::TaintedData};
41
54
55
+ void performCheck (const Expr *E, CheckerContext &C) const ;
56
+
42
57
void reportOOB (CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
43
- NonLoc Offset, std::string RegName, std::string Msg ) const ;
58
+ NonLoc Offset, Messages Msgs ) const ;
44
59
45
60
static bool isFromCtypeMacro (const Stmt *S, ASTContext &AC);
46
61
62
+ static bool isInAddressOf (const Stmt *S, ASTContext &AC);
63
+
47
64
public:
48
- void checkLocation (SVal l, bool isLoad, const Stmt *S,
49
- CheckerContext &C) const ;
65
+ void checkPostStmt (const ArraySubscriptExpr *E, CheckerContext &C) const {
66
+ performCheck (E, C);
67
+ }
68
+ void checkPostStmt (const UnaryOperator *E, CheckerContext &C) const {
69
+ if (E->getOpcode () == UO_Deref)
70
+ performCheck (E, C);
71
+ }
72
+ void checkPostStmt (const MemberExpr *E, CheckerContext &C) const {
73
+ if (E->isArrow ())
74
+ performCheck (E->getBase (), C);
75
+ }
50
76
};
77
+
51
78
} // anonymous namespace
52
79
53
80
// / For a given Location that can be represented as a symbolic expression
@@ -149,9 +176,11 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
149
176
// where the first one corresponds to "value below threshold" and the second
150
177
// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
151
178
// the case when the evaluation fails.
179
+ // If the optional argument CheckEquality is true, then use BO_EQ instead of
180
+ // the default BO_LT after consistently applying the same simplification steps.
152
181
static std::pair<ProgramStateRef, ProgramStateRef>
153
182
compareValueToThreshold (ProgramStateRef State, NonLoc Value, NonLoc Threshold,
154
- SValBuilder &SVB) {
183
+ SValBuilder &SVB, bool CheckEquality = false ) {
155
184
if (auto ConcreteThreshold = Threshold.getAs <nonloc::ConcreteInt>()) {
156
185
std::tie (Value, Threshold) = getSimplifiedOffsets (Value, *ConcreteThreshold, SVB);
157
186
}
@@ -167,8 +196,10 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
167
196
return {nullptr , State};
168
197
}
169
198
}
199
+ const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT;
170
200
auto BelowThreshold =
171
- SVB.evalBinOpNN (State, BO_LT, Value, Threshold, SVB.getConditionType ()).getAs <NonLoc>();
201
+ SVB.evalBinOpNN (State, OpKind, Value, Threshold, SVB.getConditionType ())
202
+ .getAs <NonLoc>();
172
203
173
204
if (BelowThreshold)
174
205
return State->assume (*BelowThreshold);
@@ -217,16 +248,19 @@ static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
217
248
return formatv (ShortMsgTemplates[Kind], RegName);
218
249
}
219
250
220
- static std::string getPrecedesMsg (std::string RegName, NonLoc Offset) {
251
+ static Messages getPrecedesMsgs (const SubRegion *Region, NonLoc Offset) {
252
+ std::string RegName = getRegionName (Region);
221
253
SmallString<128 > Buf;
222
254
llvm::raw_svector_ostream Out (Buf);
223
255
Out << " Access of " << RegName << " at negative byte offset" ;
224
256
if (auto ConcreteIdx = Offset.getAs <nonloc::ConcreteInt>())
225
257
Out << ' ' << ConcreteIdx->getValue ();
226
- return std::string (Buf);
258
+ return { getShortMsg (OOB_Precedes, RegName), std::string (Buf)} ;
227
259
}
228
- static std::string getExceedsMsg (ASTContext &ACtx, std::string RegName,
229
- NonLoc Offset, NonLoc Extent, SVal Location) {
260
+
261
+ static Messages getExceedsMsgs (ASTContext &ACtx, const SubRegion *Region,
262
+ NonLoc Offset, NonLoc Extent, SVal Location) {
263
+ std::string RegName = getRegionName (Region);
230
264
const auto *EReg = Location.getAsRegion ()->getAs <ElementRegion>();
231
265
assert (EReg && " this checker only handles element access" );
232
266
QualType ElemType = EReg->getElementType ();
@@ -273,20 +307,18 @@ static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName,
273
307
Out << " s" ;
274
308
}
275
309
276
- return std::string (Buf);
277
- }
278
- static std::string getTaintMsg (std::string RegName) {
279
- SmallString<128 > Buf;
280
- llvm::raw_svector_ostream Out (Buf);
281
- Out << " Access of " << RegName
282
- << " with a tainted offset that may be too large" ;
283
- return std::string (Buf);
310
+ return {getShortMsg (OOB_Exceeds, RegName), std::string (Buf)};
284
311
}
285
312
286
- void ArrayBoundCheckerV2::checkLocation (SVal Location, bool IsLoad,
287
- const Stmt *LoadS,
288
- CheckerContext &C) const {
313
+ static Messages getTaintMsgs (const SubRegion *Region, const char *OffsetName) {
314
+ std::string RegName = getRegionName (Region);
315
+ return {formatv (" Potential out of bound access to {0} with tainted {1}" ,
316
+ RegName, OffsetName),
317
+ formatv (" Access of {0} with a tainted {1} that may be too large" ,
318
+ RegName, OffsetName)};
319
+ }
289
320
321
+ void ArrayBoundCheckerV2::performCheck (const Expr *E, CheckerContext &C) const {
290
322
// NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
291
323
// some new logic here that reasons directly about memory region extents.
292
324
// Once that logic is more mature, we can bring it back to assumeInBound()
@@ -297,12 +329,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
297
329
// have some flexibility in defining the base region, we can achieve
298
330
// various levels of conservatism in our buffer overflow checking.
299
331
332
+ const SVal Location = C.getSVal (E);
333
+
300
334
// The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
301
335
// #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
302
336
// and incomplete analysis of these leads to false positives. As even
303
337
// accurate reports would be confusing for the users, just disable reports
304
338
// from these macros:
305
- if (isFromCtypeMacro (LoadS , C.getASTContext ()))
339
+ if (isFromCtypeMacro (E , C.getASTContext ()))
306
340
return ;
307
341
308
342
ProgramStateRef State = C.getState ();
@@ -331,9 +365,8 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
331
365
332
366
if (PrecedesLowerBound && !WithinLowerBound) {
333
367
// We know that the index definitely precedes the lower bound.
334
- std::string RegName = getRegionName (Reg);
335
- std::string Msg = getPrecedesMsg (RegName, ByteOffset);
336
- reportOOB (C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg);
368
+ Messages Msgs = getPrecedesMsgs (Reg, ByteOffset);
369
+ reportOOB (C, PrecedesLowerBound, OOB_Precedes, ByteOffset, Msgs);
337
370
return ;
338
371
}
339
372
@@ -350,17 +383,38 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
350
383
if (ExceedsUpperBound) {
351
384
if (!WithinUpperBound) {
352
385
// We know that the index definitely exceeds the upper bound.
353
- std::string RegName = getRegionName (Reg);
354
- std::string Msg = getExceedsMsg (C.getASTContext (), RegName, ByteOffset,
355
- *KnownSize, Location);
356
- reportOOB (C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg);
386
+ if (isa<ArraySubscriptExpr>(E) && isInAddressOf (E, C.getASTContext ())) {
387
+ // ...but this is within an addressof expression, so we need to check
388
+ // for the exceptional case that `&array[size]` is valid.
389
+ auto [EqualsToThreshold, NotEqualToThreshold] =
390
+ compareValueToThreshold (ExceedsUpperBound, ByteOffset, *KnownSize,
391
+ SVB, /* CheckEquality=*/ true );
392
+ if (EqualsToThreshold && !NotEqualToThreshold) {
393
+ // We are definitely in the exceptional case, so return early
394
+ // instead of reporting a bug.
395
+ C.addTransition (EqualsToThreshold);
396
+ return ;
397
+ }
398
+ }
399
+ Messages Msgs = getExceedsMsgs (C.getASTContext (), Reg, ByteOffset,
400
+ *KnownSize, Location);
401
+ reportOOB (C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs);
357
402
return ;
358
403
}
359
404
if (isTainted (State, ByteOffset)) {
360
- // Both cases are possible, but the index is tainted, so report.
405
+ // Both cases are possible, but the offset is tainted, so report.
361
406
std::string RegName = getRegionName (Reg);
362
- std::string Msg = getTaintMsg (RegName);
363
- reportOOB (C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg);
407
+
408
+ // Diagnostic detail: "tainted offset" is always correct, but the
409
+ // common case is that 'idx' is tainted in 'arr[idx]' and then it's
410
+ // nicer to say "tainted index".
411
+ const char *OffsetName = " offset" ;
412
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
413
+ if (isTainted (State, ASE->getIdx (), C.getLocationContext ()))
414
+ OffsetName = " index" ;
415
+
416
+ Messages Msgs = getTaintMsgs (Reg, OffsetName);
417
+ reportOOB (C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs);
364
418
return ;
365
419
}
366
420
}
@@ -374,17 +428,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
374
428
375
429
void ArrayBoundCheckerV2::reportOOB (CheckerContext &C,
376
430
ProgramStateRef ErrorState, OOB_Kind Kind,
377
- NonLoc Offset, std::string RegName,
378
- std::string Msg) const {
431
+ NonLoc Offset, Messages Msgs) const {
379
432
380
433
ExplodedNode *ErrorNode = C.generateErrorNode (ErrorState);
381
434
if (!ErrorNode)
382
435
return ;
383
436
384
- std::string ShortMsg = getShortMsg (Kind, RegName);
385
-
386
437
auto BR = std::make_unique<PathSensitiveBugReport>(
387
- Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg , ErrorNode);
438
+ Kind == OOB_Taint ? TaintBT : BT, Msgs. Short , Msgs. Full , ErrorNode);
388
439
389
440
// Track back the propagation of taintedness.
390
441
if (Kind == OOB_Taint)
@@ -413,6 +464,18 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
413
464
(MacroName == " isupper" ) || (MacroName == " isxdigit" ));
414
465
}
415
466
467
+ bool ArrayBoundCheckerV2::isInAddressOf (const Stmt *S, ASTContext &ACtx) {
468
+ ParentMapContext &ParentCtx = ACtx.getParentMapContext ();
469
+ do {
470
+ const DynTypedNodeList Parents = ParentCtx.getParents (*S);
471
+ if (Parents.empty ())
472
+ return false ;
473
+ S = Parents[0 ].get <Stmt>();
474
+ } while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));
475
+ const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S);
476
+ return UnaryOp && UnaryOp->getOpcode () == UO_AddrOf;
477
+ }
478
+
416
479
void ento::registerArrayBoundCheckerV2 (CheckerManager &mgr) {
417
480
mgr.registerChecker <ArrayBoundCheckerV2>();
418
481
}
0 commit comments