-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[analyzer] Check the correct first and last elements in cstring.UninitializedRead #95408
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-static-analyzer-1 Author: Kristóf Umann (Szelethus) ChangesI intend to fix this checker up so that we can move it out of alpha. I made a bunch of analyses, and found many similar false positives: int t[] = {1,2,3};
memcpy(dst, t, sizeof(t) / sizeof(t[0])); // warn The problem here is the way CStringChecker checks whether the destination and source buffers are initialized: heuristically, it only checks the first and last element. This is fine, however, it retrieves these elements as characters, even if the underlaying object is not a character array. Reading the last byte of an integer is undefined, so the checker emits a bug here. A quick search tells you the rationale: "Both objects are reinterpreted as arrays of unsigned char.". But the static analyzer right now can't check byte-by-byte if a memory region is initialized, it can only check if its a well-defined character or not. In this patch, I pry the original array out of the arguments to memcpy (and similar functions), and retrieve the actual first and last elements according to the array's actual element type. Currently, my improvements reduced the number of reports to 29 on these projects: memcached,tmux,curl,twin,vim,openssl,sqlite,ffmpeg,postgres Before my patch, there were 87. Full diff: https://github.com/llvm/llvm-project/pull/95408.diff 3 Files Affected:
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
index 151d3e57c1cb8..05d679b659029 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
@@ -34,6 +34,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <limits>
@@ -99,6 +100,8 @@ class MemRegion : public llvm::FoldingSetNode {
#define REGION(Id, Parent) Id ## Kind,
#define REGION_RANGE(Id, First, Last) BEGIN_##Id = First, END_##Id = Last,
#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+#undef REGION_RANGE
};
private:
@@ -171,6 +174,17 @@ class MemRegion : public llvm::FoldingSetNode {
Kind getKind() const { return kind; }
+ StringRef getKindStr() const {
+ switch (getKind()) {
+#define REGION(Id, Parent) \
+ case Id##Kind: \
+ return #Id;
+#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+ }
+ llvm_unreachable("Unkown kind!");
+ }
+
template<typename RegionTy> const RegionTy* getAs() const;
template <typename RegionTy>
LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy *castAs() const;
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 238e87a712a43..fdea2e01215b4 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InterCheckerAPI.h"
+#include "clang/AST/OperationKinds.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CharInfo.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
@@ -22,10 +23,13 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <functional>
#include <optional>
@@ -304,6 +308,10 @@ class CStringChecker : public Checker< eval::Call,
// Re-usable checks
ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
AnyArgExpr Arg, SVal l) const;
+ // Check whether the origin region behind \p Element (like the actual array
+ // region \p Element is from) is initialized.
+ ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
+ AnyArgExpr Buffer, SVal Element, SVal Size) const;
ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
AnyArgExpr Buffer, SVal Element,
AccessKind Access,
@@ -329,7 +337,7 @@ class CStringChecker : public Checker< eval::Call,
const Stmt *S, StringRef WarningMsg) const;
void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
- const Expr *E) const;
+ const Expr *E, StringRef Msg) const;
ProgramStateRef checkAdditionOverflow(CheckerContext &C,
ProgramStateRef state,
NonLoc left,
@@ -393,6 +401,173 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
return stateNonNull;
}
+static std::optional<NonLoc> getIndex(ProgramStateRef State,
+ const ElementRegion *ER, CharKind CK) {
+ SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+ ASTContext &Ctx = SValBuilder.getContext();
+
+ if (CK == CharKind::Regular) {
+ if (ER->getValueType() != Ctx.CharTy)
+ return {};
+ return ER->getIndex();
+ }
+
+ if (ER->getValueType() != Ctx.WideCharTy)
+ return {};
+
+ QualType SizeTy = Ctx.getSizeType();
+ NonLoc WideSize =
+ SValBuilder
+ .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
+ SizeTy)
+ .castAs<NonLoc>();
+ SVal Offset =
+ SValBuilder.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
+ if (Offset.isUnknown())
+ return {};
+ return Offset.castAs<NonLoc>();
+}
+
+// Try to get hold of the origin regin (e.g. the actual array region from an
+// element region).
+static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
+ const MemRegion *MR = ER->getSuperRegion();
+ const MemRegion *Ret = MR;
+ assert(MR);
+ if (const auto *sym = MR->getAs<SymbolicRegion>()) {
+ SymbolRef sym2 = sym->getSymbol();
+ if (!sym2)
+ return nullptr;
+ Ret = sym2->getOriginRegion();
+ }
+ if (const auto *element = MR->getAs<ElementRegion>()) {
+ Ret = element->getBaseRegion();
+ }
+ return dyn_cast_or_null<TypedValueRegion>(Ret);
+}
+
+// Basically 1 -> 1st, 12 -> 12th, etc.
+static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
+ Os << Idx << llvm::getOrdinalSuffix(Idx);
+}
+
+ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
+ ProgramStateRef State,
+ AnyArgExpr Buffer, SVal Element,
+ SVal Size) const {
+
+ // If a previous check has failed, propagate the failure.
+ if (!State)
+ return nullptr;
+
+ const MemRegion *R = Element.getAsRegion();
+ if (!R)
+ return State;
+
+ const auto *ER = dyn_cast<ElementRegion>(R);
+ if (!ER)
+ return State;
+
+ const TypedValueRegion *Orig = getOriginRegion(ER);
+ if (!Orig)
+ return State;
+
+ SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+ ASTContext &Ctx = SValBuilder.getContext();
+
+ // FIXME: We ought to able to check objects as well. Maybe
+ // UninitializedObjectChecker could help?
+ if (!Orig->getValueType()->isArrayType())
+ return State;
+
+ const QualType ElemTy = Ctx.getBaseElementType(Orig->getValueType());
+ const NonLoc Zero = SValBuilder.makeZeroArrayIndex();
+
+ SVal FirstElementVal =
+ State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>();
+ if (!isa<Loc>(FirstElementVal))
+ return State;
+
+ // Ensure that we wouldn't read uninitialized value.
+ if (Filter.CheckCStringUninitializedRead &&
+ State->getSVal(FirstElementVal.castAs<Loc>()).isUndef()) {
+ llvm::SmallString<258> Buf;
+ llvm::raw_svector_ostream OS(Buf);
+ OS << "The first element of the ";
+ printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+ OS << " argument is undefined";
+ emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
+ return nullptr;
+ }
+
+ // We won't check whether the entire region is fully initialized -- lets just
+ // check that the first and the last element is. So, onto checking the last
+ // element:
+ const QualType IdxTy = SValBuilder.getArrayIndexType();
+
+ NonLoc ElemSize =
+ SValBuilder
+ .makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
+ .castAs<NonLoc>();
+
+ // FIXME: Check that the size arg to the cstring function is divisible by
+ // size of the actual element type?
+
+ // The type of the argument to the cstring function is either char or wchar,
+ // but thats not the type of the original array (or memory region).
+ // Suppose the following:
+ // int t[5];
+ // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
+ // When checking whether t is fully initialized, we see it as char array of
+ // size sizeof(int)*5. If we check the last element as a character, we read
+ // the last byte of an integer, which will be undefined. But just because
+ // that value is undefined, it doesn't mean that the element is uninitialized!
+ // For this reason, we need to retrieve the actual last element with the
+ // correct type.
+
+ // Divide the size argument to the cstring function by the actual element
+ // type. This value will be size of the array, or the index to the
+ // past-the-end element.
+ std::optional<NonLoc> Offset =
+ SValBuilder
+ .evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
+ IdxTy)
+ .getAs<NonLoc>();
+
+ // Retrieve the index of the last element.
+ const NonLoc One = SValBuilder.makeIntVal(1, IdxTy).castAs<NonLoc>();
+ SVal LastIdx = SValBuilder.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
+
+ if (!Offset)
+ return State;
+
+ SVal LastElementVal =
+ State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(Orig));
+ if (!isa<Loc>(LastElementVal))
+ return State;
+
+ if (Filter.CheckCStringUninitializedRead &&
+ State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
+ const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
+ // If we can't get emit a sensible last element index, just bail out --
+ // prefer to emit nothing in favour of emitting garbage quality reports.
+ if (!IdxInt) {
+ C.addSink();
+ return nullptr;
+ }
+ llvm::SmallString<258> Buf;
+ llvm::raw_svector_ostream OS(Buf);
+ OS << "The last element of the ";
+ printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+ OS << " argument to access (the ";
+ printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1);
+ OS << ") is undefined";
+ emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
+ return nullptr;
+ }
+ return State;
+}
+
// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
ProgramStateRef state,
@@ -413,30 +588,10 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
if (!ER)
return state;
- SValBuilder &svalBuilder = C.getSValBuilder();
- ASTContext &Ctx = svalBuilder.getContext();
-
// Get the index of the accessed element.
- NonLoc Idx = ER->getIndex();
-
- if (CK == CharKind::Regular) {
- if (ER->getValueType() != Ctx.CharTy)
- return state;
- } else {
- if (ER->getValueType() != Ctx.WideCharTy)
- return state;
-
- QualType SizeTy = Ctx.getSizeType();
- NonLoc WideSize =
- svalBuilder
- .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
- SizeTy)
- .castAs<NonLoc>();
- SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy);
- if (Offset.isUnknown())
- return state;
- Idx = Offset.castAs<NonLoc>();
- }
+ std::optional<NonLoc> Idx = getIndex(state, ER, CK);
+ if (!Idx)
+ return state;
// Get the size of the array.
const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
@@ -444,7 +599,7 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
getDynamicExtent(state, superReg, C.getSValBuilder());
ProgramStateRef StInBound, StOutBound;
- std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size);
+ std::tie(StInBound, StOutBound) = state->assumeInBoundDual(*Idx, Size);
if (StOutBound && !StInBound) {
// These checks are either enabled by the CString out-of-bounds checker
// explicitly or implicitly by the Malloc checker.
@@ -459,15 +614,6 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
return nullptr;
}
- // Ensure that we wouldn't read uninitialized value.
- if (Access == AccessKind::read) {
- if (Filter.CheckCStringUninitializedRead &&
- StInBound->getSVal(ER).isUndef()) {
- emitUninitializedReadBug(C, StInBound, Buffer.Expression);
- return nullptr;
- }
- }
-
// Array bound check succeeded. From this point forward the array bound
// should always succeed.
return StInBound;
@@ -502,6 +648,7 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
// Check if the first byte of the buffer is accessible.
State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
+
if (!State)
return nullptr;
@@ -526,6 +673,8 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
SVal BufEnd =
svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
+ if (Access == AccessKind::read)
+ State = checkInit(C, State, Buffer, BufEnd, *Length);
// If the buffer isn't large enough, abort.
if (!State)
@@ -694,16 +843,17 @@ void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
ProgramStateRef State,
- const Expr *E) const {
+ const Expr *E,
+ StringRef Msg) const {
if (ExplodedNode *N = C.generateErrorNode(State)) {
- const char *Msg =
- "Bytes string function accesses uninitialized/garbage values";
if (!BT_UninitRead)
BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
"Accessing unitialized/garbage values"));
auto Report =
std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
+ Report->addNote("Other elements might also be undefined",
+ Report->getLocation());
Report->addRange(E->getSourceRange());
bugreporter::trackExpressionValue(N, E, *Report);
C.emitReport(std::move(Report));
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index c535e018e62c2..97625a3650663 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -16,7 +16,31 @@ void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
void top(char *dst) {
char buf[10];
- memcpy(dst, buf, 10); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
+ memcpy(dst, buf, 10); // expected-warning{{The first element of the 2nd argument is undefined}}
+ // expected-note@-1{{Other elements might also be undefined}}
+ (void)buf;
+}
+
+void top2(char *dst) {
+ char buf[10];
+ buf[0] = 'i';
+ memcpy(dst, buf, 10); // expected-warning{{The last element of the 2nd argument to access (the 10th) is undefined}}
+ // expected-note@-1{{Other elements might also be undefined}}
+ (void)buf;
+}
+
+void top3(char *dst) {
+ char buf[10];
+ buf[0] = 'i';
+ memcpy(dst, buf, 1);
+ (void)buf;
+}
+
+void top4(char *dst) {
+ char buf[10];
+ buf[0] = 'i';
+ memcpy(dst, buf, 2); // expected-warning{{The last element of the 2nd argument to access (the 2nd) is undefined}}
+ // expected-note@-1{{Other elements might also be undefined}}
(void)buf;
}
@@ -26,16 +50,12 @@ void top(char *dst) {
void *mempcpy(void *restrict s1, const void *restrict s2, size_t n);
-void mempcpy14() {
+void mempcpy13() {
int src[] = {1, 2, 3, 4};
int dst[5] = {0};
int *p;
- p = mempcpy(dst, src, 4 * sizeof(int)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
- // FIXME: This behaviour is actually surprising and needs to be fixed,
- // mempcpy seems to consider the very last byte of the src buffer uninitialized
- // and returning undef unfortunately. It should have returned unknown or a conjured value instead.
-
+ p = mempcpy(dst, src, 4 * sizeof(int));
clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal)
}
@@ -52,8 +72,34 @@ void mempcpy15() {
struct st *p2;
p1 = (&s2) + 1;
- p2 = mempcpy(&s2, &s1, sizeof(struct st)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
- // FIXME: It seems same as mempcpy14() case.
-
+
+ p2 = mempcpy(&s2, &s1, sizeof(struct st));
+
clang_analyzer_eval(p1 == p2); // no-warning (above is fatal)
}
+
+void mempcpy16() {
+ struct st s1;
+ struct st s2;
+
+ // FIXME: Maybe ask UninitializedObjectChecker whether s1 is fully
+ // initialized?
+ mempcpy(&s2, &s1, sizeof(struct st));
+}
+
+void initialized(int *dest) {
+ int t[] = {1, 2, 3};
+ memcpy(dest, t, sizeof(t));
+}
+
+// Creduced crash.
+
+void *ga_copy_strings_from_0;
+void *memmove();
+void alloc();
+void ga_copy_strings() {
+ int i = 0;
+ for (;; ++i)
+ memmove(alloc, ((char **)ga_copy_strings_from_0)[i], 1);
+}
+
|
@llvm/pr-subscribers-clang Author: Kristóf Umann (Szelethus) ChangesI intend to fix this checker up so that we can move it out of alpha. I made a bunch of analyses, and found many similar false positives: int t[] = {1,2,3};
memcpy(dst, t, sizeof(t) / sizeof(t[0])); // warn The problem here is the way CStringChecker checks whether the destination and source buffers are initialized: heuristically, it only checks the first and last element. This is fine, however, it retrieves these elements as characters, even if the underlaying object is not a character array. Reading the last byte of an integer is undefined, so the checker emits a bug here. A quick search tells you the rationale: "Both objects are reinterpreted as arrays of unsigned char.". But the static analyzer right now can't check byte-by-byte if a memory region is initialized, it can only check if its a well-defined character or not. In this patch, I pry the original array out of the arguments to memcpy (and similar functions), and retrieve the actual first and last elements according to the array's actual element type. Currently, my improvements reduced the number of reports to 29 on these projects: memcached,tmux,curl,twin,vim,openssl,sqlite,ffmpeg,postgres Before my patch, there were 87. Full diff: https://github.com/llvm/llvm-project/pull/95408.diff 3 Files Affected:
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
index 151d3e57c1cb8..05d679b659029 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
@@ -34,6 +34,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <limits>
@@ -99,6 +100,8 @@ class MemRegion : public llvm::FoldingSetNode {
#define REGION(Id, Parent) Id ## Kind,
#define REGION_RANGE(Id, First, Last) BEGIN_##Id = First, END_##Id = Last,
#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+#undef REGION_RANGE
};
private:
@@ -171,6 +174,17 @@ class MemRegion : public llvm::FoldingSetNode {
Kind getKind() const { return kind; }
+ StringRef getKindStr() const {
+ switch (getKind()) {
+#define REGION(Id, Parent) \
+ case Id##Kind: \
+ return #Id;
+#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+ }
+ llvm_unreachable("Unkown kind!");
+ }
+
template<typename RegionTy> const RegionTy* getAs() const;
template <typename RegionTy>
LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy *castAs() const;
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 238e87a712a43..fdea2e01215b4 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InterCheckerAPI.h"
+#include "clang/AST/OperationKinds.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CharInfo.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
@@ -22,10 +23,13 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <functional>
#include <optional>
@@ -304,6 +308,10 @@ class CStringChecker : public Checker< eval::Call,
// Re-usable checks
ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
AnyArgExpr Arg, SVal l) const;
+ // Check whether the origin region behind \p Element (like the actual array
+ // region \p Element is from) is initialized.
+ ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
+ AnyArgExpr Buffer, SVal Element, SVal Size) const;
ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
AnyArgExpr Buffer, SVal Element,
AccessKind Access,
@@ -329,7 +337,7 @@ class CStringChecker : public Checker< eval::Call,
const Stmt *S, StringRef WarningMsg) const;
void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
- const Expr *E) const;
+ const Expr *E, StringRef Msg) const;
ProgramStateRef checkAdditionOverflow(CheckerContext &C,
ProgramStateRef state,
NonLoc left,
@@ -393,6 +401,173 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
return stateNonNull;
}
+static std::optional<NonLoc> getIndex(ProgramStateRef State,
+ const ElementRegion *ER, CharKind CK) {
+ SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+ ASTContext &Ctx = SValBuilder.getContext();
+
+ if (CK == CharKind::Regular) {
+ if (ER->getValueType() != Ctx.CharTy)
+ return {};
+ return ER->getIndex();
+ }
+
+ if (ER->getValueType() != Ctx.WideCharTy)
+ return {};
+
+ QualType SizeTy = Ctx.getSizeType();
+ NonLoc WideSize =
+ SValBuilder
+ .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
+ SizeTy)
+ .castAs<NonLoc>();
+ SVal Offset =
+ SValBuilder.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
+ if (Offset.isUnknown())
+ return {};
+ return Offset.castAs<NonLoc>();
+}
+
+// Try to get hold of the origin regin (e.g. the actual array region from an
+// element region).
+static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
+ const MemRegion *MR = ER->getSuperRegion();
+ const MemRegion *Ret = MR;
+ assert(MR);
+ if (const auto *sym = MR->getAs<SymbolicRegion>()) {
+ SymbolRef sym2 = sym->getSymbol();
+ if (!sym2)
+ return nullptr;
+ Ret = sym2->getOriginRegion();
+ }
+ if (const auto *element = MR->getAs<ElementRegion>()) {
+ Ret = element->getBaseRegion();
+ }
+ return dyn_cast_or_null<TypedValueRegion>(Ret);
+}
+
+// Basically 1 -> 1st, 12 -> 12th, etc.
+static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
+ Os << Idx << llvm::getOrdinalSuffix(Idx);
+}
+
+ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
+ ProgramStateRef State,
+ AnyArgExpr Buffer, SVal Element,
+ SVal Size) const {
+
+ // If a previous check has failed, propagate the failure.
+ if (!State)
+ return nullptr;
+
+ const MemRegion *R = Element.getAsRegion();
+ if (!R)
+ return State;
+
+ const auto *ER = dyn_cast<ElementRegion>(R);
+ if (!ER)
+ return State;
+
+ const TypedValueRegion *Orig = getOriginRegion(ER);
+ if (!Orig)
+ return State;
+
+ SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+ ASTContext &Ctx = SValBuilder.getContext();
+
+ // FIXME: We ought to able to check objects as well. Maybe
+ // UninitializedObjectChecker could help?
+ if (!Orig->getValueType()->isArrayType())
+ return State;
+
+ const QualType ElemTy = Ctx.getBaseElementType(Orig->getValueType());
+ const NonLoc Zero = SValBuilder.makeZeroArrayIndex();
+
+ SVal FirstElementVal =
+ State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>();
+ if (!isa<Loc>(FirstElementVal))
+ return State;
+
+ // Ensure that we wouldn't read uninitialized value.
+ if (Filter.CheckCStringUninitializedRead &&
+ State->getSVal(FirstElementVal.castAs<Loc>()).isUndef()) {
+ llvm::SmallString<258> Buf;
+ llvm::raw_svector_ostream OS(Buf);
+ OS << "The first element of the ";
+ printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+ OS << " argument is undefined";
+ emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
+ return nullptr;
+ }
+
+ // We won't check whether the entire region is fully initialized -- lets just
+ // check that the first and the last element is. So, onto checking the last
+ // element:
+ const QualType IdxTy = SValBuilder.getArrayIndexType();
+
+ NonLoc ElemSize =
+ SValBuilder
+ .makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
+ .castAs<NonLoc>();
+
+ // FIXME: Check that the size arg to the cstring function is divisible by
+ // size of the actual element type?
+
+ // The type of the argument to the cstring function is either char or wchar,
+ // but thats not the type of the original array (or memory region).
+ // Suppose the following:
+ // int t[5];
+ // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
+ // When checking whether t is fully initialized, we see it as char array of
+ // size sizeof(int)*5. If we check the last element as a character, we read
+ // the last byte of an integer, which will be undefined. But just because
+ // that value is undefined, it doesn't mean that the element is uninitialized!
+ // For this reason, we need to retrieve the actual last element with the
+ // correct type.
+
+ // Divide the size argument to the cstring function by the actual element
+ // type. This value will be size of the array, or the index to the
+ // past-the-end element.
+ std::optional<NonLoc> Offset =
+ SValBuilder
+ .evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
+ IdxTy)
+ .getAs<NonLoc>();
+
+ // Retrieve the index of the last element.
+ const NonLoc One = SValBuilder.makeIntVal(1, IdxTy).castAs<NonLoc>();
+ SVal LastIdx = SValBuilder.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
+
+ if (!Offset)
+ return State;
+
+ SVal LastElementVal =
+ State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(Orig));
+ if (!isa<Loc>(LastElementVal))
+ return State;
+
+ if (Filter.CheckCStringUninitializedRead &&
+ State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
+ const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
+ // If we can't get emit a sensible last element index, just bail out --
+ // prefer to emit nothing in favour of emitting garbage quality reports.
+ if (!IdxInt) {
+ C.addSink();
+ return nullptr;
+ }
+ llvm::SmallString<258> Buf;
+ llvm::raw_svector_ostream OS(Buf);
+ OS << "The last element of the ";
+ printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+ OS << " argument to access (the ";
+ printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1);
+ OS << ") is undefined";
+ emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
+ return nullptr;
+ }
+ return State;
+}
+
// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
ProgramStateRef state,
@@ -413,30 +588,10 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
if (!ER)
return state;
- SValBuilder &svalBuilder = C.getSValBuilder();
- ASTContext &Ctx = svalBuilder.getContext();
-
// Get the index of the accessed element.
- NonLoc Idx = ER->getIndex();
-
- if (CK == CharKind::Regular) {
- if (ER->getValueType() != Ctx.CharTy)
- return state;
- } else {
- if (ER->getValueType() != Ctx.WideCharTy)
- return state;
-
- QualType SizeTy = Ctx.getSizeType();
- NonLoc WideSize =
- svalBuilder
- .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
- SizeTy)
- .castAs<NonLoc>();
- SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy);
- if (Offset.isUnknown())
- return state;
- Idx = Offset.castAs<NonLoc>();
- }
+ std::optional<NonLoc> Idx = getIndex(state, ER, CK);
+ if (!Idx)
+ return state;
// Get the size of the array.
const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
@@ -444,7 +599,7 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
getDynamicExtent(state, superReg, C.getSValBuilder());
ProgramStateRef StInBound, StOutBound;
- std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size);
+ std::tie(StInBound, StOutBound) = state->assumeInBoundDual(*Idx, Size);
if (StOutBound && !StInBound) {
// These checks are either enabled by the CString out-of-bounds checker
// explicitly or implicitly by the Malloc checker.
@@ -459,15 +614,6 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
return nullptr;
}
- // Ensure that we wouldn't read uninitialized value.
- if (Access == AccessKind::read) {
- if (Filter.CheckCStringUninitializedRead &&
- StInBound->getSVal(ER).isUndef()) {
- emitUninitializedReadBug(C, StInBound, Buffer.Expression);
- return nullptr;
- }
- }
-
// Array bound check succeeded. From this point forward the array bound
// should always succeed.
return StInBound;
@@ -502,6 +648,7 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
// Check if the first byte of the buffer is accessible.
State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
+
if (!State)
return nullptr;
@@ -526,6 +673,8 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
SVal BufEnd =
svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
+ if (Access == AccessKind::read)
+ State = checkInit(C, State, Buffer, BufEnd, *Length);
// If the buffer isn't large enough, abort.
if (!State)
@@ -694,16 +843,17 @@ void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
ProgramStateRef State,
- const Expr *E) const {
+ const Expr *E,
+ StringRef Msg) const {
if (ExplodedNode *N = C.generateErrorNode(State)) {
- const char *Msg =
- "Bytes string function accesses uninitialized/garbage values";
if (!BT_UninitRead)
BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
"Accessing unitialized/garbage values"));
auto Report =
std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
+ Report->addNote("Other elements might also be undefined",
+ Report->getLocation());
Report->addRange(E->getSourceRange());
bugreporter::trackExpressionValue(N, E, *Report);
C.emitReport(std::move(Report));
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index c535e018e62c2..97625a3650663 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -16,7 +16,31 @@ void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
void top(char *dst) {
char buf[10];
- memcpy(dst, buf, 10); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
+ memcpy(dst, buf, 10); // expected-warning{{The first element of the 2nd argument is undefined}}
+ // expected-note@-1{{Other elements might also be undefined}}
+ (void)buf;
+}
+
+void top2(char *dst) {
+ char buf[10];
+ buf[0] = 'i';
+ memcpy(dst, buf, 10); // expected-warning{{The last element of the 2nd argument to access (the 10th) is undefined}}
+ // expected-note@-1{{Other elements might also be undefined}}
+ (void)buf;
+}
+
+void top3(char *dst) {
+ char buf[10];
+ buf[0] = 'i';
+ memcpy(dst, buf, 1);
+ (void)buf;
+}
+
+void top4(char *dst) {
+ char buf[10];
+ buf[0] = 'i';
+ memcpy(dst, buf, 2); // expected-warning{{The last element of the 2nd argument to access (the 2nd) is undefined}}
+ // expected-note@-1{{Other elements might also be undefined}}
(void)buf;
}
@@ -26,16 +50,12 @@ void top(char *dst) {
void *mempcpy(void *restrict s1, const void *restrict s2, size_t n);
-void mempcpy14() {
+void mempcpy13() {
int src[] = {1, 2, 3, 4};
int dst[5] = {0};
int *p;
- p = mempcpy(dst, src, 4 * sizeof(int)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
- // FIXME: This behaviour is actually surprising and needs to be fixed,
- // mempcpy seems to consider the very last byte of the src buffer uninitialized
- // and returning undef unfortunately. It should have returned unknown or a conjured value instead.
-
+ p = mempcpy(dst, src, 4 * sizeof(int));
clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal)
}
@@ -52,8 +72,34 @@ void mempcpy15() {
struct st *p2;
p1 = (&s2) + 1;
- p2 = mempcpy(&s2, &s1, sizeof(struct st)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
- // FIXME: It seems same as mempcpy14() case.
-
+
+ p2 = mempcpy(&s2, &s1, sizeof(struct st));
+
clang_analyzer_eval(p1 == p2); // no-warning (above is fatal)
}
+
+void mempcpy16() {
+ struct st s1;
+ struct st s2;
+
+ // FIXME: Maybe ask UninitializedObjectChecker whether s1 is fully
+ // initialized?
+ mempcpy(&s2, &s1, sizeof(struct st));
+}
+
+void initialized(int *dest) {
+ int t[] = {1, 2, 3};
+ memcpy(dest, t, sizeof(t));
+}
+
+// Creduced crash.
+
+void *ga_copy_strings_from_0;
+void *memmove();
+void alloc();
+void ga_copy_strings() {
+ int i = 0;
+ for (;; ++i)
+ memmove(alloc, ((char **)ga_copy_strings_from_0)[i], 1);
+}
+
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The intention of the patch makes sense to me. However, I believe that the bug is inside the Store. It should not say it's Undefined
if actually an existing binding overlaps (actually completely covers) the requested region.
So, that said, the checker does the right thing, but the Store lies to it.
And especially for memcpy-like raw memory manipulating APIs, implementing this element-type-wise check is really difficult. Partially because in CSA we have only limited trustworthy type information for such buffers.
I'm also pragmatic with the subject, and believe in solutions today, than waiting for one years to come. But I still want to ask if we could join forces and implement the proposed Store model discussed here, as a counter proposal for the original RFC?
For instance, that would make such loads not result in an Undefined value - unless it's actually uninitialized.
(Once we had that Store model, we would probably want to revert this element-type-based solution outlined here.)
Anyways, I'm looking forward to extensively review this PR. I just grabbed the opportunity to get attention to the proposed Store model and maybe get that one day.
clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
Outdated
Show resolved
Hide resolved
clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
Outdated
Show resolved
Hide resolved
SVal FirstElementVal = | ||
State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>(); | ||
if (!isa<Loc>(FirstElementVal)) | ||
return State; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, you cast it using .castAs<Loc>()
, then you check if it's a Loc
? This definitely smells.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh ya, this is a historical artifact. Though I wonder if the cast is a little too brave...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall I'm satisfied with this commit, but I have some minor remarks.
I'm publishing some review comments now, but I'll continue the review tomorrow.
if (const auto *element = MR->getAs<ElementRegion>()) { | ||
Ret = element->getBaseRegion(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This getBaseRegion()
call could produce some surprising and inconsistent results: e.g. if you have
struct S {
int field[20][20];
} array[10];
then calling getOriginRegion
on the ElementRegion representing array[5].field[3]
would return the FieldRegion representing array[5].field
, but (if I understand this correctly) calling it on array[5].field[3][6]
would return the NonParamVarRegion representing array
because getBaseRegion()
strips the FieldRegion layer.
Consider writing an explicit loop that only strips ElementRegion layers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I changed getBaseRegion()
to getSuperRegion()
, because we're only really trying to peel off a simple ElementRegeion
layer. Unfortunately, I stumbled upon a big in StoreManager, for which I added a new test case (memcpy_array_from_matrix()
).
OS << "The last element of the "; | ||
printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1); | ||
OS << " argument to access (the "; | ||
printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1); | ||
OS << ") is undefined"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OS << "The last element of the "; | |
printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1); | |
OS << " argument to access (the "; | |
printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1); | |
OS << ") is undefined"; | |
OS << "The last ("; | |
printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1); | |
OS << ") element of the "; | |
printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1); | |
OS << " argument is undefined"; |
Perhaps rewrite the message to something like this -- it was difficult to understand when the index within the array was at the end, separated from the word "last".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did keep the "to access" part to indicate that we're not necessarily copying the entire array, only as much as the size parameter dictates.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What would you think about messages like "In the 4th argument the last accessed element (which is at index 31) is undefined"?
This differs from your suggestion in three ways:
- You're right that we need to emphasize that we're only accessing a part of the array, but I feel that "last accessed element" would be a better phrasing than the dangling "to access" (which was confusing for me before your explanation).
- I think it's better to express the index as a zero-based index instead of an one-based ordinal number, because one-based indexing could be unexpected and confusing in C/C++.
- I reordered the parts of the message to ensure that "last accessed element" and the "(which is at index ...)" note can be next to each other.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I read the rest of the commit and only found two very minor observations.
I checked out the code and gave it a test run. Across 200+ projects, it suppresses around 650 reports. I've sampled them and they appear like the FP this patch was motivated by. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the updates!
Unfortunately, as I dug deeper into your function getOriginRegion()
, I realized that it is logically incorrect (see inline comment for explanation). (I was a bit suspicious during the earlier reviews as well, but this is a very complex area and earlier I didn't spend enough time on it.)
|
||
// Basically 1 -> 1st, 12 -> 12th, etc. | ||
static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) { | ||
Os << Idx << llvm::getOrdinalSuffix(Idx); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Os << Idx << llvm::getOrdinalSuffix(Idx); | |
switch (Idx) { | |
case 1: | |
Os << "first"; | |
break; | |
case 2: | |
Os << "second"; | |
break; | |
case 3: | |
Os << "third"; | |
break; | |
default: | |
Os << Idx << llvm::getOrdinalSuffix(Idx); | |
} |
Consider printing "first", "second" and "third" instead of "1st", "2nd" and "3rd" because IMO it makes the diagnostics easier to read -- and this function will be used for argument indices in builtin functions, so these cases will be very common.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, I agree that "(second)" doesn't look good when it represents an element index (as highlighted):
but that's already changed to "(at index 1)".
On the other hand, I still think "second argument" is significantly better than "2nd argument":
I can accept "2nd argument" if you think that it's significantly better, but if you're indifferent, then please change it.
// Try to get hold of the origin region (e.g. the actual array region from an | ||
// element region). | ||
static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) { | ||
const MemRegion *MR = ER->getSuperRegion(); | ||
const MemRegion *Ret = MR; | ||
assert(MR); | ||
if (const auto *sym = MR->getAs<SymbolicRegion>()) { | ||
SymbolRef sym2 = sym->getSymbol(); | ||
if (!sym2) | ||
return nullptr; | ||
Ret = sym2->getOriginRegion(); | ||
} | ||
return dyn_cast_or_null<TypedValueRegion>(Ret); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought several hours about this function, and my TLDR conclusion is that (1) this logic is wrong (2) SymExpr::getOriginRegion()
is a red flag and (3) just use a plain getSuperRegion()
call instead of this function. (Note that the branch where SymExpr::getOriginRegion() is used is not exercised in any testcase.)
More precisely, the problem is that SymExpr::getOriginRegion()
is an extremely unnatural operation, which extracts a part of the "opaque unique tag" of the symbol. The result of S->getOriginRegion()
is:
- the region R when
S
is aSymbolRegionValue
which represents "the value stored in the memory region R at the start of the analysis of this function" - the region R when
S
is aSymbolDerived
which represents "the value stored in the memory region R after a certain invalidation" - nullpointer in other cases (e.g.
SymbolConjured
).
As a concrete example, if you have a function like
void save_to_logs(int *src, int begin, int count) {
memcpy(global_log_array, src + begin, count * sizeof(int));
}
then the element region representing src + begin
would be passed to getOriginRegion()
, which would first strip the ElementRegion
layer to get a SymbolicRegion
, then unwrap that to get a SymbolRegionValue
and finally return the ParamVarRegion
which represents the pointer-sized memory area of the variable src
.
This won't lead to a crash because this ParamVarRegion
is immediately discarded by the next check, which requires an array type:
if (!Orig->getValueType()->isArrayType())
return State;
However, there are no situations where getOriginRegion()
= "region where a pointer to this symbolic region was stored at some earlier step" is actually useful, so you should just remove the branch using it (and then the remaining logic is just a getSuperRegion()
+ dyn_cast_or_null<TypedValueRegion>
which doesn't need to be a separate function).
By the way, I see that you need a TypedValueRegion
after this point to continue the calculations -- but if the SuperRegion of your ElementRegion is a SymbolicRegion, then the right thing to do is just discarding it. A SymbolicRegion
is not necessarily a real "region", it may just represent a pointer that points into the middle of some opaque unknown memory.
I disagree. Whether a value is initialized or is well defined are two different concepts (but happen to overlap in most cases). Reading the last byte of an integer as a character should yield an
I think adding an "isUnitialized" interface would be better. |
@@ -461,7 +446,7 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C, | |||
if (!ER) | |||
return State; | |||
|
|||
const TypedValueRegion *Orig = getOriginRegion(ER); | |||
const auto *Orig = ER->getSuperRegion()->getAs<TypedValueRegion>(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nitpick: also change the name "Orig
".
…tializedRead I intend to fix this checker up so that we can move it out of alpha. I made a bunch of analyses, and found many similar false positives: ```c++ int t[] = {1,2,3}; memcpy(dst, t, sizeof(t) / sizeof(t[0])); // warn ``` The problem here is the way CStringChecker checks whether the destination and source buffers are initialized: heuristically, it only checks the first and last element. This is fine, however, it retrieves these elements as characters, even if the underlaying object is not a character array. Reading the last byte of an integer is undefined, so the checker emits a bug here. A quick search tells you the rationale: "Both objects are reinterpreted as arrays of unsigned char.". But the static analyzer right now can't check byte-by-byte if a memory region is _initialized_, it can only check if its a well-defined character or not. In this patch, I pry the original array out of the arguments to memcpy (and similar functions), and retrieve the actual first and last elements according to the array's actual element type. Currently, my improvements reduced the number of reports to 29 on these projects: memcached,tmux,curl,twin,vim,openssl,sqlite,ffmpeg,postgres https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_upper_bound_patched&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100 Before my patch, there were 87. https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_baseline&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100 Change-Id: I0ef16beed735201af857abb6ed02de764af2e78a
ccb1197
to
d37f0cb
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, with the disclaimer that I'd still prefer e.g. "second argument instead of "2nd argument" -- but I won't block the review with this bikeshedding.
I merget the PR as is, but I'll keep the warning message in mind, I'm open to changing it as we are getting closer to moving out-of-alpha. |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/1014 Here is the relevant piece of the build log for the reference:
|
…tializedRead (llvm#95408) I intend to fix this checker up so that we can move it out of alpha. I made a bunch of analyses, and found many similar false positives: ```c++ int t[] = {1,2,3}; memcpy(dst, t, sizeof(t) / sizeof(t[0])); // warn ``` The problem here is the way CStringChecker checks whether the destination and source buffers are initialized: heuristically, it only checks the first and last element. This is fine, however, it retrieves these elements as characters, even if the underlaying object is not a character array. Reading the last byte of an integer is undefined, so the checker emits a bug here. A quick search tells you the rationale: "Both objects are reinterpreted as arrays of unsigned char.". But the static analyzer right now can't check byte-by-byte if a memory region is _initialized_, it can only check if its a well-defined character or not. In this patch, I pry the original array out of the arguments to memcpy (and similar functions), and retrieve the actual first and last elements according to the array's actual element type. Currently, my improvements reduced the number of reports to 29 on these projects: memcached,tmux,curl,twin,vim,openssl,sqlite,ffmpeg,postgres https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_upper_bound_patched&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100 Before my patch, there were 87. https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_baseline&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100
I intend to fix this checker up so that we can move it out of alpha. I made a bunch of analyses, and found many similar false positives:
The problem here is the way CStringChecker checks whether the destination and source buffers are initialized: heuristically, it only checks the first and last element. This is fine, however, it retrieves these elements as characters, even if the underlaying object is not a character array. Reading the last byte of an integer is undefined, so the checker emits a bug here.
A quick search tells you the rationale: "Both objects are reinterpreted as arrays of unsigned char.". But the static analyzer right now can't check byte-by-byte if a memory region is initialized, it can only check if its a well-defined character or not.
In this patch, I pry the original array out of the arguments to memcpy (and similar functions), and retrieve the actual first and last elements according to the array's actual element type.
Currently, my improvements reduced the number of reports to 29 on these projects: memcached,tmux,curl,twin,vim,openssl,sqlite,ffmpeg,postgres
https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_upper_bound_patched&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100
Before my patch, there were 87.
https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_baseline&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100