Skip to content

Commit 69bc159

Browse files
authored
[analyzer] Refine invalidation caused by fread (#93408)
This change enables more accurate modeling of the write effects of `fread`. In particular, instead of invalidating the whole buffer, in a best-effort basis, we would try to invalidate the actually accesses elements of the buffer. This preserves the previous value of the buffer of the unaffected slots. As a result, diagnose more uninitialized buffer uses for example. Currently, this refined invalidation only triggers for `fread` if and only if the `count` parameter and the buffer pointer's index component are concrete or perfectly-constrained symbols. Additionally, if the `fread` would read more than 64 elements, the whole buffer is invalidated as before. This is to have safeguards against performance issues. Refer to the comments of the assertions in the following example to see the changes in the diagnostics: ```c++ void demo() { FILE *fp = fopen("/home/test", "rb+"); if (!fp) return; int buffer[10]; // uninitialized int read_items = fread(buffer+1, sizeof(int), 5, fp); if (5 == read_items) { int v1 = buffer[1]; // Unknown value but not garbage. clang_analyzer_isTainted(v1); // expected-warning {{YES}} <-- Would be "NO" without this patch. clang_analyzer_dump(v1); // expected-warning {{conj_}} <-- Not a "derived" symbol, so it's directly invalidated now. int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before. (void)(v1 + v0); } else { // If 'fread' had an error. int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before. (void)v0; } fclose(fp); } ``` CPP-3247, CPP-3802 Co-authored by Marco Borgeaud (marco-antognini-sonarsource)
1 parent 4afd286 commit 69bc159

File tree

5 files changed

+577
-15
lines changed

5 files changed

+577
-15
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1503,7 +1503,7 @@ class MemRegionManager {
15031503
/// associated element type, index, and super region.
15041504
const ElementRegion *getElementRegion(QualType elementType, NonLoc Idx,
15051505
const SubRegion *superRegion,
1506-
ASTContext &Ctx);
1506+
const ASTContext &Ctx);
15071507

15081508
const ElementRegion *getElementRegionWithSuper(const ElementRegion *ER,
15091509
const SubRegion *superRegion) {

clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp

Lines changed: 125 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -717,18 +717,56 @@ const ExplodedNode *StreamChecker::getAcquisitionSite(const ExplodedNode *N,
717717
return nullptr;
718718
}
719719

720+
static std::optional<int64_t> getKnownValue(ProgramStateRef State, SVal V) {
721+
SValBuilder &SVB = State->getStateManager().getSValBuilder();
722+
if (const llvm::APSInt *Int = SVB.getKnownValue(State, V))
723+
return Int->tryExtValue();
724+
return std::nullopt;
725+
}
726+
727+
/// Invalidate only the requested elements instead of the whole buffer.
728+
/// This is basically a refinement of the more generic 'escapeArgs' or
729+
/// the plain old 'invalidateRegions'.
730+
static ProgramStateRef
731+
escapeByStartIndexAndCount(ProgramStateRef State, const CallEvent &Call,
732+
unsigned BlockCount, const SubRegion *Buffer,
733+
QualType ElemType, int64_t StartIndex,
734+
int64_t ElementCount) {
735+
constexpr auto DoNotInvalidateSuperRegion =
736+
RegionAndSymbolInvalidationTraits::InvalidationKinds::
737+
TK_DoNotInvalidateSuperRegion;
738+
739+
const LocationContext *LCtx = Call.getLocationContext();
740+
const ASTContext &Ctx = State->getStateManager().getContext();
741+
SValBuilder &SVB = State->getStateManager().getSValBuilder();
742+
auto &RegionManager = Buffer->getMemRegionManager();
743+
744+
SmallVector<SVal> EscapingVals;
745+
EscapingVals.reserve(ElementCount);
746+
747+
RegionAndSymbolInvalidationTraits ITraits;
748+
for (auto Idx : llvm::seq(StartIndex, StartIndex + ElementCount)) {
749+
NonLoc Index = SVB.makeArrayIndex(Idx);
750+
const auto *Element =
751+
RegionManager.getElementRegion(ElemType, Index, Buffer, Ctx);
752+
EscapingVals.push_back(loc::MemRegionVal(Element));
753+
ITraits.setTrait(Element, DoNotInvalidateSuperRegion);
754+
}
755+
return State->invalidateRegions(
756+
EscapingVals, Call.getOriginExpr(), BlockCount, LCtx,
757+
/*CausesPointerEscape=*/false,
758+
/*InvalidatedSymbols=*/nullptr, &Call, &ITraits);
759+
}
760+
720761
static ProgramStateRef escapeArgs(ProgramStateRef State, CheckerContext &C,
721762
const CallEvent &Call,
722763
ArrayRef<unsigned int> EscapingArgs) {
723-
const auto *CE = Call.getOriginExpr();
724-
725-
SmallVector<SVal> EscapingVals;
726-
EscapingVals.reserve(EscapingArgs.size());
727-
for (auto EscArgIdx : EscapingArgs)
728-
EscapingVals.push_back(Call.getArgSVal(EscArgIdx));
729-
State = State->invalidateRegions(EscapingVals, CE, C.blockCount(),
730-
C.getLocationContext(),
731-
/*CausesPointerEscape=*/false);
764+
auto GetArgSVal = [&Call](int Idx) { return Call.getArgSVal(Idx); };
765+
auto EscapingVals = to_vector(map_range(EscapingArgs, GetArgSVal));
766+
State = State->invalidateRegions(EscapingVals, Call.getOriginExpr(),
767+
C.blockCount(), C.getLocationContext(),
768+
/*CausesPointerEscape=*/false,
769+
/*InvalidatedSymbols=*/nullptr);
732770
return State;
733771
}
734772

@@ -907,6 +945,76 @@ void StreamChecker::preWrite(const FnDescription *Desc, const CallEvent &Call,
907945
C.addTransition(State);
908946
}
909947

948+
static std::optional<QualType> getPointeeType(const MemRegion *R) {
949+
if (!R)
950+
return std::nullopt;
951+
if (const auto *ER = dyn_cast<ElementRegion>(R))
952+
return ER->getElementType();
953+
if (const auto *TR = dyn_cast<TypedValueRegion>(R))
954+
return TR->getValueType();
955+
if (const auto *SR = dyn_cast<SymbolicRegion>(R))
956+
return SR->getPointeeStaticType();
957+
return std::nullopt;
958+
}
959+
960+
static std::optional<NonLoc> getStartIndex(SValBuilder &SVB,
961+
const MemRegion *R) {
962+
if (!R)
963+
return std::nullopt;
964+
965+
auto Zero = [&SVB] {
966+
BasicValueFactory &BVF = SVB.getBasicValueFactory();
967+
return nonloc::ConcreteInt(BVF.getIntValue(0, /*isUnsigned=*/false));
968+
};
969+
970+
if (const auto *ER = dyn_cast<ElementRegion>(R))
971+
return ER->getIndex();
972+
if (const auto *TR = dyn_cast<TypedValueRegion>(R))
973+
return Zero();
974+
if (const auto *SR = dyn_cast<SymbolicRegion>(R))
975+
return Zero();
976+
return std::nullopt;
977+
}
978+
979+
static ProgramStateRef
980+
tryToInvalidateFReadBufferByElements(ProgramStateRef State, CheckerContext &C,
981+
const CallEvent &Call, NonLoc SizeVal,
982+
NonLoc NMembVal) {
983+
// Try to invalidate the individual elements.
984+
const auto *Buffer =
985+
dyn_cast_or_null<SubRegion>(Call.getArgSVal(0).getAsRegion());
986+
987+
std::optional<QualType> ElemTy = getPointeeType(Buffer);
988+
std::optional<SVal> StartElementIndex =
989+
getStartIndex(C.getSValBuilder(), Buffer);
990+
991+
// Drop the outermost ElementRegion to get the buffer.
992+
if (const auto *ER = dyn_cast_or_null<ElementRegion>(Buffer))
993+
Buffer = dyn_cast<SubRegion>(ER->getSuperRegion());
994+
995+
std::optional<int64_t> CountVal = getKnownValue(State, NMembVal);
996+
std::optional<int64_t> Size = getKnownValue(State, SizeVal);
997+
std::optional<int64_t> StartIndexVal =
998+
getKnownValue(State, StartElementIndex.value_or(UnknownVal()));
999+
1000+
if (ElemTy && CountVal && Size && StartIndexVal) {
1001+
int64_t NumBytesRead = Size.value() * CountVal.value();
1002+
int64_t ElemSizeInChars =
1003+
C.getASTContext().getTypeSizeInChars(*ElemTy).getQuantity();
1004+
bool IncompleteLastElement = (NumBytesRead % ElemSizeInChars) != 0;
1005+
int64_t NumCompleteOrIncompleteElementsRead =
1006+
NumBytesRead / ElemSizeInChars + IncompleteLastElement;
1007+
1008+
constexpr int MaxInvalidatedElementsLimit = 64;
1009+
if (NumCompleteOrIncompleteElementsRead <= MaxInvalidatedElementsLimit) {
1010+
return escapeByStartIndexAndCount(State, Call, C.blockCount(), Buffer,
1011+
*ElemTy, *StartIndexVal,
1012+
NumCompleteOrIncompleteElementsRead);
1013+
}
1014+
}
1015+
return nullptr;
1016+
}
1017+
9101018
void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
9111019
const CallEvent &Call, CheckerContext &C,
9121020
bool IsFread) const {
@@ -937,8 +1045,14 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
9371045

9381046
// At read, invalidate the buffer in any case of error or success,
9391047
// except if EOF was already present.
940-
if (IsFread && !E.isStreamEof())
941-
State = escapeArgs(State, C, Call, {0});
1048+
if (IsFread && !E.isStreamEof()) {
1049+
// Try to invalidate the individual elements.
1050+
// Otherwise just fall back to invalidating the whole buffer.
1051+
ProgramStateRef InvalidatedState = tryToInvalidateFReadBufferByElements(
1052+
State, C, Call, *SizeVal, *NMembVal);
1053+
State =
1054+
InvalidatedState ? InvalidatedState : escapeArgs(State, C, Call, {0});
1055+
}
9421056

9431057
// Generate a transition for the success state.
9441058
// If we know the state to be FEOF at fread, do not add a success state.

clang/lib/StaticAnalyzer/Core/MemRegion.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,10 +1155,10 @@ MemRegionManager::getCompoundLiteralRegion(const CompoundLiteralExpr *CL,
11551155
return getSubRegion<CompoundLiteralRegion>(CL, sReg);
11561156
}
11571157

1158-
const ElementRegion*
1158+
const ElementRegion *
11591159
MemRegionManager::getElementRegion(QualType elementType, NonLoc Idx,
1160-
const SubRegion* superRegion,
1161-
ASTContext &Ctx){
1160+
const SubRegion *superRegion,
1161+
const ASTContext &Ctx) {
11621162
QualType T = Ctx.getCanonicalType(elementType).getUnqualifiedType();
11631163

11641164
llvm::FoldingSetNodeID ID;

clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,15 @@
55
// suppressed.
66
#pragma clang system_header
77

8+
typedef __typeof(sizeof(int)) size_t;
89
typedef struct _FILE {
910
unsigned char *_p;
1011
} FILE;
1112
FILE *fopen(const char *restrict, const char *restrict) __asm("_" "fopen" );
1213
int fputc(int, FILE *);
1314
int fputs(const char *restrict, FILE *restrict) __asm("_" "fputs" );
15+
size_t fread(void *buffer, size_t size, size_t count, FILE *stream);
16+
int fgetc(FILE *stream);
1417
int fclose(FILE *);
1518
void exit(int);
1619

0 commit comments

Comments
 (0)