Skip to content

[analyzer] Removing untrusted buffer size taint warning #68607

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions clang/docs/analyzer/checkers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3033,13 +3033,6 @@ Further examples of injection vulnerabilities this checker can find.
sprintf(buf, s); // warn: untrusted data used as a format string
}

void test() {
size_t ts;
scanf("%zd", &ts); // 'ts' marked as tainted
int *p = (int *)malloc(ts * sizeof(int));
// warn: untrusted data used as buffer size
}

There are built-in sources, propagations and sinks even if no external taint
configuration is provided.

Expand Down Expand Up @@ -3067,9 +3060,7 @@ Default propagations rules:

Default sinks:
``printf``, ``setproctitle``, ``system``, ``popen``, ``execl``, ``execle``,
``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen``,
``memcpy``, ``memmove``, ``strncpy``, ``strndup``, ``malloc``, ``calloc``,
``alloca``, ``memccpy``, ``realloc``, ``bcopy``
``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen``

Please note that there are no built-in filter functions.

Expand Down
3 changes: 3 additions & 0 deletions clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,9 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,

// If the size can be nonzero, we have to check the other arguments.
if (stateNonZeroSize) {
// TODO: If Size is tainted and we cannot prove that it is smaller or equal
// to the size of the destination buffer, then emit a warning
// that an attacker may provoke a buffer overflow error.
state = stateNonZeroSize;

// Ensure the destination is not null. If it is NULL there will be a
Expand Down
57 changes: 18 additions & 39 deletions clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
"Untrusted data is passed to a system call "
"(CERT/STR02-C. Sanitize data passed to complex subsystems)";

/// Check if tainted data is used as a buffer size in strn.. functions,
/// and allocators.
constexpr llvm::StringLiteral MsgTaintedBufferSize =
"Untrusted data is used to specify the buffer size "
"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
"for character data and the null terminator)";

/// Check if tainted data is used as a custom sink's parameter.
constexpr llvm::StringLiteral MsgCustomSink =
"Untrusted data is passed to a user-defined sink";
Expand Down Expand Up @@ -298,14 +291,6 @@ class GenericTaintRule {
return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
}

/// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
static GenericTaintRule
SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs,
std::optional<StringRef> Msg = std::nullopt) {
return {
std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
}

/// Process a function which could either be a taint source, a taint sink, a
/// taint filter or a taint propagator.
void process(const GenericTaintChecker &Checker, const CallEvent &Call,
Expand Down Expand Up @@ -733,12 +718,21 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
{{CDM::CLibraryMaybeHardened, {{"stpcpy"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
{{CDM::CLibraryMaybeHardened, {{"strcat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
{{CDM::CLibraryMaybeHardened, {{"wcsncat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
{{CDM::CLibrary, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{CDM::CLibrary, {{"strdupa"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{CDM::CLibrary, {{"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
{{CDM::CLibrary, BI.getName(Builtin::BImemcpy)},
TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
{{CDM::CLibrary, {BI.getName(Builtin::BImemmove)}},
TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
{{CDM::CLibrary, {BI.getName(Builtin::BIstrncpy)}},
TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
{{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}},
TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
{{CDM::CLibrary, {"bcopy"}}, TR::Prop({{0, 2}}, {{1}})},

// Sinks
{{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
Expand All @@ -752,30 +746,15 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
{{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
{{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
{{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
{{CDM::CLibrary, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
{{CDM::CLibrary, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
{{CDM::CLibrary, {{"alloca"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
{{CDM::CLibrary, {{"memccpy"}}}, TR::Sink({{3}}, MsgTaintedBufferSize)},
{{CDM::CLibrary, {{"realloc"}}}, TR::Sink({{1}}, MsgTaintedBufferSize)},

// malloc, calloc, alloca, realloc, memccpy
// are intentionally not marked as taint sinks because unconditional
// reporting for these functions generates many false positives.
// These taint sinks should be implemented in other checkers with more
// sophisticated sanitation heuristics.
{{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
{{{{"setproctitle_fast"}}},
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},

// SinkProps
{{CDM::CLibraryMaybeHardened, BI.getName(Builtin::BImemcpy)},
TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
MsgTaintedBufferSize)},
{{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BImemmove)}},
TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
MsgTaintedBufferSize)},
{{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrncpy)}},
TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
MsgTaintedBufferSize)},
{{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}},
TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
MsgTaintedBufferSize)},
{{CDM::CLibrary, {{"bcopy"}}},
TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}};

// `getenv` returns taint only in untrusted environments.
if (TR::UntrustedEnv(C)) {
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1819,6 +1819,10 @@ ProgramStateRef MallocChecker::MallocMemAux(CheckerContext &C,
if (Size.isUndef())
Size = UnknownVal();

// TODO: If Size is tainted and we cannot prove that it is within
// reasonable bounds, emit a warning that an attacker may
// provoke a memory exhaustion error.

// Set the region's extent.
State = setDynamicExtent(State, RetVal.getAsRegion(),
Size.castAs<DefinedOrUnknownSVal>(), SVB);
Expand Down
69 changes: 35 additions & 34 deletions clang/test/Analysis/taint-diagnostic-visitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ int scanf(const char *restrict format, ...);
int system(const char *command);
char* getenv( const char* env_var );
size_t strlen( const char* str );
int atoi( const char* str );
char *strcat( char *dest, const char *src );
char* strcpy( char* dest, const char* src );
void *malloc(size_t size );
void free( void *ptr );
char *fgets(char *str, int n, FILE *stream);
Expand Down Expand Up @@ -53,34 +54,32 @@ void taintDiagnosticVLA(void) {

// Tests if the originated note is correctly placed even if the path is
// propagating through variables and expressions
char *taintDiagnosticPropagation(){
char *pathbuf;
char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
// expected-note@-1 {{Taint propagated to the return value}}
if (size){ // expected-note {{Assuming 'size' is non-null}}
// expected-note@-1 {{Taking true branch}}
pathbuf=(char*) malloc(atoi(size)); // expected-warning{{Untrusted data is used to specify the buffer size}}
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
// expected-note@-2 {{Taint propagated to the return value}}
return pathbuf;
int taintDiagnosticPropagation(){
int res;
char *cmd=getenv("CMD"); // expected-note {{Taint originated here}}
// expected-note@-1 {{Taint propagated to the return value}}
if (cmd){ // expected-note {{Assuming 'cmd' is non-null}}
// expected-note@-1 {{Taking true branch}}
res = system(cmd); // expected-warning{{Untrusted data is passed to a system call}}
// expected-note@-1{{Untrusted data is passed to a system call}}
return res;
}
return 0;
return -1;
}

// Taint origin should be marked correctly even if there are multiple taint
// sources in the function
char *taintDiagnosticPropagation2(){
char *pathbuf;
int taintDiagnosticPropagation2(){
int res;
char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source
char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
// expected-note@-1 {{Taint propagated to the return value}}
char *cmd=getenv("CMD"); // expected-note {{Taint originated here}}
// expected-note@-1 {{Taint propagated to the return value}}
char *user_env=getenv("USER_ENV_VAR");//unrelated taint source
if (size){ // expected-note {{Assuming 'size' is non-null}}
// expected-note@-1 {{Taking true branch}}
pathbuf=(char*) malloc(atoi(size)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
// expected-note@-2 {{Taint propagated to the return value}}
return pathbuf;
if (cmd){ // expected-note {{Assuming 'cmd' is non-null}}
// expected-note@-1 {{Taking true branch}}
res = system(cmd); // expected-warning{{Untrusted data is passed to a system call}}
// expected-note@-1{{Untrusted data is passed to a system call}}
return res;
}
return 0;
}
Expand All @@ -95,22 +94,24 @@ void testReadStdIn(){
}

void multipleTaintSources(void) {
int x,y,z;
scanf("%d", &x); // expected-note {{Taint originated here}}
char cmd[2048], file[1024];
scanf ("%1022[^\n] ", cmd); // expected-note {{Taint originated here}}
// expected-note@-1 {{Taint propagated to the 2nd argument}}
scanf("%d", &y); // expected-note {{Taint originated here}}
scanf ("%1023[^\n]", file); // expected-note {{Taint originated here}}
// expected-note@-1 {{Taint propagated to the 2nd argument}}
scanf("%d", &z);
int* ptr = (int*) malloc(y + x); // expected-warning {{Untrusted data is used to specify the buffer size}}
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
free (ptr);
strcat(cmd, file); // expected-note {{Taint propagated to the 1st argument}}
strcat(cmd, " "); // expected-note {{Taint propagated to the 1st argument}}
system(cmd); // expected-warning {{Untrusted data is passed to a system call}}
// expected-note@-1{{Untrusted data is passed to a system call}}
}

void multipleTaintedArgs(void) {
int x,y;
scanf("%d %d", &x, &y); // expected-note {{Taint originated here}}
char cmd[1024], file[1024], buf[2048];
scanf("%1022s %1023s", cmd, file); // expected-note {{Taint originated here}}
// expected-note@-1 {{Taint propagated to the 2nd argument, 3rd argument}}
int* ptr = (int*) malloc(x + y); // expected-warning {{Untrusted data is used to specify the buffer size}}
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
free (ptr);
strcpy(buf, cmd);// expected-note {{Taint propagated to the 1st argument}}
strcat(buf, " ");// expected-note {{Taint propagated to the 1st argument}}
strcat(buf, file);// expected-note {{Taint propagated to the 1st argument}}
system(buf); // expected-warning {{Untrusted data is passed to a system call}}
// expected-note@-1{{Untrusted data is passed to a system call}}
}
28 changes: 17 additions & 11 deletions clang/test/Analysis/taint-generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -305,15 +305,21 @@ void testGets_s(void) {

void testTaintedBufferSize(void) {
size_t ts;
// The functions malloc, calloc, bcopy and memcpy are not taint sinks in the
// default config of GenericTaintChecker (because that would cause too many
// false positives).
// FIXME: We should generate warnings when a value passed to these functions
// is tainted and _can be very large_ (because that's exploitable). This
// functionality probably belongs to the checkers that do more detailed
// modeling of these functions (MallocChecker and CStringChecker).
scanf("%zd", &ts);

int *buf1 = (int*)malloc(ts*sizeof(int)); // expected-warning {{Untrusted data is used to specify the buffer size}}
char *dst = (char*)calloc(ts, sizeof(char)); //expected-warning {{Untrusted data is used to specify the buffer size}}
bcopy(buf1, dst, ts); // expected-warning {{Untrusted data is used to specify the buffer size}}
__builtin_memcpy(dst, buf1, (ts + 4)*sizeof(char)); // expected-warning {{Untrusted data is used to specify the buffer size}}
int *buf1 = (int*)malloc(ts*sizeof(int)); // warn here, ts is unbounded and tainted
char *dst = (char*)calloc(ts, sizeof(char)); // warn here, ts is unbounded tainted
bcopy(buf1, dst, ts); // no warning here, since the size of buf1, dst equals ts. Cannot overflow.
__builtin_memcpy(dst, buf1, (ts + 4)*sizeof(char)); // warn here, dst overflows (whatever the value of ts)

// If both buffers are trusted, do not issue a warning.
char *dst2 = (char*)malloc(ts*sizeof(char)); // expected-warning {{Untrusted data is used to specify the buffer size}}
char *dst2 = (char*)malloc(ts*sizeof(char)); // warn here, ts in unbounded
strncat(dst2, dst, ts); // no-warning
}

Expand Down Expand Up @@ -353,7 +359,7 @@ void testStruct(void) {

sock = socket(AF_INET, SOCK_STREAM, 0);
read(sock, &tainted, sizeof(tainted));
__builtin_memcpy(buffer, tainted.buf, tainted.length); // expected-warning {{Untrusted data is used to specify the buffer size}}
clang_analyzer_isTainted_int(tainted.length); // expected-warning {{YES }}
}

void testStructArray(void) {
Expand All @@ -368,17 +374,17 @@ void testStructArray(void) {
__builtin_memset(srcbuf, 0, sizeof(srcbuf));

read(sock, &tainted[0], sizeof(tainted));
__builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{YES}}

__builtin_memset(&tainted, 0, sizeof(tainted));
read(sock, &tainted, sizeof(tainted));
__builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{YES}}

__builtin_memset(&tainted, 0, sizeof(tainted));
// If we taint element 1, we should not raise an alert on taint for element 0 or element 2
read(sock, &tainted[1], sizeof(tainted));
__builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // no-warning
__builtin_memcpy(dstbuf, srcbuf, tainted[2].length); // no-warning
clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{NO}}
clang_analyzer_isTainted_int(tainted[2].length); // expected-warning {{NO}}
}

void testUnion(void) {
Expand Down