Skip to content

Commit 6ceb1c0

Browse files
authored
[analyzer] Remove untrusted buffer size warning in the TaintPropagation checker (#68607)
Before this commit the the checker alpha.security.taint.TaintPropagation always reported warnings when the size argument of a memcpy-like or malloc-like function was tainted. However, this produced false positive reports in situations where the size was tainted, but correctly performed bound checks guaranteed the safety of the call. This commit removes the rough "always warn if the size argument is tainted" heuristic; but it would be good to add a more refined "warns if the size argument is tainted and can be too large" heuristic in follow-up commits. That logic would belong to CStringChecker and MallocChecker, because those are the checkers responsible for the more detailed modeling of memcpy-like and malloc-like functions. To mark this plan, TODO comments are added in those two checkers. There were several test cases that used these sinks to test generic properties of taint tracking; those were adapted to use different logic. As a minor unrelated change, this commit ensures that strcat (and its wide variant, wcsncat) propagates taint from the first argument to the first argument, i.e. a tainted string remains tainted if we concatenate it with another string. This change was required because the adapted variant of multipleTaintedArgs is relying on strncat to compose a value that combines taint from two different sources.
1 parent 9b9a2a2 commit 6ceb1c0

File tree

6 files changed

+78
-94
lines changed

6 files changed

+78
-94
lines changed

clang/docs/analyzer/checkers.rst

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3033,13 +3033,6 @@ Further examples of injection vulnerabilities this checker can find.
30333033
sprintf(buf, s); // warn: untrusted data used as a format string
30343034
}
30353035
3036-
void test() {
3037-
size_t ts;
3038-
scanf("%zd", &ts); // 'ts' marked as tainted
3039-
int *p = (int *)malloc(ts * sizeof(int));
3040-
// warn: untrusted data used as buffer size
3041-
}
3042-
30433036
There are built-in sources, propagations and sinks even if no external taint
30443037
configuration is provided.
30453038
@@ -3067,9 +3060,7 @@ Default propagations rules:
30673060
30683061
Default sinks:
30693062
``printf``, ``setproctitle``, ``system``, ``popen``, ``execl``, ``execle``,
3070-
``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen``,
3071-
``memcpy``, ``memmove``, ``strncpy``, ``strndup``, ``malloc``, ``calloc``,
3072-
``alloca``, ``memccpy``, ``realloc``, ``bcopy``
3063+
``execlp``, ``execv``, ``execvp``, ``execvP``, ``execve``, ``dlopen``
30733064
30743065
Please note that there are no built-in filter functions.
30753066

clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,6 +1338,9 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
13381338

13391339
// If the size can be nonzero, we have to check the other arguments.
13401340
if (stateNonZeroSize) {
1341+
// TODO: If Size is tainted and we cannot prove that it is smaller or equal
1342+
// to the size of the destination buffer, then emit a warning
1343+
// that an attacker may provoke a buffer overflow error.
13411344
state = stateNonZeroSize;
13421345

13431346
// Ensure the destination is not null. If it is NULL there will be a

clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Lines changed: 18 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
5959
"Untrusted data is passed to a system call "
6060
"(CERT/STR02-C. Sanitize data passed to complex subsystems)";
6161

62-
/// Check if tainted data is used as a buffer size in strn.. functions,
63-
/// and allocators.
64-
constexpr llvm::StringLiteral MsgTaintedBufferSize =
65-
"Untrusted data is used to specify the buffer size "
66-
"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
67-
"for character data and the null terminator)";
68-
6962
/// Check if tainted data is used as a custom sink's parameter.
7063
constexpr llvm::StringLiteral MsgCustomSink =
7164
"Untrusted data is passed to a user-defined sink";
@@ -298,14 +291,6 @@ class GenericTaintRule {
298291
return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
299292
}
300293

301-
/// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
302-
static GenericTaintRule
303-
SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs,
304-
std::optional<StringRef> Msg = std::nullopt) {
305-
return {
306-
std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
307-
}
308-
309294
/// Process a function which could either be a taint source, a taint sink, a
310295
/// taint filter or a taint propagator.
311296
void process(const GenericTaintChecker &Checker, const CallEvent &Call,
@@ -733,12 +718,21 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
733718
{{CDM::CLibraryMaybeHardened, {{"stpcpy"}}},
734719
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
735720
{{CDM::CLibraryMaybeHardened, {{"strcat"}}},
736-
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
721+
TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
737722
{{CDM::CLibraryMaybeHardened, {{"wcsncat"}}},
738-
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
723+
TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},
739724
{{CDM::CLibrary, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
740725
{{CDM::CLibrary, {{"strdupa"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
741726
{{CDM::CLibrary, {{"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
727+
{{CDM::CLibrary, BI.getName(Builtin::BImemcpy)},
728+
TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
729+
{{CDM::CLibrary, {BI.getName(Builtin::BImemmove)}},
730+
TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
731+
{{CDM::CLibrary, {BI.getName(Builtin::BIstrncpy)}},
732+
TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
733+
{{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}},
734+
TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
735+
{{CDM::CLibrary, {"bcopy"}}, TR::Prop({{0, 2}}, {{1}})},
742736

743737
// Sinks
744738
{{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -752,30 +746,15 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
752746
{{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
753747
{{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
754748
{{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
755-
{{CDM::CLibrary, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
756-
{{CDM::CLibrary, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
757-
{{CDM::CLibrary, {{"alloca"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
758-
{{CDM::CLibrary, {{"memccpy"}}}, TR::Sink({{3}}, MsgTaintedBufferSize)},
759-
{{CDM::CLibrary, {{"realloc"}}}, TR::Sink({{1}}, MsgTaintedBufferSize)},
749+
750+
// malloc, calloc, alloca, realloc, memccpy
751+
// are intentionally not marked as taint sinks because unconditional
752+
// reporting for these functions generates many false positives.
753+
// These taint sinks should be implemented in other checkers with more
754+
// sophisticated sanitation heuristics.
760755
{{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
761756
{{{{"setproctitle_fast"}}},
762-
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
763-
764-
// SinkProps
765-
{{CDM::CLibraryMaybeHardened, BI.getName(Builtin::BImemcpy)},
766-
TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
767-
MsgTaintedBufferSize)},
768-
{{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BImemmove)}},
769-
TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
770-
MsgTaintedBufferSize)},
771-
{{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrncpy)}},
772-
TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
773-
MsgTaintedBufferSize)},
774-
{{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}},
775-
TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
776-
MsgTaintedBufferSize)},
777-
{{CDM::CLibrary, {{"bcopy"}}},
778-
TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
757+
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}};
779758

780759
// `getenv` returns taint only in untrusted environments.
781760
if (TR::UntrustedEnv(C)) {

clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1819,6 +1819,10 @@ ProgramStateRef MallocChecker::MallocMemAux(CheckerContext &C,
18191819
if (Size.isUndef())
18201820
Size = UnknownVal();
18211821

1822+
// TODO: If Size is tainted and we cannot prove that it is within
1823+
// reasonable bounds, emit a warning that an attacker may
1824+
// provoke a memory exhaustion error.
1825+
18221826
// Set the region's extent.
18231827
State = setDynamicExtent(State, RetVal.getAsRegion(),
18241828
Size.castAs<DefinedOrUnknownSVal>(), SVB);

clang/test/Analysis/taint-diagnostic-visitor.c

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ int scanf(const char *restrict format, ...);
1010
int system(const char *command);
1111
char* getenv( const char* env_var );
1212
size_t strlen( const char* str );
13-
int atoi( const char* str );
13+
char *strcat( char *dest, const char *src );
14+
char* strcpy( char* dest, const char* src );
1415
void *malloc(size_t size );
1516
void free( void *ptr );
1617
char *fgets(char *str, int n, FILE *stream);
@@ -53,34 +54,32 @@ void taintDiagnosticVLA(void) {
5354

5455
// Tests if the originated note is correctly placed even if the path is
5556
// propagating through variables and expressions
56-
char *taintDiagnosticPropagation(){
57-
char *pathbuf;
58-
char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
59-
// expected-note@-1 {{Taint propagated to the return value}}
60-
if (size){ // expected-note {{Assuming 'size' is non-null}}
61-
// expected-note@-1 {{Taking true branch}}
62-
pathbuf=(char*) malloc(atoi(size)); // expected-warning{{Untrusted data is used to specify the buffer size}}
63-
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
64-
// expected-note@-2 {{Taint propagated to the return value}}
65-
return pathbuf;
57+
int taintDiagnosticPropagation(){
58+
int res;
59+
char *cmd=getenv("CMD"); // expected-note {{Taint originated here}}
60+
// expected-note@-1 {{Taint propagated to the return value}}
61+
if (cmd){ // expected-note {{Assuming 'cmd' is non-null}}
62+
// expected-note@-1 {{Taking true branch}}
63+
res = system(cmd); // expected-warning{{Untrusted data is passed to a system call}}
64+
// expected-note@-1{{Untrusted data is passed to a system call}}
65+
return res;
6666
}
67-
return 0;
67+
return -1;
6868
}
6969

7070
// Taint origin should be marked correctly even if there are multiple taint
7171
// sources in the function
72-
char *taintDiagnosticPropagation2(){
73-
char *pathbuf;
72+
int taintDiagnosticPropagation2(){
73+
int res;
7474
char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source
75-
char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
76-
// expected-note@-1 {{Taint propagated to the return value}}
75+
char *cmd=getenv("CMD"); // expected-note {{Taint originated here}}
76+
// expected-note@-1 {{Taint propagated to the return value}}
7777
char *user_env=getenv("USER_ENV_VAR");//unrelated taint source
78-
if (size){ // expected-note {{Assuming 'size' is non-null}}
79-
// expected-note@-1 {{Taking true branch}}
80-
pathbuf=(char*) malloc(atoi(size)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
81-
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
82-
// expected-note@-2 {{Taint propagated to the return value}}
83-
return pathbuf;
78+
if (cmd){ // expected-note {{Assuming 'cmd' is non-null}}
79+
// expected-note@-1 {{Taking true branch}}
80+
res = system(cmd); // expected-warning{{Untrusted data is passed to a system call}}
81+
// expected-note@-1{{Untrusted data is passed to a system call}}
82+
return res;
8483
}
8584
return 0;
8685
}
@@ -95,22 +94,24 @@ void testReadStdIn(){
9594
}
9695

9796
void multipleTaintSources(void) {
98-
int x,y,z;
99-
scanf("%d", &x); // expected-note {{Taint originated here}}
97+
char cmd[2048], file[1024];
98+
scanf ("%1022[^\n] ", cmd); // expected-note {{Taint originated here}}
10099
// expected-note@-1 {{Taint propagated to the 2nd argument}}
101-
scanf("%d", &y); // expected-note {{Taint originated here}}
100+
scanf ("%1023[^\n]", file); // expected-note {{Taint originated here}}
102101
// expected-note@-1 {{Taint propagated to the 2nd argument}}
103-
scanf("%d", &z);
104-
int* ptr = (int*) malloc(y + x); // expected-warning {{Untrusted data is used to specify the buffer size}}
105-
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
106-
free (ptr);
102+
strcat(cmd, file); // expected-note {{Taint propagated to the 1st argument}}
103+
strcat(cmd, " "); // expected-note {{Taint propagated to the 1st argument}}
104+
system(cmd); // expected-warning {{Untrusted data is passed to a system call}}
105+
// expected-note@-1{{Untrusted data is passed to a system call}}
107106
}
108107

109108
void multipleTaintedArgs(void) {
110-
int x,y;
111-
scanf("%d %d", &x, &y); // expected-note {{Taint originated here}}
109+
char cmd[1024], file[1024], buf[2048];
110+
scanf("%1022s %1023s", cmd, file); // expected-note {{Taint originated here}}
112111
// expected-note@-1 {{Taint propagated to the 2nd argument, 3rd argument}}
113-
int* ptr = (int*) malloc(x + y); // expected-warning {{Untrusted data is used to specify the buffer size}}
114-
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
115-
free (ptr);
112+
strcpy(buf, cmd);// expected-note {{Taint propagated to the 1st argument}}
113+
strcat(buf, " ");// expected-note {{Taint propagated to the 1st argument}}
114+
strcat(buf, file);// expected-note {{Taint propagated to the 1st argument}}
115+
system(buf); // expected-warning {{Untrusted data is passed to a system call}}
116+
// expected-note@-1{{Untrusted data is passed to a system call}}
116117
}

clang/test/Analysis/taint-generic.c

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -305,15 +305,21 @@ void testGets_s(void) {
305305

306306
void testTaintedBufferSize(void) {
307307
size_t ts;
308+
// The functions malloc, calloc, bcopy and memcpy are not taint sinks in the
309+
// default config of GenericTaintChecker (because that would cause too many
310+
// false positives).
311+
// FIXME: We should generate warnings when a value passed to these functions
312+
// is tainted and _can be very large_ (because that's exploitable). This
313+
// functionality probably belongs to the checkers that do more detailed
314+
// modeling of these functions (MallocChecker and CStringChecker).
308315
scanf("%zd", &ts);
309-
310-
int *buf1 = (int*)malloc(ts*sizeof(int)); // expected-warning {{Untrusted data is used to specify the buffer size}}
311-
char *dst = (char*)calloc(ts, sizeof(char)); //expected-warning {{Untrusted data is used to specify the buffer size}}
312-
bcopy(buf1, dst, ts); // expected-warning {{Untrusted data is used to specify the buffer size}}
313-
__builtin_memcpy(dst, buf1, (ts + 4)*sizeof(char)); // expected-warning {{Untrusted data is used to specify the buffer size}}
316+
int *buf1 = (int*)malloc(ts*sizeof(int)); // warn here, ts is unbounded and tainted
317+
char *dst = (char*)calloc(ts, sizeof(char)); // warn here, ts is unbounded tainted
318+
bcopy(buf1, dst, ts); // no warning here, since the size of buf1, dst equals ts. Cannot overflow.
319+
__builtin_memcpy(dst, buf1, (ts + 4)*sizeof(char)); // warn here, dst overflows (whatever the value of ts)
314320

315321
// If both buffers are trusted, do not issue a warning.
316-
char *dst2 = (char*)malloc(ts*sizeof(char)); // expected-warning {{Untrusted data is used to specify the buffer size}}
322+
char *dst2 = (char*)malloc(ts*sizeof(char)); // warn here, ts in unbounded
317323
strncat(dst2, dst, ts); // no-warning
318324
}
319325

@@ -353,7 +359,7 @@ void testStruct(void) {
353359

354360
sock = socket(AF_INET, SOCK_STREAM, 0);
355361
read(sock, &tainted, sizeof(tainted));
356-
__builtin_memcpy(buffer, tainted.buf, tainted.length); // expected-warning {{Untrusted data is used to specify the buffer size}}
362+
clang_analyzer_isTainted_int(tainted.length); // expected-warning {{YES }}
357363
}
358364

359365
void testStructArray(void) {
@@ -368,17 +374,17 @@ void testStructArray(void) {
368374
__builtin_memset(srcbuf, 0, sizeof(srcbuf));
369375

370376
read(sock, &tainted[0], sizeof(tainted));
371-
__builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
377+
clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{YES}}
372378

373379
__builtin_memset(&tainted, 0, sizeof(tainted));
374380
read(sock, &tainted, sizeof(tainted));
375-
__builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
381+
clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{YES}}
376382

377383
__builtin_memset(&tainted, 0, sizeof(tainted));
378384
// If we taint element 1, we should not raise an alert on taint for element 0 or element 2
379385
read(sock, &tainted[1], sizeof(tainted));
380-
__builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // no-warning
381-
__builtin_memcpy(dstbuf, srcbuf, tainted[2].length); // no-warning
386+
clang_analyzer_isTainted_int(tainted[0].length); // expected-warning {{NO}}
387+
clang_analyzer_isTainted_int(tainted[2].length); // expected-warning {{NO}}
382388
}
383389

384390
void testUnion(void) {

0 commit comments

Comments
 (0)