Skip to content

Commit 889c886

Browse files
committed
[analyzer] TaintPropagation checker strlen() should not propagate
strlen(..) call should not propagate taintedness, because it brings in many false positive findings. It is a common pattern to copy user provided input to another buffer. In these cases we always get warnings about tainted data used as the malloc parameter: buf = malloc(strlen(tainted_txt) + 1); // false warning This pattern can lead to a denial of service attack only, when the attacker can directly specify the size of the allocated area as an arbitrary large number (e.g. the value is converted from a user provided string). Later, we could reintroduce strlen() as a taint propagating function with the consideration not to emit warnings when the tainted value cannot be "arbitrarily large" (such as the size of an already allocated memory block).
1 parent 77106b7 commit 889c886

File tree

5 files changed

+38
-19
lines changed

5 files changed

+38
-19
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,13 @@ Static Analyzer
409409
bitwise shift operators produce undefined behavior (because some operand is
410410
negative or too large).
411411

412+
- The ``alpha.security.taint.TaintPropagation`` checker no longer propagates
413+
taint on ``strlen`` and ``strnlen`` calls, unless these are marked
414+
explicitly propagators in the user-provided taint configuration file.
415+
This removal empirically reduces the number of false positive reports.
416+
Read the PR for the details.
417+
(`#66086 <https://github.com/llvm/llvm-project/pull/66086>`_)
418+
412419
.. _release-notes-sanitizers:
413420

414421
Sanitizers

clang/docs/analyzer/checkers.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2599,8 +2599,8 @@ Default propagations rules:
25992599
``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``,
26002600
``qsort_r``, ``rawmemchr``, ``read``, ``recv``, ``recvfrom``, ``rindex``,
26012601
``strcasestr``, ``strchr``, ``strchrnul``, ``strcasecmp``, ``strcmp``,
2602-
``strcspn``, ``strlen``, ``strncasecmp``, ``strncmp``, ``strndup``,
2603-
``strndupa``, ``strnlen``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``,
2602+
``strcspn``, ``strncasecmp``, ``strncmp``, ``strndup``,
2603+
``strndupa``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``,
26042604
``strstr``, ``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``,
26052605
``toupper``, ``ttyname``, ``ttyname_r``, ``wctomb``, ``wcwidth``
26062606

clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -695,9 +695,10 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
695695
{{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
696696
{{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
697697
{{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
698-
{{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
699-
{{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
700-
{{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
698+
699+
// strlen, wcslen, strnlen and alike intentionally don't propagate taint.
700+
// See the details here: https://github.com/llvm/llvm-project/pull/66086
701+
701702
{{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
702703
{{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
703704
{{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},

clang/test/Analysis/taint-diagnostic-visitor.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ int scanf(const char *restrict format, ...);
1010
int system(const char *command);
1111
char* getenv( const char* env_var );
1212
size_t strlen( const char* str );
13+
int atoi( const char* str );
1314
void *malloc(size_t size );
1415
void free( void *ptr );
1516
char *fgets(char *str, int n, FILE *stream);
@@ -54,11 +55,11 @@ void taintDiagnosticVLA(void) {
5455
// propagating through variables and expressions
5556
char *taintDiagnosticPropagation(){
5657
char *pathbuf;
57-
char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
58+
char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
5859
// expected-note@-1 {{Taint propagated to the return value}}
59-
if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
60+
if (size){ // expected-note {{Assuming 'size' is non-null}}
6061
// expected-note@-1 {{Taking true branch}}
61-
pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
62+
pathbuf=(char*) malloc(atoi(size)); // expected-warning{{Untrusted data is used to specify the buffer size}}
6263
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
6364
// expected-note@-2 {{Taint propagated to the return value}}
6465
return pathbuf;
@@ -71,12 +72,12 @@ char *taintDiagnosticPropagation(){
7172
char *taintDiagnosticPropagation2(){
7273
char *pathbuf;
7374
char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source
74-
char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
75+
char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
7576
// expected-note@-1 {{Taint propagated to the return value}}
7677
char *user_env=getenv("USER_ENV_VAR");//unrelated taint source
77-
if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
78+
if (size){ // expected-note {{Assuming 'size' is non-null}}
7879
// expected-note@-1 {{Taking true branch}}
79-
pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
80+
pathbuf=(char*) malloc(atoi(size)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
8081
// expected-note@-1{{Untrusted data is used to specify the buffer size}}
8182
// expected-note@-2 {{Taint propagated to the return value}}
8283
return pathbuf;

clang/test/Analysis/taint-generic.c

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -443,22 +443,26 @@ int testSprintf_propagates_taint(char *buf, char *msg) {
443443
return 1 / x; // expected-warning {{Division by a tainted value, possibly zero}}
444444
}
445445

446-
void test_wchar_apis_propagate(const char *path) {
446+
void test_wchar_apis_dont_propagate(const char *path) {
447+
// strlen, wcslen, strnlen and alike intentionally don't propagate taint.
448+
// See the details here: https://github.com/llvm/llvm-project/pull/66086
449+
// This isn't ideal, but this is only what we have now.
450+
447451
FILE *f = fopen(path, "r");
448452
clang_analyzer_isTainted_charp((char*)f); // expected-warning {{YES}}
449453
wchar_t wbuf[10];
450454
fgetws(wbuf, sizeof(wbuf)/sizeof(*wbuf), f);
451455
clang_analyzer_isTainted_wchar(*wbuf); // expected-warning {{YES}}
452456
int n = wcslen(wbuf);
453-
clang_analyzer_isTainted_int(n); // expected-warning {{YES}}
457+
clang_analyzer_isTainted_int(n); // expected-warning {{NO}}
454458

455459
wchar_t dst[100] = L"ABC";
456460
clang_analyzer_isTainted_wchar(*dst); // expected-warning {{NO}}
457461
wcsncat(dst, wbuf, sizeof(wbuf)/sizeof(*wbuf));
458462
clang_analyzer_isTainted_wchar(*dst); // expected-warning {{YES}}
459463

460464
int m = wcslen(dst);
461-
clang_analyzer_isTainted_int(m); // expected-warning {{YES}}
465+
clang_analyzer_isTainted_int(m); // expected-warning {{NO}}
462466
}
463467

464468
int scanf_s(const char *format, ...);
@@ -948,21 +952,27 @@ void testStrndupa(size_t n) {
948952
}
949953

950954
size_t strlen(const char *s);
951-
void testStrlen() {
955+
void testStrlen_dont_propagate() {
956+
// strlen, wcslen, strnlen and alike intentionally don't propagate taint.
957+
// See the details here: https://github.com/llvm/llvm-project/pull/66086
958+
// This isn't ideal, but this is only what we have now.
952959
char s[10];
953960
scanf("%9s", s);
954961

955962
size_t result = strlen(s);
956-
clang_analyzer_isTainted_int(result); // expected-warning {{YES}}
963+
// strlen propagating taint would bring in many false positives
964+
clang_analyzer_isTainted_int(result); // expected-warning {{NO}}
957965
}
958966

959967
size_t strnlen(const char *s, size_t maxlen);
960-
void testStrnlen(size_t maxlen) {
968+
void testStrnlen_dont_propagate(size_t maxlen) {
969+
// strlen, wcslen, strnlen and alike intentionally don't propagate taint.
970+
// See the details here: https://github.com/llvm/llvm-project/pull/66086
971+
// This isn't ideal, but this is only what we have now.
961972
char s[10];
962973
scanf("%9s", s);
963-
964974
size_t result = strnlen(s, maxlen);
965-
clang_analyzer_isTainted_int(result); // expected-warning {{YES}}
975+
clang_analyzer_isTainted_int(result); // expected-warning {{NO}}
966976
}
967977

968978
long strtol(const char *restrict nptr, char **restrict endptr, int base);

0 commit comments

Comments
 (0)