-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[-Wunsafe-buffer-usage] Ignore constant safe indices in array subscripts #80504
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[-Wunsafe-buffer-usage] Ignore constant safe indices in array subscripts #80504
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
fd3d507
to
1fdef3b
Compare
@llvm/pr-subscribers-clang-analysis @llvm/pr-subscribers-clang Author: None (jkorous-apple) Changesdepends on Full diff: https://github.com/llvm/llvm-project/pull/80504.diff 5 Files Affected:
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index d00c598c4b9de3..aa3240a86e562b 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -281,10 +281,13 @@ isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
// 4. the operand of a pointer subtraction operation
// (i.e., computing the distance between two pointers); or ...
- auto CallArgMatcher =
- callExpr(forEachArgumentWithParam(InnerMatcher,
- hasPointerType() /* array also decays to pointer type*/),
- unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
+ // clang-format off
+ auto CallArgMatcher = callExpr(
+ forEachArgumentWithParamType(
+ InnerMatcher,
+ isAnyPointer() /* array also decays to pointer type*/),
+ unless(callee(
+ functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
auto CastOperandMatcher =
castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral),
@@ -306,6 +309,7 @@ isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
hasRHS(hasPointerType())),
eachOf(hasLHS(InnerMatcher),
hasRHS(InnerMatcher)));
+ // clang-format on
return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher,
PtrSubtractionMatcher));
@@ -402,6 +406,37 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
}
return false;
}
+
+AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
+ // FIXME: Proper solution:
+ // - refactor Sema::CheckArrayAccess
+ // - split safe/OOB/unknown decision logic from diagnostics emitting code
+ // - e. g. "Try harder to find a NamedDecl to point at in the note." already duplicated
+ // - call both from Sema and from here
+
+ const DeclRefExpr * BaseDRE = dyn_cast_or_null<DeclRefExpr>(Node.getBase()->IgnoreParenImpCasts());
+ if (!BaseDRE)
+ return false;
+ if (!BaseDRE->getDecl())
+ return false;
+ auto BaseVarDeclTy = BaseDRE->getDecl()->getType();
+ if (!BaseVarDeclTy->isConstantArrayType())
+ return false;
+ const auto * CATy = dyn_cast_or_null<ConstantArrayType>(BaseVarDeclTy);
+ if (!CATy)
+ return false;
+ const APInt ArrSize = CATy->getSize();
+
+ if (const auto * IdxLit = dyn_cast<IntegerLiteral>(Node.getIdx())) {
+ const APInt ArrIdx = IdxLit->getValue();
+ // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a bug
+ if (ArrIdx.isNonNegative() && ArrIdx.getLimitedValue() < ArrSize.getLimitedValue())
+ return true;
+ }
+
+ return false;
+}
+
} // namespace clang::ast_matchers
namespace {
@@ -594,16 +629,16 @@ class ArraySubscriptGadget : public WarningGadget {
}
static Matcher matcher() {
- // FIXME: What if the index is integer literal 0? Should this be
- // a safe gadget in this case?
- // clang-format off
+ // clang-format off
return stmt(arraySubscriptExpr(
hasBase(ignoringParenImpCasts(
anyOf(hasPointerType(), hasArrayType()))),
- unless(hasIndex(
- anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
- )))
- .bind(ArraySubscrTag));
+ unless(anyOf(
+ isSafeArraySubscript(),
+ hasIndex(
+ anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
+ )
+ ))).bind(ArraySubscrTag));
// clang-format on
}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
index 90c11b1be95c25..4804223e8be058 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-everything -Wunsafe-buffer-usage \
// RUN: -fsafe-buffer-usage-suggestions \
// RUN: -verify %s
@@ -22,3 +22,19 @@ struct Foo {
void foo2(Foo& f, unsigned idx) {
f.member_buffer[idx] = 0; // expected-warning{{unsafe buffer access}}
}
+
+void constant_idx_safe(unsigned idx) {
+ int buffer[10];
+ buffer[9] = 0;
+}
+
+void constant_idx_safe0(unsigned idx) {
+ int buffer[10];
+ buffer[0] = 0;
+}
+
+void constant_idx_unsafe(unsigned idx) {
+ int buffer[10]; // expected-warning{{'buffer' is an unsafe buffer that does not perform bounds checks}}
+ // expected-note@-1{{change type of 'buffer' to 'std::array' to harden it}}
+ buffer[10] = 0; // expected-note{{used in buffer access here}}
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp
index f94072015ff87d..b3c64f1b0d085e 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp
@@ -83,11 +83,11 @@ void unsafe_method_invocation_single_param() {
}
-void unsafe_method_invocation_single_param_array() {
+void unsafe_method_invocation_single_param_array(int idx) {
int p[32];
// CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 32> p"
- int tmp = p[5];
+ int tmp = p[idx];
foo(p);
// CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:8-[[@LINE-1]]:8}:".data()"
}
@@ -126,14 +126,14 @@ void unsafe_method_invocation_double_param() {
m1(q, q, 8);
}
-void unsafe_method_invocation_double_param_array() {
+void unsafe_method_invocation_double_param_array(int idx) {
int p[14];
// CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 14> p"
int q[40];
// CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 40> q"
- q[5] = p[5];
+ q[idx] = p[idx];
m1(p, p, 10);
// CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:7}:".data()"
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
new file mode 100644
index 00000000000000..216813ce45bfd5
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage \
+// RUN: -fsafe-buffer-usage-suggestions \
+// RUN: -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+void unsafe_array_func_ptr_call(void (*fn_ptr)(int *param)) {
+ int p[32];
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 32> p"
+
+ int idx;
+ p[idx] = 10;
+ fn_ptr(p);
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:11}:".data()"
+}
+
+void unsafe_ptr_func_ptr_call(void (*fn_ptr)(int *param)) {
+ int *p;
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span<int> p"
+
+ p[5] = 10;
+ fn_ptr(p);
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:11}:".data()"
+}
+
+void addr_of_unsafe_ptr_func_ptr_call(void (*fn_ptr)(int *param)) {
+ int *p;
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span<int> p"
+
+ p[5] = 10;
+ fn_ptr(&p[0]);
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:15}:"p.data()"
+}
+
+void addr_of_unsafe_ptr_w_offset_func_ptr_call(void (*fn_ptr)(int *param)) {
+ int *p;
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span<int> p"
+
+ p[5] = 10;
+ fn_ptr(&p[3]);
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:15}:"&p.data()[3]"
+}
+
+void preincrement_unsafe_ptr_func_ptr_call(void (*fn_ptr)(int *param)) {
+ int *p;
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span<int> p"
+
+ p[5] = 10;
+ fn_ptr(++p);
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:13}:"(p = p.subspan(1)).data()"
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp
index 67cdf252d6a8b6..9738b357834eed 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp
@@ -36,7 +36,7 @@ void testIncrement(char *p) { // expected-warning{{'p' is an unsafe pointer used
void * voidPtrCall(void);
char * charPtrCall(void);
-void testArraySubscripts(int *p, int **pp) {
+void testArraySubscripts(int idx, int *p, int **pp) {
// expected-warning@-1{{'p' is an unsafe pointer used for buffer access}}
// expected-warning@-2{{'pp' is an unsafe pointer used for buffer access}}
foo(p[1], // expected-note{{used in buffer access here}}
@@ -64,13 +64,14 @@ void testArraySubscripts(int *p, int **pp) {
// expected-note@-1{{change type of 'a' to 'std::array' to label it for hardening}}
int b[10][10]; // expected-warning{{'b' is an unsafe buffer that does not perform bounds checks}}
- foo(a[1], 1[a], // expected-note2{{used in buffer access here}}
- b[3][4], // expected-warning{{unsafe buffer access}}
- // expected-note@-1{{used in buffer access here}}
- 4[b][3], // expected-warning{{unsafe buffer access}}
- // expected-note@-1{{used in buffer access here}}
- 4[3[b]]); // expected-warning{{unsafe buffer access}}
- // expected-note@-1{{used in buffer access here}}
+ foo(a[idx], idx[a], // expected-note2{{used in buffer access here}}
+ b[idx][idx + 1], // expected-warning{{unsafe buffer access}}
+ // expected-note@-1{{used in buffer access here}}
+ (idx + 1)[b][idx],// expected-warning{{unsafe buffer access}}
+ // expected-note@-1{{used in buffer access here}}
+ (idx + 1)[idx[b]]);
+ // expected-warning@-1{{unsafe buffer access}}
+ // expected-note@-2{{used in buffer access here}}
// Not to warn when index is zero
foo(p[0], pp[0][0], 0[0[pp]], 0[pp][0],
@@ -158,9 +159,9 @@ void testLambdaCaptureAndGlobal(int * p) {
// expected-warning@-1{{'p' is an unsafe pointer used for buffer access}}
int a[10]; // expected-warning{{'a' is an unsafe buffer that does not perform bounds checks}}
- auto Lam = [p, a]() {
+ auto Lam = [p, a](int idx) {
return p[1] // expected-note{{used in buffer access here}}
- + a[1] + garray[1] // expected-note2{{used in buffer access here}}
+ + a[idx] + garray[idx]// expected-note2{{used in buffer access here}}
+ gp[1]; // expected-note{{used in buffer access here}}
};
@@ -178,31 +179,31 @@ void testLambdaCapture() {
// expected-note@-1{{change type of 'b' to 'std::array' to label it for hardening}}
int c[10];
- auto Lam1 = [a]() {
- return a[1]; // expected-note{{used in buffer access here}}
+ auto Lam1 = [a](unsigned idx) {
+ return a[idx]; // expected-note{{used in buffer access here}}
};
- auto Lam2 = [x = b[3]]() { // expected-note{{used in buffer access here}}
+ auto Lam2 = [x = b[c[5]]]() { // expected-note{{used in buffer access here}}
return x;
};
- auto Lam = [x = c]() { // expected-warning{{'x' is an unsafe pointer used for buffer access}}
- return x[3]; // expected-note{{used in buffer access here}}
+ auto Lam = [x = c](unsigned idx) { // expected-warning{{'x' is an unsafe pointer used for buffer access}}
+ return x[idx]; // expected-note{{used in buffer access here}}
};
}
-void testLambdaImplicitCapture() {
+void testLambdaImplicitCapture(long idx) {
int a[10]; // expected-warning{{'a' is an unsafe buffer that does not perform bounds checks}}
// expected-note@-1{{change type of 'a' to 'std::array' to label it for hardening}}
int b[10]; // expected-warning{{'b' is an unsafe buffer that does not perform bounds checks}}
// expected-note@-1{{change type of 'b' to 'std::array' to label it for hardening}}
auto Lam1 = [=]() {
- return a[1]; // expected-note{{used in buffer access here}}
+ return a[idx]; // expected-note{{used in buffer access here}}
};
auto Lam2 = [&]() {
- return b[1]; // expected-note{{used in buffer access here}}
+ return b[idx]; // expected-note{{used in buffer access here}}
};
}
@@ -344,31 +345,31 @@ int testVariableDecls(int * p) {
return p[1]; // expected-note{{used in buffer access here}}
}
-template<typename T> void fArr(T t[]) {
+template<typename T> void fArr(T t[], long long idx) {
// expected-warning@-1{{'t' is an unsafe pointer used for buffer access}}
foo(t[1]); // expected-note{{used in buffer access here}}
T ar[8]; // expected-warning{{'ar' is an unsafe buffer that does not perform bounds checks}}
// expected-note@-1{{change type of 'ar' to 'std::array' to label it for hardening}}
- foo(ar[5]); // expected-note{{used in buffer access here}}
+ foo(ar[idx]); // expected-note{{used in buffer access here}}
}
-template void fArr<int>(int t[]); // FIXME: expected note {{in instantiation of}}
+template void fArr<int>(int t[], long long); // FIXME: expected note {{in instantiation of}}
int testReturn(int t[]) {// expected-note{{change type of 't' to 'std::span' to preserve bounds information}}
// expected-warning@-1{{'t' is an unsafe pointer used for buffer access}}
return t[1]; // expected-note{{used in buffer access here}}
}
-int testArrayAccesses(int n) {
+int testArrayAccesses(int n, int idx) {
// auto deduced array type
int cArr[2][3] = {{1, 2, 3}, {4, 5, 6}};
// expected-warning@-1{{'cArr' is an unsafe buffer that does not perform bounds checks}}
int d = cArr[0][0];
foo(cArr[0][0]);
- foo(cArr[1][2]); // expected-note{{used in buffer access here}}
- // expected-warning@-1{{unsafe buffer access}}
- auto cPtr = cArr[1][2]; // expected-note{{used in buffer access here}}
- // expected-warning@-1{{unsafe buffer access}}
+ foo(cArr[idx][idx + 1]); // expected-note{{used in buffer access here}}
+ // expected-warning@-1{{unsafe buffer access}}
+ auto cPtr = cArr[idx][idx * 2]; // expected-note{{used in buffer access here}}
+ // expected-warning@-1{{unsafe buffer access}}
foo(cPtr);
// Typdefs
|
unless(anyOf( | ||
isSafeArraySubscript(), | ||
hasIndex( | ||
anyOf(integerLiteral(equals(0)), arrayInitIndexExpr()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is unnecessary now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, it is still necessary - we need to somehow cover:
int arr[0];
arr[0] = 5;
And that has been defined as out of scope for the warning. I don't feel like calling it "safe" and I'd rather have it visible in the top-level matcher.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So you want to suppress the warning here right? In this case yeah makes sense.
It's also somewhat covered by
warning: zero size arrays are an extension [-Wzero-length-array]
auto BaseVarDeclTy = BaseDRE->getDecl()->getType(); | ||
if (!BaseVarDeclTy->isConstantArrayType()) | ||
return false; | ||
const auto * CATy = dyn_cast_or_null<ConstantArrayType>(BaseVarDeclTy); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Take a look getAsConstantArrayType().
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use getAsArrayType
instead of dyn_cast
.
Don't emit warnings for array subscripts on constant size arrays where the index is constant and within bounds. Example: int arr[10]; arr[5] = 0; //safe, no warning This patch recognizes only array indices that are integer literals - it doesn't understand more complex expressions (arithmetic on constants, etc.).
…e index is ignored
-Warray-bounds implemented in Sema::CheckArrayAccess() already solves a similar (opposite) problem and is battle-tested. Adding -Wunsafe-buffer-usage diagnostics to Sema is a non starter as we need to emit both the warnings and fixits and the performance impact of the fixit machine is unacceptable for Sema. CheckArrayAccess() as is doesn't distinguish between "safe" and "unknown" array accesses. It also mixes the analysis that decides if an index is out of bounds with crafting the diagnostics. A refactor of CheckArrayAccess() might serve both the original purpose and help us avoid false-positive with -Wunsafe-buffer-usage on constant size arrrays.
ed8933d
to
e4cf184
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
Co-authored-by: Artem Dergachev <[email protected]>
…pts (llvm#80504) [-Wunsafe-buffer-usage] Ignore safe array subscripts Don't emit warnings for array subscripts on constant size arrays where the index is constant and within bounds. Example: int arr[10]; arr[5] = 0; //safe, no warning This patch recognizes only array indices that are integer literals - it doesn't understand more complex expressions (arithmetic on constants, etc.). -Warray-bounds implemented in Sema::CheckArrayAccess() already solves a similar (opposite) problem, handles complex expressions and is battle-tested. Adding -Wunsafe-buffer-usage diagnostics to Sema is a non-starter as we need to emit both the warnings and fixits and the performance impact of the fixit machine is unacceptable for Sema. CheckArrayAccess() as is doesn't distinguish between "safe" and "unknown" array accesses. It also mixes the analysis that decides if an index is out of bounds with crafting the diagnostics. A refactor of CheckArrayAccess() might serve both the original purpose and help us avoid false-positive with -Wunsafe-buffer-usage on constant size arrrays. (cherry picked from commit 9a1e637)
depends on
#80358