[Clang] Fix __builtin_dynamic_object_size off by 4 #111015
Conversation
Fixes: llvm#111009

Change the behavior of __bdos to be in line with gcc by changing the computed size from:

max(sizeof(struct s), offsetof(struct s, array) + p->count * sizeof(*p->array))

to:

sizeof(struct s) + p->count * sizeof(*p->array)
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen

Author: Jan Hendrik Farr (Cydox)

Changes: Fixes #111009. Change the behavior of __bdos to be in line with gcc by changing the computed size from:

max(sizeof(struct s), offsetof(struct s, array) + p->count * sizeof(*p->array))

to:

sizeof(struct s) + p->count * sizeof(*p->array)
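To make the change concrete, here is a small sketch. The struct is shaped like the annotated struct used in the updated tests; the field names and the x86-64 numbers (sizeof == 16, offsetof(array) == 12) are assumptions for illustration, not taken from the patch itself:

struct annotated {
    unsigned long flags;                             /* bytes 0..7   */
    int count;                                       /* bytes 8..11  */
    int array[] __attribute__((counted_by(count)));  /* starts at 12 */
};

/* For p->count == 3:
 *   old: max(sizeof(struct annotated), offsetof(struct annotated, array) + 3 * sizeof(int))
 *      = max(16, 12 + 12) = 24
 *   new: sizeof(struct annotated) + 3 * sizeof(int) = 16 + 12 = 28
 */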
Patch is 48.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/111015.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c864714182e019..f12f8d4bfb1571 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -919,8 +919,7 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
// 2) bdos of the whole struct, including the flexible array:
//
// __builtin_dynamic_object_size(p, 1) ==
- // max(sizeof(struct s),
- // offsetof(struct s, array) + p->count * sizeof(*p->array))
+ // sizeof(struct s) + p->count * sizeof(*p->array))
//
ASTContext &Ctx = getContext();
const Expr *Base = E->IgnoreParenImpCasts();
@@ -1052,22 +1051,13 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
// The whole struct is specificed in the __bdos.
const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
- // Get the offset of the FAM.
- llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
- Value *OffsetAndFAMSize =
- Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
// Get the full size of the struct.
llvm::Constant *SizeofStruct =
ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
- // max(sizeof(struct s),
- // offsetof(struct s, array) + p->count * sizeof(*p->array))
- Res = IsSigned
- ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
- OffsetAndFAMSize, SizeofStruct)
- : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
- OffsetAndFAMSize, SizeofStruct);
+ // Add full size of struct and fam size
+ Res = Builder.CreateAdd(SizeofStruct, Res, "", !IsSigned, IsSigned);
}
// A negative \p IdxInst or \p CountedByInst means that the index lands
diff --git a/clang/test/CodeGen/attr-counted-by-pr111009.c b/clang/test/CodeGen/attr-counted-by-pr111009.c
new file mode 100644
index 00000000000000..87db75cd4a4ee8
--- /dev/null
+++ b/clang/test/CodeGen/attr-counted-by-pr111009.c
@@ -0,0 +1,61 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wno-missing-declarations -emit-llvm -o - %s | FileCheck %s
+
+// See #111009
+// Based on reproducer from Thorsten Blum:
+// https://lore.kernel.org/linux-kernel/[email protected]/
+
+typedef unsigned int uid_t;
+typedef unsigned int gid_t;
+
+typedef struct {
+ int counter;
+} atomic_t;
+
+typedef struct refcount_struct {
+ atomic_t refs;
+} refcount_t;
+
+struct callback_head {
+ struct callback_head *next;
+ void (*func)(struct callback_head *head);
+} __attribute__((aligned(sizeof(void *))));
+#define rcu_head callback_head
+
+typedef struct {
+ uid_t val;
+} kuid_t;
+
+typedef struct {
+ gid_t val;
+} kgid_t;
+
+struct posix_acl_entry {
+ short e_tag;
+ unsigned short e_perm;
+ union {
+ kuid_t e_uid;
+ kgid_t e_gid;
+ };
+};
+
+struct posix_acl {
+ refcount_t a_refcount;
+ struct rcu_head a_rcu;
+ unsigned int a_count;
+ struct posix_acl_entry a_entries[] __attribute__((counted_by(a_count)));
+};
+
+// CHECK-LABEL: define dso_local range(i32 32, 25) i32 @test(
+// CHECK-SAME: ptr nocapture noundef readonly [[FOO:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 24
+// CHECK-NEXT: [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD]], 3
+// CHECK-NEXT: [[CONV:%.*]] = add i32 [[TMP0]], 32
+// CHECK-NEXT: ret i32 [[CONV]]
+//
+int test(struct posix_acl *foo) {
+ return __builtin_dynamic_object_size(foo, 0);
+}
+
diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c
index 4a130c5e3d401f..706b53d78ee011 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -66,7 +66,7 @@ struct anon_struct {
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10:[0-9]+]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9:[0-9]+]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont3:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -114,7 +114,7 @@ void test1(struct annotated *p, int index, int val) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont3:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -203,18 +203,15 @@ size_t test2_bdos(struct annotated *p) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont3:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
-// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2
-// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP3]], i64 4)
-// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
-// SANITIZE-WITH-ATTR-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 12
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD]], 2
+// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 16
// SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
-// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP6]]
+// SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP3]]
// SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
// SANITIZE-WITH-ATTR-NEXT: ret void
//
@@ -223,13 +220,10 @@ size_t test2_bdos(struct annotated *p) {
// NO-SANITIZE-WITH-ATTR-NEXT: entry:
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 4)
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 12
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD]], 2
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 16
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
-// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP4]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP1]]
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
// NO-SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
// NO-SANITIZE-WITH-ATTR-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
@@ -257,31 +251,29 @@ void test3(struct annotated *p, size_t index) {
p->array[index] = __builtin_dynamic_object_size(p, 1);
}
-// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934601) i64 @test3_bdos(
+// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934576, 8589934605) i64 @test3_bdos(
// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
// SANITIZE-WITH-ATTR-NEXT: entry:
// SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
// SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
-// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 4)
-// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 12
-// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], -1
-// SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 0
-// SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP5]]
+// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 16
+// SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], -1
+// SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 0
+// SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP4]]
//
-// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934601) i64 @test3_bdos(
+// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934576, 8589934605) i64 @test3_bdos(
// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
// NO-SANITIZE-WITH-ATTR-NEXT: entry:
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
// NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 4)
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 12
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], -1
-// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 0
-// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP5]]
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], 16
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], -1
+// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 0
+// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP4]]
//
// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos(
// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
@@ -308,7 +300,7 @@ size_t test3_bdos(struct annotated *p) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont4:
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], 2
@@ -325,7 +317,7 @@ size_t test3_bdos(struct annotated *p) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP7:%.*]] = icmp ult i64 [[IDXPROM12]], [[TMP6]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP7]], label [[CONT19:%.*]], label [[HANDLER_OUT_OF_BOUNDS15:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds15:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM12]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM12]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont19:
// SANITIZE-WITH-ATTR-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD6]], 3
@@ -342,7 +334,7 @@ size_t test3_bdos(struct annotated *p) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP13:%.*]] = icmp ult i64 [[IDXPROM28]], [[TMP12]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP13]], label [[CONT35:%.*]], label [[HANDLER_OUT_OF_BOUNDS31:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds31:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM28]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM28]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont35:
// SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM28]]
@@ -494,7 +486,7 @@ size_t test4_bdos(struct annotated *p, int index) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOT_COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont3:
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
@@ -590,7 +582,7 @@ size_t test5_bdos(struct anon_struct *p) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOT_COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont3:
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
@@ -683,7 +675,7 @@ size_t test6_bdos(struct anon_struct *p) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont7:
// SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
@@ -723,12 +715,12 @@ void test7(struct union_of_fams *p, int index) {
}
// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7_bdos(
-// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
// SANITIZE-WITH-ATTR-NEXT: entry:
// SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7_bdos(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
// NO-SANITIZE-WITH-ATTR-NEXT: entry:
// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 -1
//
@@ -756,7 +748,7 @@ size_t test7_bdos(struct union_of_fams *p) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: cont7:
// SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9
@@ -837,7 +829,7 @@ size_t test8_bdos(struct union_of_fams *p) {
// SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR: handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB14:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB14:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
// SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]]
// SANI...
[truncated]
I'm not 100% sure whether the gcc or the clang behavior is currently correct. However, I'm going to argue that gcc has it right. gcc currently says that the __bdos of a struct containing a flexible array member is:

sizeof(struct s) + p->count * sizeof(*p->array)

clang however computes:

max(sizeof(struct s), offsetof(struct s, array) + p->count * sizeof(*p->array))
The kernel assumes the gcc behavior in places like linux/fs/posix_acl.c:

struct posix_acl *
posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
{
struct posix_acl *clone = NULL;
if (acl) {
int size = sizeof(struct posix_acl) + acl->a_count *
sizeof(struct posix_acl_entry);
clone = kmemdup(acl, size, flags);
if (clone)
refcount_set(&clone->a_refcount, 1);
}
return clone;
}
EXPORT_SYMBOL_GPL(posix_acl_clone);

This is the code that triggers the problem in [1]. The way I see it, this code should work, as you also allocate sizeof(struct posix_acl) + a_count * sizeof(struct posix_acl_entry) bytes. Based on the C standard, the size of that object is the size passed to kmemdup.

[1] https://lore.kernel.org/linux-kernel/[email protected]/
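A quick numeric sketch of why this trips the check. The concrete constants (32, 28, 8) assume the usual x86-64 layout of the reproducer's structs; they are not taken from the thread:

#include <assert.h>

int main(void) {
    unsigned long a_count = 1;
    unsigned long sizeof_acl = 32;    /* sizeof(struct posix_acl)              (assumed) */
    unsigned long offsetof_fam = 28;  /* offsetof(struct posix_acl, a_entries) (assumed) */
    unsigned long entry = 8;          /* sizeof(struct posix_acl_entry)        (assumed) */

    /* What posix_acl_clone() passes to kmemdup: 40 bytes. */
    unsigned long allocated = sizeof_acl + a_count * entry;

    /* clang's current whole-struct __bdos: max(32, 28 + 8) = 36. */
    unsigned long clang_bdos = offsetof_fam + a_count * entry > sizeof_acl
                                   ? offsetof_fam + a_count * entry
                                   : sizeof_acl;

    /* gcc (and this PR): 32 + 8 = 40. */
    unsigned long gcc_bdos = sizeof_acl + a_count * entry;

    assert(clang_bdos == 36 && allocated == 40);  /* the 4-byte gap behind the report */
    assert(gcc_bdos == allocated);
    return 0;
}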
✅ With the latest revision this PR passed the C/C++ code formatter.
The value clang computes is the difference between the address of the beginning of the struct, and the end of the array (or the end of the struct, if the array fits in the padding). The value gcc computes is that, plus the offset between the beginning of the array and the end of the struct. So the formula used by gcc is adding unused padding after the end of every flexible array. My default stance would be that gcc and the Linux code in question are wrong. We could reconsider if strict checking is impractical for Linux, but I'd expect kernel devs to prefer catching accesses one past the end of the array.
I'm going to write a better explanation with some drawings of my arguments in a bit, but to start off with, clang's answer is inconsistent. If you rewrite the reproducer in the issue to this version without the counted_by attribute:

#include <stdio.h>
#include <stdlib.h>

typedef unsigned int uid_t;
typedef unsigned int gid_t;
typedef struct {
int counter;
} atomic_t;
typedef struct refcount_struct {
atomic_t refs;
} refcount_t;
struct callback_head {
struct callback_head *next;
void (*func)(struct callback_head *head);
} __attribute__((aligned(sizeof(void *))));
#define rcu_head callback_head
typedef struct {
uid_t val;
} kuid_t;
typedef struct {
gid_t val;
} kgid_t;
struct posix_acl_entry {
short e_tag;
unsigned short e_perm;
union {
kuid_t e_uid;
kgid_t e_gid;
};
};
struct posix_acl {
refcount_t a_refcount;
struct rcu_head a_rcu;
unsigned int a_count;
struct posix_acl_entry a_entries[];
};
int main() {
unsigned int count = 1;
struct posix_acl *acl;
acl = malloc(sizeof(struct posix_acl) +
sizeof(struct posix_acl_entry) * count);
acl->a_count = count;
acl->a_entries[0].e_tag = 0x1;
printf("%zu\n", __builtin_dynamic_object_size(acl, 0));
return 0;
}
This version reports the full allocation size. Compiling the original reproducer in #111009 (the only change is adding the counted_by attribute) you get a smaller answer.
I'm not sure what you think you're proving with that example; without the counted_by, we can't compute the size of the struct, so we just fall back to returning the size of the allocation.
If you access the array none of this really matters, as you wouldn't be dealing with the whole-struct __bdos in that case.
I wrote a similar conclusion to @efriedma-quic in the email thread. The problem with your example is that, in the absence of the counted_by attribute, the compiler can only fall back to the visible allocation size.
I slightly changed my mind. I was going to write that gcc is definitely right; now I'm more at a 50/50. I have two arguments, one from a standards perspective, the other from a practical/safety perspective.

Standards argument:

When you do

acl = malloc(sizeof(struct posix_acl) + sizeof(struct posix_acl_entry) * 1);
acl->a_count = 1;

the size of the object that got created there is in fact 40 (feel free to correct me here). The description of __bdos supports this reading. But you could also do

acl = malloc(offsetof(struct posix_acl, a_entries) + sizeof(struct posix_acl_entry) * 1);

in which case the size of that object is 36. For __bdos we don't know which situation we're in though.

Practical/Safety argument:

It comes down to these 4 cases.

A
B
C
D
Only C is undefined behavior. So it comes down to whether you want false positives or false negatives. Sticking to the current behavior will lead to random crashes of otherwise safe code that might only trigger occasionally. Code like this will probably lurk for quite a while, especially because gcc has different behavior. Changing the behavior, on the other hand, will mean not all cases of C might be caught. Which way is correct here is neither obvious nor my call to make. I think we could find a way to scan the kernel for all the A cases and convert them to B or D. I would like to know whether any of you agree or disagree with the size of the object I derived from the standards perspective.
After looking at the assembly produced by gcc more, it actually looks like it's using the allocation size if it's known in the current context (for example if the struct was just malloced in the same function) and otherwise returns INT_MAX for the __bdos of a struct containing a flexible array member. It only returns the size based on the __counted_by attribute if you ask it for the __bdos of the flexible array member itself.

int test(struct posix_acl *acl) {
return __builtin_dynamic_object_size(acl, 0);
}

actually compiles to a function that just returns that constant maximum value when using gcc (trunk) on compiler explorer. So the call to kmemdup in this function:

struct posix_acl *
posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
{
struct posix_acl *clone = NULL;
if (acl) {
int size = offsetof(struct posix_acl, a_entries) + acl->a_count *
sizeof(struct posix_acl_entry);
clone = kmemdup(acl, size, flags);
if (clone)
refcount_set(&clone->a_refcount, 1);
}
return clone;
}
EXPORT_SYMBOL_GPL(posix_acl_clone);

won't actually be bounds-checked currently when compiling the kernel with gcc. Compiling with clang will make this bounds-checked using the lower bound of possible sizes, max(sizeof(struct s), offsetof(struct s, array) + p->count * sizeof(*p->array)).
So, we would actually get gcc's behavior with this patch.
From page 113 and 114 of https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1548.pdf:

"As a special case, the last element of a structure with more than one named member may have an incomplete array type; this is called a flexible array member. In most situations, the flexible array member is ignored. In particular, the size of the structure is as if the flexible array member were omitted except that it may have more trailing padding than the omission would imply. However, when a . (or ->) operator has a left operand that is (a pointer to) a structure with a flexible array member and the right operand names that member, it behaves as if that member were replaced with the longest array (with the same element type) that would not make the structure larger than the object being accessed; the offset of the array shall remain that of the flexible array member, even if this would differ from that of the replacement array. If this array would have no elements, it behaves as if it had one element but the behavior is undefined if any attempt is made to access that element or to generate a pointer one past it." [emphasis added]

The struct object that has an FAM behaves as if the FAM were replaced with the largest array that would not make the object larger than it is (when the . or -> operator is used). Clang currently returns just the size the object would have if the FAM were replaced with that largest possible array, even though the object only behaves like that when the . or -> operator is used. So GCC's behavior makes sense: it does not use that as-if-replaced size as the size of the whole struct object. Returning only that size, as clang does today, can be smaller than the object that was actually allocated.
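To make the quoted wording concrete, here is a minimal compile-time sketch. It assumes the reproducer's x86-64 layout (offsetof(struct posix_acl, a_entries) == 28 and 8 bytes per entry); those constants are assumptions, not taken from the thread:

/* "the longest array ... that would not make the structure larger than the
 * object being accessed", evaluated for a few object sizes: */
enum { FAM_OFFSET = 28, ENTRY_SIZE = 8 };

/* 40-byte object: longest such array has 1 element (28 + 8 = 36 <= 40, 28 + 16 = 44 > 40). */
_Static_assert(FAM_OFFSET + 1 * ENTRY_SIZE <= 40 && FAM_OFFSET + 2 * ENTRY_SIZE > 40, "");

/* 36-byte object: also 1 element. */
_Static_assert(FAM_OFFSET + 1 * ENTRY_SIZE <= 36 && FAM_OFFSET + 2 * ENTRY_SIZE > 36, "");

/* 32-byte object (plain sizeof): longest such array has 0 elements; per the quoted
 * text it then behaves as if it had one element, but accessing it is undefined. */
_Static_assert(FAM_OFFSET + 0 * ENTRY_SIZE <= 32 && FAM_OFFSET + 1 * ENTRY_SIZE > 32, "");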
The point of __counted_by is precisely to supplement the normal standard rules: specifically, if you have counted_by, the size of the flexible array is precisely the size specified by the attribute. Not whatever size is implied by the access. Otherwise, it would be illegal for __bdos to use the counted_by attribute at all. The size of the array can't change based on how __bdos is queried.
Let's say we simply define a

I agree that the size of the flexible array member when the __counted_by attribute is used is exactly count multiplied by the size of each element. And this PR changes none of that.

__builtin_dynamic_object_size(acl->a_entries)

still does exactly that, and therefore bounds-checking even an off-by-1 scenario will work correctly. This PR only changes the behavior of:

__builtin_dynamic_object_size(acl)

Here you are not asking for the size of the FAM, but of the whole struct object. The size of that object is whatever you gave to malloc. This is different from asking for the size of the FAM itself: the FAM only has a single correct size, namely count times the element size. This is why I believe the gcc behavior is correct. When it knows the size given to malloc it uses that, and otherwise it falls back to a conservative answer.
I agree that it is a weird compromise; I'm thinking clang should go with the gcc behavior here instead. However:
Can you tell me what the size specified by the standard is? Let's say for these two examples:
(2)
What is the size of the object in each case? My answers:
Picture of the sizes involved, to scale (image not reproduced here). Based on the C standard, what do we know in each case?
* If ``type & 2 == 0``, the least ``n`` is returned such that accesses to ``(const char*)ptr + n`` and beyond are known to be out of bounds. This is ``(size_t)-1`` if no better bound is known.
* If ``type & 2 == 2``, the greatest ``n`` is returned such that accesses to ``(const char*)ptr + i`` are known to be in bounds, for 0 <= ``i`` < ``n``. This is ``(size_t)0`` if no better bound is known.
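A minimal sketch of what those two modes mean in practice, assuming the allocation is visible to the compiler (e.g. built with -O2), so both bounds are exact:

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    char *p = malloc(16);
    if (!p)
        return 1;
    /* type 0: least n such that p + n and beyond are out of bounds -> 16 here,
     * (size_t)-1 if no better bound is known. */
    printf("%zu\n", __builtin_dynamic_object_size(p, 0));
    /* type 2: greatest n such that p + i is in bounds for 0 <= i < n -> 16 here,
     * (size_t)0 if no better bound is known. */
    printf("%zu\n", __builtin_dynamic_object_size(p, 2));
    free(p);
    return 0;
}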
__bdos(acl, 0): What is "the least n [...] such that accesses to (const char*)ptr + n and beyond are known to be out of bounds"?
n = malloc-max = offsetof(struct posix_acl, a_entries) + (acl->a_count + 1) * sizeof(struct posix_acl_entry) - 1 = 43
If alloc_size is known, that might be smaller.

__bdos(acl, 2): What is "the greatest n [...] such that accesses to (const char*)ptr + i are known to be in bounds, for 0 <= i < n"?
n = malloc-min = offsetof(struct posix_acl, a_entries) + acl->a_count * sizeof(struct posix_acl_entry) = 36
If alloc_size is known, that might be bigger.

__bdos(acl->a_entries, 0) and __bdos(acl->a_entries, 2):
Edit: This answer is not correct. It would be correct for type 1 and type 3.
Both should be:
n = acl->a_count * sizeof(struct posix_acl_entry) = 8
(nit: AIUI, GCC considers this to be a bug in the face of counted_by.)

Take a look at this: https://godbolt.org/z/b1ev4eP9G

This shows how GCC will adjust the reported size when it can see the allocation.
You can also lie to gcc if you do this:

// gcc and clang: 40
void local_storage_lies_fam(void)
{
struct foo local = { .counter = 10 };
struct foo *p = &local;
emit_length(__builtin_dynamic_object_size(p->array, 1));
}

I did some more digging, and at least the comments in the tests from gcc seem to confirm the difference in behavior between looking at the whole struct and looking at the FAM: the whole struct only uses the size from a known call to malloc, while bdos on the FAM uses the counted_by attribute.

p->foo = index + SIZE_BUMP;
/* When checking the observed access p->array, we have info on both
observered allocation and observed access,
A.1 from observed allocation:
allocated_size - offsetof (struct annotated, array[0])
A.2 from the counted-by attribute:
p->foo * sizeof (char)
We always use the latest value that is hold by the counted-by field.
*/
EXPECT(__builtin_dynamic_object_size(p->array, 0),
(p->foo) * sizeof(char));
EXPECT(__builtin_dynamic_object_size(p->array, 1),
(p->foo) * sizeof(char));
EXPECT(__builtin_dynamic_object_size(p->array, 2),
(p->foo) * sizeof(char));
EXPECT(__builtin_dynamic_object_size(p->array, 3),
(p->foo) * sizeof(char));
/* When checking the pointer p, we only have info on the observed
allocation. So, the object size info can only been obtained from
the call to malloc. */
EXPECT(__builtin_dynamic_object_size(p, 0), allocated_size);
EXPECT(__builtin_dynamic_object_size(p, 1), allocated_size);
EXPECT(__builtin_dynamic_object_size(p, 2), allocated_size);
EXPECT(__builtin_dynamic_object_size(p, 3), allocated_size);

clang on the other hand will always do its counted_by calculation if the attribute is there, both for bdos on the FAM and for bdos on the whole struct.
Also see these comments in the main function of the same test file:

int main ()
{
struct annotated *p, *q;
p = alloc_buf_more (10);
q = alloc_buf_less (10);
/* When checking the access p->array, we only have info on the counted-by
value. */
EXPECT(__builtin_dynamic_object_size(p->array, 0), p->foo * sizeof(char));
EXPECT(__builtin_dynamic_object_size(p->array, 1), p->foo * sizeof(char));
EXPECT(__builtin_dynamic_object_size(p->array, 2), p->foo * sizeof(char));
EXPECT(__builtin_dynamic_object_size(p->array, 3), p->foo * sizeof(char));
/* When checking the pointer p, we have no observed allocation nor observed
access, therefore, we cannot determine the size info here. */
EXPECT(__builtin_dynamic_object_size(p, 0), -1);
EXPECT(__builtin_dynamic_object_size(p, 1), -1);
EXPECT(__builtin_dynamic_object_size(p, 2), 0);
EXPECT(__builtin_dynamic_object_size(p, 3), 0);
/* When checking the access p->array, we only have info on the counted-by
value. */
EXPECT(__builtin_dynamic_object_size(q->array, 0), q->foo * sizeof(char));
EXPECT(__builtin_dynamic_object_size(q->array, 1), q->foo * sizeof(char));
EXPECT(__builtin_dynamic_object_size(q->array, 2), q->foo * sizeof(char));
EXPECT(__builtin_dynamic_object_size(q->array, 3), q->foo * sizeof(char));
/* When checking the pointer p, we have no observed allocation nor observed
access, therefore, we cannot determine the size info here. */
EXPECT(__builtin_dynamic_object_size(q, 0), -1);
EXPECT(__builtin_dynamic_object_size(q, 1), -1);
EXPECT(__builtin_dynamic_object_size(q, 2), 0);
EXPECT(__builtin_dynamic_object_size(q, 3), 0);
DONE ();
}
I think this is because GCC hasn't (yet) modified their version of __builtin_dynamic_object_size to take counted_by into account when asked about the whole struct.
This just turns off the check.
[snip]
For what it's worth, GCC's return value isn't wrong, according to them. They want to return something conservative when they can't see the allocation.
Changing the kernel in all the places that rely on this allocation pattern would be a much bigger effort.
Additionally, I think the kernel's code is not unreasonable, as this is a common way to allocate structs with flexible array members. It's even how the example in the C11 standard does it. So it's likely that other projects are also expecting the same behavior. I think it would be way easier to get clang to follow what the kernel currently expects. While I think clang's current behavior is not quite correct and the maximum size should be calculated differently [3], that doesn't solve the compatibility with the Linux kernel for all cases. So ideally we should introduce the behavior that this PR calls for:

sizeof(struct s) + p->count * sizeof(*p->array)

via an option. I see a few ways this could be accomplished:
I prefer option 2. Should this be coordinated with gcc? Currently they don't implement such an option. I'm happy to adjust this PR to hide it behind an option.

[1] https://github.com/torvalds/linux/blob/b983b271662bd6104d429b0fd97af3333ba760bf/include/linux/overflow.h#L354-L373