Skip to content

[-Wunsafe-buffer-usage][BoundsSafety] support __null_terminated in the interop mode #10060

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 23, 2025
Merged
6 changes: 4 additions & 2 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -13198,8 +13198,10 @@ def warn_bounds_safety_incompatible_non_terminated_by_to_terminated_by : Warning
"sending type to parameter of incompatible type}0,1|"
"%diff{casting $ to incompatible type $|"
"casting type to incompatible type}0,1|"
"}2 is an unsafe operation; use '%select{__unsafe_terminated_by_from_indexable|__unsafe_null_terminated_from_indexable}3()' "
"or '%select{__unsafe_forge_terminated_by|__unsafe_forge_null_terminated}3()' to perform this conversion">, InGroup<BoundsSafetyStrictTerminatedByCast>, DefaultError;
"}2 is an unsafe operation"
"%select{; use '__unsafe_terminated_by_from_indexable()' or '__unsafe_forge_terminated_by()' to perform this conversion|"
"; use '__unsafe_null_terminated_from_indexable()' or '__unsafe_forge_null_terminated()' to perform this conversion|"
"}3">, InGroup<BoundsSafetyStrictTerminatedByCast>, DefaultError;
def err_bounds_safety_incompatible_non_terminated_by_to_terminated_by : Error<warn_bounds_safety_incompatible_non_terminated_by_to_terminated_by.Summary>;
def err_bounds_safety_incompatible_terminated_by_to_non_terminated_by_mismatch : Error<
"%select{"
Expand Down
7 changes: 7 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -1560,6 +1560,13 @@ class Sema final : public SemaBase {
/// be controlled with \#pragma clang abi_ptr_attr set(*ATTR*). Defaults to
/// __single in user code and __unsafe_indexable in system headers.
BoundsSafetyPointerAttributes CurPointerAbi;

/// \return true iff `-Wunsafe-buffer-usage` is enabled for `Loc` and Bounds
/// Safety attribute-only mode is on.
bool isCXXSafeBuffersBoundsSafetyInteropEnabledAt(SourceLocation Loc) const {
return LangOpts.CPlusPlus && LangOpts.isBoundsSafetyAttributeOnlyMode() &&
!Diags.isIgnored(diag::warn_unsafe_buffer_operation, Loc);
}
/* TO_UPSTREAM(BoundsSafety) OFF */

/// pragma clang section kind
Expand Down
15 changes: 11 additions & 4 deletions clang/lib/Sema/SemaCast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,9 @@ namespace {
/// int *__bidi_indexable p1;
/// int *__indexable p2 = (int *__indexable)p1;
/// \endcode
if (Self.getLangOpts().BoundsSafety) {
if (Self.getLangOpts().BoundsSafety ||
Self.isCXXSafeBuffersBoundsSafetyInteropEnabledAt(
SrcExpr.get()->getBeginLoc())) {
unsigned DiagKind = 0;
bool isInvalid = false;
// The type error may be nested, so any pointer can result in VT errors

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to assert that Self.getLangOpts().BoundsSafety || ConvertTy == Sema::Compatible || (ConvertTy == Sema::IncompatibleNonValueTerminatedToValueTerminatedPointer && InCXXBoundsSafetyMode)? Especially since CheckValueTerminatedAssignmentConstraints used to only run under BoundsSafety, but now it runs under BoundsSafetyAttributes even for C and in C++ without safe buffers mode. We really don't want it to accidentally start returning errors in these modes. Otherwise LGTM

Copy link
Author

@ziqingluo-90 ziqingluo-90 Feb 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point. But maybe the following is enough?
Self.getLangOpts().BoundsSafety || Self.isCXXSafeBuffersBoundsSafetyInteropEnabledAt(SrcExpr.get()->getBeginLoc()) ,
where isCXXSafeBuffersBoundsSafetyInteropEnabledAt is defined by

  bool isCXXSafeBuffersBoundsSafetyInteropEnabledAt(SourceLocation Loc) const {
    return LangOpts.CPlusPlus && LangOpts.isBoundsSafetyAttributeOnlyMode() &&
           isCXXSafeBuffersEnabledAt(Loc);
  }

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That works for me 👍

Expand Down Expand Up @@ -238,7 +240,9 @@ namespace {
const auto *DstPointerType = DestType->getAs<ValueTerminatedType>();
IsDstNullTerm =
DstPointerType->getTerminatorValue(Self.Context).isZero();
if (Self.getLangOpts().BoundsSafetyRelaxedSystemHeaders) {
if (Self.getLangOpts().BoundsSafetyRelaxedSystemHeaders ||
Self.isCXXSafeBuffersBoundsSafetyInteropEnabledAt(
OpRange.getBegin())) {
DiagKind = diag::
warn_bounds_safety_incompatible_non_terminated_by_to_terminated_by;
isInvalid = false;
Expand Down Expand Up @@ -283,8 +287,11 @@ namespace {
warn_bounds_safety_incompatible_non_terminated_by_to_terminated_by) {
auto FDiag = Self.Diag(OpRange.getBegin(), DiagKind)
<< SrcType << DestType << AssignmentAction::Casting
<< (IsDstNullTerm ? /*null_terminated*/ 1
: /*terminated_by*/ 0);
<< (!Self.isCXXSafeBuffersBoundsSafetyInteropEnabledAt(
OpRange.getBegin())
? (IsDstNullTerm ? /*null_terminated*/ 1
: /*terminated_by*/ 0)
: 2 /* cut message irrelevant to that mode*/);
} else {
Self.Diag(OpRange.getBegin(), DiagKind)
<< SrcType << DestType << AssignmentAction::Casting;
Expand Down
43 changes: 37 additions & 6 deletions clang/lib/Sema/SemaExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12186,13 +12186,39 @@ Sema::CheckValueTerminatedAssignmentConstraints(QualType LHSType,
Context.hasSameUnqualifiedType(
Context.getCorrespondingSignedType(LPointee),
Context.getCorrespondingSignedType(RPointee)));
if (CompatiblePointees &&
if (CompatiblePointees && RHSExpr->isPRValue() &&
RHSExpr->tryEvaluateTerminatorElement(Evald, Context) &&
Evald.Val.isInt() &&
llvm::APSInt::isSameValue(LVTT->getTerminatorValue(Context),
Evald.Val.getInt())) {
return Sema::Compatible;
}
// If in C++ Safe Buffers/Bounds Safety interoperation mode, the RHS can
// be 'std::string::c_str/data':
bool isNullTerm = LVTT->getTerminatorValue(Context).isZero();

if (isNullTerm && isCXXSafeBuffersBoundsSafetyInteropEnabledAt(
RHSExpr->getBeginLoc())) {
if (const auto *MCE =
dyn_cast<CXXMemberCallExpr>(RHSExpr->IgnoreParenImpCasts())) {
IdentifierInfo *MethodDeclII = nullptr, *ObjTypeII = nullptr;

if (MCE->getMethodDecl())
MethodDeclII = MCE->getMethodDecl()->getIdentifier();
if (const auto *RecordDecl =
MCE->getObjectType()->getAsRecordDecl();
RecordDecl && RecordDecl->isInStdNamespace())
// If not in std namespace, let `ObjTypeII` be null so that the
// match fails:
ObjTypeII = RecordDecl->getIdentifier();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there a reason we support std::string::c_str() but not std::string::data()?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::string::data() is not null-termination guaranteed as I remember.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://en.cppreference.com/w/cpp/string/basic_string/c_str
c_str() and data() perform the same function. (since C++11)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, it's changed since C++11.

if (MethodDeclII && ObjTypeII &&
(MethodDeclII->getName() == "c_str" ||

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this work if LHS is terminated by a value different than 0, e.g. const char *__terminated_by(42)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Need to check it is 0-terminator.

// std::string::data is also null-terminated since C++11:
MethodDeclII->getName() == "data") &&
ObjTypeII->getName() == "basic_string")
return Sema::Compatible;
}
}
}
return Nested
? Sema::
Expand Down Expand Up @@ -20783,9 +20809,11 @@ bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy,
const auto *DstPointerType = DstType->getAs<ValueTerminatedType>();
IsDstNullTerm =
DstPointerType->getTerminatorValue(getASTContext()).isZero();
if (getLangOpts().BoundsSafetyRelaxedSystemHeaders) {
DiagKind =
diag::warn_bounds_safety_incompatible_non_terminated_by_to_terminated_by;

if (getLangOpts().BoundsSafetyRelaxedSystemHeaders ||
isCXXSafeBuffersBoundsSafetyInteropEnabledAt(Loc)) {
DiagKind = diag::
warn_bounds_safety_incompatible_non_terminated_by_to_terminated_by;
isInvalid = false;
} else {
DiagKind =
Expand Down Expand Up @@ -21061,8 +21089,11 @@ bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy,
diag::
warn_bounds_safety_incompatible_non_terminated_by_to_terminated_by) {
FDiag << FirstType << SecondType << ActionForDiag
<< SrcExpr->getSourceRange() <<
(IsDstNullTerm ? /*null_terminated*/ 1 : /*terminated_by*/ 0);
<< SrcExpr->getSourceRange()
<< (!isCXXSafeBuffersBoundsSafetyInteropEnabledAt(Loc)
? (IsDstNullTerm ? /*null_terminated*/ 1
: /*terminated_by*/ 0)
: 2 /* cut message irrelevant to that mode*/);
} else {
FDiag << FirstType << SecondType << ActionForDiag
<< SrcExpr->getSourceRange();
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Sema/SemaExprCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4225,6 +4225,15 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
const ImplicitConversionSequence &ICS,
AssignmentAction Action,
CheckedConversionKind CCK) {
if (ToType->isPointerType() &&
isCXXSafeBuffersBoundsSafetyInteropEnabledAt(From->getBeginLoc())) {
// In C++ Safe Buffers/Bounds Safety interop mode, warn about implicit
// conversions from non-`__null_terminated` to `__null_terminated`:
AssignConvertType ConvTy =
CheckValueTerminatedAssignmentConstraints(ToType, From);
DiagnoseAssignmentResult(ConvTy, From->getExprLoc(), ToType,
From->getType(), From, Action);
}
// C++ [over.match.oper]p7: [...] operands of class type are converted [...]
if (CCK == CheckedConversionKind::ForBuiltinOverloadedOp &&
!From->getType()->isRecordType())
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Sema/SemaInit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9046,7 +9046,8 @@ ExprResult InitializationSequence::Perform(Sema &S,
Entity.getKind() == InitializedEntity::EK_Result);

/* TO_UPSTREAM(BoundsSafety) ON*/
if (S.LangOpts.BoundsSafety) {
if (S.LangOpts.BoundsSafety || S.isCXXSafeBuffersBoundsSafetyInteropEnabledAt(
CurInit.get()->getBeginLoc())) {
auto *Init = CurInit.get();
const auto K = Entity.getKind();
if (Init && Entity.getParent() == nullptr &&
Expand Down
205 changes: 205 additions & 0 deletions clang/test/SemaCXX/warn-unsafe-buffer-usage-null-terminated.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
// RUN: %clang_cc1 -fsyntax-only -std=c++20 -Wno-all -Wunsafe-buffer-usage -fsafe-buffer-usage-suggestions -fexperimental-bounds-safety-attributes -verify %s

#include <ptrcheck.h>

typedef unsigned size_t;
namespace std {
template <typename CharT>
struct basic_string {
const CharT *data() const noexcept;
CharT *data() noexcept;
const CharT *c_str() const noexcept;
size_t size() const noexcept;
size_t length() const noexcept;
};

typedef basic_string<char> string;

template <typename CharT>
struct basic_string_view {
basic_string_view(basic_string<CharT> str);
const CharT *data() const noexcept;
size_t size() const noexcept;
size_t length() const noexcept;
};

typedef basic_string_view<char> string_view;
} // namespace std

void nt_parm(const char * __null_terminated);
const char * __null_terminated get_nt(const char *, size_t);

void basics(const char * cstr, size_t cstr_len, std::string cxxstr) {
const char * __null_terminated p = "hello"; // safe init

nt_parm(p);
nt_parm(get_nt(cstr, cstr_len));

const char * __null_terminated p2 = cxxstr.c_str(); // safe init
const char * __null_terminated p3;

p3 = cxxstr.c_str(); // safe assignment
p3 = "hello"; // safe assignment
p3 = p; // safe assignment
p3 = get_nt(cstr, cstr_len); // safe assignment

// expected-error@+1 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'const char *' is an unsafe operation}}
const char * __null_terminated p4 = cstr; // warn

// expected-error@+1 {{passing 'const char *' to parameter of incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
nt_parm(cstr); // warn
// expected-error@+1 {{assigning to 'const char * __terminated_by(0)' (aka 'const char *') from incompatible type 'const char *' is an unsafe operation}}
p4 = cstr; // warn

std::string_view view{cxxstr};

// expected-error@+1 {{assigning to 'const char * __terminated_by(0)' (aka 'const char *') from incompatible type 'const char *' is an unsafe operation}}
p4 = view.data(); // warn
// expected-error@+1 {{passing 'const char *' to parameter of incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
nt_parm(view.data()); // warn

const char * __null_terminated p5 = 0; // nullptr is ok
// expected-error@+1 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'const char *' is an unsafe operation}}
const char * __null_terminated p6 = (const char *)1; // other integer literal is unsafe

// (non-0)-terminated pointer is NOT compatible with 'std::string::c_str':
// expected-error@+1 {{initializing 'const char * __terminated_by(42)' (aka 'const char *') with an expression of incompatible type 'const char *' is an unsafe operation}}
const char * __terminated_by(42) p7 = cxxstr.c_str();
}

void test_explicit_cast(char * p, const char * q) {
// expected-error@+1 {{casting 'char *' to incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
const char * __null_terminated nt = (const char * __null_terminated) p;
const char * __null_terminated nt2 = reinterpret_cast<const char * __null_terminated> (p); // FP for now

// expected-error@+1 {{casting 'char *' to incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
nt = (const char * __null_terminated) p;
nt2 = reinterpret_cast<const char * __null_terminated> (p); // FP for now
// expected-error@+1 {{casting 'char *' to incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
nt2 = static_cast<const char * __null_terminated> (p); // FP for now

// expected-error@+1 {{casting 'const char *' to incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
const char * __null_terminated nt3 = (const char * __null_terminated) q;
const char * __null_terminated nt4 = reinterpret_cast<const char * __null_terminated> (q); // FP for now

// expected-error@+1 {{casting 'const char *' to incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
nt3 = (const char * __null_terminated) q;
nt4 = reinterpret_cast<const char * __null_terminated> (q); // FP for now
// expected-error@+1 {{casting 'const char *' to incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
nt4 = static_cast<const char * __null_terminated> (q);

// OK cases for C-style casts
char * __null_terminated nt_p;
const char * __null_terminated nt_p2;
int * __null_terminated nt_p3;
const int * __null_terminated nt_p4;

const char * __null_terminated nt5 = (const char * __null_terminated) nt_p;
const char * __null_terminated nt6 = (const char * __null_terminated) nt_p2;
const char * __null_terminated nt7 = (const char * __null_terminated) nt_p3;
const char * __null_terminated nt8 = (const char * __null_terminated) nt_p4;

nt5 = (const char * __null_terminated) nt_p;
nt6 = (const char * __null_terminated) nt_p2;
nt7 = (const char * __null_terminated) nt_p3;
nt8 = (const char * __null_terminated) nt_p4;


char * __null_terminated nt9 = (char * __null_terminated) nt_p;
char * __null_terminated nt10 = (char * __null_terminated) nt_p2;
char * __null_terminated nt11 = (char * __null_terminated) nt_p3;
char * __null_terminated nt12 = (char * __null_terminated) nt_p4;

nt9 = (char * __null_terminated) nt_p;
nt10 = (char * __null_terminated) nt_p2;
nt11 = (char * __null_terminated) nt_p3;
nt12 = (char * __null_terminated) nt_p4;
}

const char * __null_terminated test_return(const char * p, char * q, std::string &str) {
if (p)
return p; // expected-error {{returning 'const char *' from a function with incompatible result type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
if (q)
return q; // expected-error {{returning 'char *' from a function with incompatible result type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
return str.c_str();
}

void test_array(char * cstr) {
const char arr[__null_terminated 3] = {'h', 'i', '\0'};
// expected-error@+1 {{array 'arr2' with '__terminated_by' attribute is initialized with an incorrect terminator (expected: 0; got 'i')}}
const char arr2[__null_terminated 2] = {'h', 'i'};
const char * __null_terminated arr3[] = {"hello", "world"};
// expected-error@+1 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'char *' is an unsafe operation}}
const char * __null_terminated arr4[] = {"hello", "world", cstr};

// expected-error@+1 {{assigning to 'const char * __terminated_by(0)' (aka 'const char *') from incompatible type 'char *' is an unsafe operation}}
arr3[0] = cstr;
}

struct T {
int a;
const char * __null_terminated p;
struct TT {
int a;
const char * __null_terminated p;
} tt;
};
void test_compound(char * cstr) {
std::string str;
T t = {42, "hello"};
T t2 = {.a = 42};
T t3 = {.p = str.c_str()};
// expected-error@+1 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'char *' is an unsafe operation}}
T t4 = {42, "hello", {.p = cstr}};

// expected-error@+1 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'char *' is an unsafe operation}}
t4 = (struct T){42, "hello", {.p = cstr}};
}

// expected-error@+3 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'char *' is an unsafe operation}}
class C {
const char * __null_terminated p;
const char * __null_terminated q = (char *) 1; // warn
struct T t;
public:
// expected-error@+1 2{{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'char *' is an unsafe operation}}
C(char * p): p(p), t({0, p}) {};
C(const char * __null_terminated p, struct T t);
};

void f(const C &c);
C test_class(char * cstr) {
// expected-error@+2 {{passing 'char *' to parameter of incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
// expected-error@+1 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'char *' is an unsafe operation}}
C c{cstr, {0, cstr}};
// expected-error@+2 {{passing 'char *' to parameter of incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
// expected-error@+1 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'char *' is an unsafe operation}}
C c1(cstr, {0, cstr});
// expected-error@+2 {{passing 'char *' to parameter of incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
// expected-error@+1 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'char *' is an unsafe operation}}
C *c2 = new C(cstr, {0, cstr});
// expected-error@+2 {{passing 'char *' to parameter of incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
// expected-error@+1 {{initializing 'const char * __terminated_by(0)' (aka 'const char *') with an expression of incompatible type 'char *' is an unsafe operation}}
f(C{cstr, {0, cstr}});

C("hello", {0, "hello"});
if (1-1)
return {cstr, {}}; // expected-error {{passing 'char *' to parameter of incompatible type 'const char * __terminated_by(0)' (aka 'const char *') is an unsafe operation}}
return {"hello", {}};
}


// Test input/output __null_terminated parameter.
// expected-note@+1 {{candidate function not viable:}}
void g(const char * __null_terminated *p);
void test_output(const char * __null_terminated p) {
const char * __null_terminated local_nt = p;
const char * const_local;
char * local;

g(&local_nt); // safe
// expected-error@+1 {{passing 'const char **' to parameter of incompatible type 'const char * __terminated_by(0)*' (aka 'const char **') that adds '__terminated_by' attribute is not allowed}}
g(&const_local);
// expected-error@+1 {{no matching function for call to 'g'}}
g(&local);
}