Skip to content

[-Wunterminated-string-initialization] Handle C string literals ending with explicit '\0' #143487

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ C Language Changes

char buf1[3] = "foo"; // -Wunterminated-string-initialization
char buf2[3] = "flarp"; // -Wexcess-initializers
char buf3[3] = "fo\0"; // This is fine, no warning.

This diagnostic can be suppressed by adding the new ``nonstring`` attribute
to the field or variable being initialized. #GH137705
Expand Down
52 changes: 30 additions & 22 deletions clang/lib/Sema/SemaInit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,29 +260,37 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
diag::ext_initializer_string_for_char_array_too_long)
<< Str->getSourceRange();
else if (StrLength - 1 == ArrayLen) {
// If the entity being initialized has the nonstring attribute, then
// silence the "missing nonstring" diagnostic. If there's no entity,
// check whether we're initializing an array of arrays; if so, walk the
// parents to find an entity.
auto FindCorrectEntity =
[](const InitializedEntity *Entity) -> const ValueDecl * {
while (Entity) {
if (const ValueDecl *VD = Entity->getDecl())
return VD;
if (!Entity->getType()->isArrayType())
return nullptr;
Entity = Entity->getParent();
}

return nullptr;
};
if (const ValueDecl *D = FindCorrectEntity(&Entity);
!D || !D->hasAttr<NonStringAttr>())
S.Diag(
Str->getBeginLoc(),
diag::warn_initializer_string_for_char_array_too_long_no_nonstring)
<< ArrayLen << StrLength << Str->getSourceRange();
// In C, if the string literal is null-terminated explicitly, e.g., `char
// a[4] = "ABC\0"`, there should be no warning:
const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens());
bool IsSLSafe = SL && SL->getLength() > 0 &&
SL->getCodeUnit(SL->getLength() - 1) == 0;

if (!IsSLSafe) {
// If the entity being initialized has the nonstring attribute, then
// silence the "missing nonstring" diagnostic. If there's no entity,
// check whether we're initializing an array of arrays; if so, walk the
// parents to find an entity.
auto FindCorrectEntity =
[](const InitializedEntity *Entity) -> const ValueDecl * {
while (Entity) {
if (const ValueDecl *VD = Entity->getDecl())
return VD;
if (!Entity->getType()->isArrayType())
return nullptr;
Entity = Entity->getParent();
}

return nullptr;
};
if (const ValueDecl *D = FindCorrectEntity(&Entity);
!D || !D->hasAttr<NonStringAttr>())
S.Diag(
Str->getBeginLoc(),
diag::
warn_initializer_string_for_char_array_too_long_no_nonstring)
<< ArrayLen << StrLength << Str->getSourceRange();
}
// Always emit the C++ compatibility diagnostic.
S.Diag(Str->getBeginLoc(),
diag::warn_initializer_string_for_char_array_too_long_for_cpp)
Expand Down
50 changes: 50 additions & 0 deletions clang/test/Sema/attr-nonstring_safe.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// RUN: %clang_cc1 -fsyntax-only -verify=compat,expected -Wunterminated-string-initialization %s -x c
// RUN: %clang_cc1 -fsyntax-only -verify=cxx -Wunterminated-string-initialization %s -x c++

#ifndef __cplusplus
typedef unsigned short char16_t;
typedef unsigned int char32_t;
typedef __WCHAR_TYPE__ wchar_t;
#endif

// C++ is stricter, so the following cases are diagnosed as errors there. In
// C, the following examples are fine.

char foo3[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
char foo1[1] = "\0"; // cxx-error {{initializer-string for char array is too long, array size is 1 but initializer has size 2 (including the null terminating character)}}

// Aggregate case: each 3-element member is exactly filled by a literal that
// ends in an explicit '\0'. Only the C++ run expects diagnostics here, one
// per member.
struct S {
char buf[3];
char fub[3];
} s = { "ba\0", "bo\0" }; // cxx-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}

#pragma clang diagnostic push
#pragma clang diagnostic warning "-Wc++-compat"
// Test different encodings:
signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}}
unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}}
wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
compat-warning {{identifier 'wchar_t' conflicts with a C++ keyword}}
char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
compat-warning {{identifier 'char16_t' conflicts with a C++ keyword}}
char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
compat-warning {{identifier 'char32_t' conflicts with a C++ keyword}}
#pragma clang diagnostic pop

// Test list initializer:
signed char scfoo_lst[3] = {'f', 'o', '\0'};
unsigned char ucfoo_lst[3] = {'f', 'o', '\0'};
wchar_t wcfoo_lst[3] = {L'f', L'o', L'\0'};
char16_t c16foo_lst[3] = {u'f', u'o', u'\0'};
char32_t c32foo_lst[3] = {U'f', U'o', U'\0'};

// Declaring an array of size 0 is invalid per the C standard, but compilers
// may allow it:
char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \
cxx-error {{initializer-string for char array is too long, array size is 0 but initializer has size 1 (including the null terminating character)}}
char b[1] = ""; // no warn
Loading