Skip to content

Commit 9903c19

Browse files
authored
[-Wunterminated-string-initialization] Handle C string literals ending with explicit '\0' (#143487)
In C, a char array does not need the "nonstring" attribute if its initializer is a string literal that 1) explicitly ends with '\0' and 2) fits in the array once the implicit terminating null character is truncated. For example: `char a[4] = "ABC\0"; // fine, needs no "nonstring" attr` rdar://152506883
1 parent 1b914e1 commit 9903c19

File tree

3 files changed

+81
-22
lines changed

3 files changed

+81
-22
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ C Language Changes
222222
223223
char buf1[3] = "foo"; // -Wunterminated-string-initialization
224224
char buf2[3] = "flarp"; // -Wexcess-initializers
225+
char buf3[3] = "fo\0"; // This is fine, no warning.
225226
226227
This diagnostic can be suppressed by adding the new ``nonstring`` attribute
227228
to the field or variable being initialized. #GH137705

clang/lib/Sema/SemaInit.cpp

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -260,29 +260,37 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
260260
diag::ext_initializer_string_for_char_array_too_long)
261261
<< Str->getSourceRange();
262262
else if (StrLength - 1 == ArrayLen) {
263-
// If the entity being initialized has the nonstring attribute, then
264-
// silence the "missing nonstring" diagnostic. If there's no entity,
265-
// check whether we're initializing an array of arrays; if so, walk the
266-
// parents to find an entity.
267-
auto FindCorrectEntity =
268-
[](const InitializedEntity *Entity) -> const ValueDecl * {
269-
while (Entity) {
270-
if (const ValueDecl *VD = Entity->getDecl())
271-
return VD;
272-
if (!Entity->getType()->isArrayType())
273-
return nullptr;
274-
Entity = Entity->getParent();
275-
}
276-
277-
return nullptr;
278-
};
279-
if (const ValueDecl *D = FindCorrectEntity(&Entity);
280-
!D || !D->hasAttr<NonStringAttr>())
281-
S.Diag(
282-
Str->getBeginLoc(),
283-
diag::warn_initializer_string_for_char_array_too_long_no_nonstring)
284-
<< ArrayLen << StrLength << Str->getSourceRange();
263+
// In C, if the string literal is null-terminated explicitly, e.g., `char
264+
// a[4] = "ABC\0"`, the "missing nonstring" warning should not be emitted:
265+
const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens());
266+
bool IsSLSafe = SL && SL->getLength() > 0 &&
267+
SL->getCodeUnit(SL->getLength() - 1) == 0;
268+
269+
if (!IsSLSafe) {
270+
// If the entity being initialized has the nonstring attribute, then
271+
// silence the "missing nonstring" diagnostic. If there's no entity,
272+
// check whether we're initializing an array of arrays; if so, walk the
273+
// parents to find an entity.
274+
auto FindCorrectEntity =
275+
[](const InitializedEntity *Entity) -> const ValueDecl * {
276+
while (Entity) {
277+
if (const ValueDecl *VD = Entity->getDecl())
278+
return VD;
279+
if (!Entity->getType()->isArrayType())
280+
return nullptr;
281+
Entity = Entity->getParent();
282+
}
285283

284+
return nullptr;
285+
};
286+
if (const ValueDecl *D = FindCorrectEntity(&Entity);
287+
!D || !D->hasAttr<NonStringAttr>())
288+
S.Diag(
289+
Str->getBeginLoc(),
290+
diag::
291+
warn_initializer_string_for_char_array_too_long_no_nonstring)
292+
<< ArrayLen << StrLength << Str->getSourceRange();
293+
}
286294
// Always emit the C++ compatibility diagnostic.
287295
S.Diag(Str->getBeginLoc(),
288296
diag::warn_initializer_string_for_char_array_too_long_for_cpp)

clang/test/Sema/attr-nonstring_safe.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// RUN: %clang_cc1 -fsyntax-only -verify=compat,expected -Wunterminated-string-initialization %s -x c
2+
// RUN: %clang_cc1 -fsyntax-only -verify=cxx -Wunterminated-string-initialization %s -x c++
3+
4+
#ifndef __cplusplus
5+
typedef unsigned short char16_t;
6+
typedef unsigned int char32_t;
7+
typedef __WCHAR_TYPE__ wchar_t;
8+
#endif
9+
10+
// C++ is stricter, so the following cases are diagnosed as errors there. In
11+
// C, the following examples are fine.
12+
13+
char foo3[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
14+
char foo1[1] = "\0"; // cxx-error {{initializer-string for char array is too long, array size is 1 but initializer has size 2 (including the null terminating character)}}
15+
16+
struct S {
17+
char buf[3];
18+
char fub[3];
19+
} s = { "ba\0", "bo\0" }; // cxx-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
20+
21+
#pragma clang diagnostic push
22+
#pragma clang diagnostic warning "-Wc++-compat"
23+
// Test different encodings:
24+
signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
25+
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}}
26+
unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
27+
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}}
28+
wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
29+
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
30+
compat-warning {{identifier 'wchar_t' conflicts with a C++ keyword}}
31+
char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
32+
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
33+
compat-warning {{identifier 'char16_t' conflicts with a C++ keyword}}
34+
char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} \
35+
compat-warning {{initializer-string for character array is too long for C++, array size is 3 but initializer has size 4 (including the null terminating character)}} \
36+
compat-warning {{identifier 'char32_t' conflicts with a C++ keyword}}
37+
#pragma clang diagnostic pop
38+
39+
// Test list initializer:
40+
signed char scfoo_lst[3] = {'f', 'o', '\0'};
41+
unsigned char ucfoo_lst[3] = {'f', 'o', '\0'};
42+
wchar_t wcfoo_lst[3] = {L'f', L'o', L'\0'};
43+
char16_t c16foo_lst[3] = {u'f', u'o', u'\0'};
44+
char32_t c32foo_lst[3] = {U'f', U'o', U'\0'};
45+
46+
// Declaring an array of size 0 is invalid per the C standard, but compilers
47+
// may allow it:
48+
char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} \
49+
cxx-error {{initializer-string for char array is too long, array size is 0 but initializer has size 1 (including the null terminating character)}}
50+
char b[1] = ""; // no warn

0 commit comments

Comments
 (0)