Skip to content

Commit 7a086e1

Browse files
authored
[clang][CodeGen] Zero init unspecified fields in initializers in C (#97121)
When an initializer is provided to a variable, the Linux kernel relied on the compiler to zero-initialize unspecified fields, as clarified in https://www.spinics.net/lists/netdev/msg1007244.html. But clang doesn't guarantee this: 1. For a union type, if an empty initializer is given, clang only initializes bytes for the first field, left bytes for other (larger) fields are marked as undef. Accessing those undef bytes can lead to undefined behaviors. 2. For a union type, if an initializer explicitly sets a field, left bytes for other (larger) fields are marked as undef. 3. When an initializer is given, clang doesn't zero initialize padding. So this patch makes the following change: 1. In C, when an initializer is provided for a variable, zero-initialize undef and padding fields in the initializer. 2. Document the change in LanguageExtensions.rst. As suggested in #78034 (comment), the change isn't required by C23, but it's standards conforming to do so. Fixes: #97459
1 parent 0a42c7c commit 7a086e1

23 files changed

+658
-96
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5860,3 +5860,26 @@ specify the starting offset to begin embedding from. The resources is treated
58605860
as being empty if the specified offset is larger than the number of bytes in
58615861
the resource. The offset will be applied *before* any ``limit`` parameters are
58625862
applied.
5863+
5864+
Union and aggregate initialization in C
5865+
=======================================
5866+
5867+
In C23 (N2900), when an object is initialized from initializer ``= {}``, all
5868+
elements of arrays, all members of structs, and the first members of unions are
5869+
empty-initialized recursively. In addition, all padding bits are initialized to
5870+
zero.
5871+
5872+
Clang guarantees the following behaviors:
5873+
5874+
* ``1:`` Clang supports initializer ``= {}`` mentioned above in all C
5875+
standards.
5876+
5877+
* ``2:`` When unions are initialized from initializer ``= {}``, bytes outside
5878+
of the first members of unions are also initialized to zero.
5879+
5880+
* ``3:`` When unions, structures and arrays are initialized from initializer
5881+
``= { initializer-list }``, all members not explicitly initialized in
5882+
the initializer list are empty-initialized recursively. In addition, all
5883+
padding bits are initialized to zero.
5884+
5885+
Currently, the above extension only applies to C source code, not C++.

clang/lib/CodeGen/CGExprAgg.cpp

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1698,6 +1698,17 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
16981698
// Prepare a 'this' for CXXDefaultInitExprs.
16991699
CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress());
17001700

1701+
const bool ZeroInitPadding =
1702+
CGF.CGM.shouldZeroInitPadding() && !Dest.isZeroed();
1703+
const Address BaseLoc = Dest.getAddress().withElementType(CGF.Int8Ty);
1704+
auto DoZeroInitPadding = [&](CharUnits Offset, CharUnits Size) {
1705+
if (Size.isPositive()) {
1706+
Address Loc = CGF.Builder.CreateConstGEP(BaseLoc, Offset.getQuantity());
1707+
llvm::Constant *SizeVal = CGF.Builder.getInt64(Size.getQuantity());
1708+
CGF.Builder.CreateMemSet(Loc, CGF.Builder.getInt8(0), SizeVal, false);
1709+
}
1710+
};
1711+
17011712
if (record->isUnion()) {
17021713
// Only initialize one field of a union. The field itself is
17031714
// specified by the initializer list.
@@ -1722,17 +1733,37 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
17221733
if (NumInitElements) {
17231734
// Store the initializer into the field
17241735
EmitInitializationToLValue(InitExprs[0], FieldLoc);
1736+
if (ZeroInitPadding) {
1737+
CharUnits TotalSize =
1738+
Dest.getPreferredSize(CGF.getContext(), DestLV.getType());
1739+
CharUnits FieldSize =
1740+
CGF.getContext().getTypeSizeInChars(FieldLoc.getType());
1741+
DoZeroInitPadding(FieldSize, TotalSize - FieldSize);
1742+
}
17251743
} else {
17261744
// Default-initialize to null.
1727-
EmitNullInitializationToLValue(FieldLoc);
1745+
if (ZeroInitPadding)
1746+
EmitNullInitializationToLValue(DestLV);
1747+
else
1748+
EmitNullInitializationToLValue(FieldLoc);
17281749
}
1729-
17301750
return;
17311751
}
17321752

17331753
// Here we iterate over the fields; this makes it simpler to both
17341754
// default-initialize fields and skip over unnamed fields.
1755+
const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(record);
1756+
CharUnits SizeSoFar = CharUnits::Zero();
17351757
for (const auto *field : record->fields()) {
1758+
if (ZeroInitPadding) {
1759+
unsigned FieldNo = field->getFieldIndex();
1760+
CharUnits Offset =
1761+
CGF.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo));
1762+
DoZeroInitPadding(SizeSoFar, Offset - SizeSoFar);
1763+
CharUnits FieldSize =
1764+
CGF.getContext().getTypeSizeInChars(field->getType());
1765+
SizeSoFar = Offset + FieldSize;
1766+
}
17361767
// We're done once we hit the flexible array member.
17371768
if (field->getType()->isIncompleteArrayType())
17381769
break;
@@ -1774,6 +1805,11 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
17741805
}
17751806
}
17761807
}
1808+
if (ZeroInitPadding) {
1809+
CharUnits TotalSize =
1810+
Dest.getPreferredSize(CGF.getContext(), DestLV.getType());
1811+
DoZeroInitPadding(SizeSoFar, TotalSize - SizeSoFar);
1812+
}
17771813
}
17781814

17791815
void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,

clang/lib/CodeGen/CGExprConstant.cpp

Lines changed: 82 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,16 @@ using namespace CodeGen;
4242
namespace {
4343
class ConstExprEmitter;
4444

45+
llvm::Constant *getPadding(const CodeGenModule &CGM, CharUnits PadSize) {
46+
llvm::Type *Ty = CGM.CharTy;
47+
if (PadSize > CharUnits::One())
48+
Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity());
49+
if (CGM.shouldZeroInitPadding()) {
50+
return llvm::Constant::getNullValue(Ty);
51+
}
52+
return llvm::UndefValue::get(Ty);
53+
}
54+
4555
struct ConstantAggregateBuilderUtils {
4656
CodeGenModule &CGM;
4757

@@ -61,10 +71,7 @@ struct ConstantAggregateBuilderUtils {
6171
}
6272

6373
llvm::Constant *getPadding(CharUnits PadSize) const {
64-
llvm::Type *Ty = CGM.CharTy;
65-
if (PadSize > CharUnits::One())
66-
Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity());
67-
return llvm::UndefValue::get(Ty);
74+
return ::getPadding(CGM, PadSize);
6875
}
6976

7077
llvm::Constant *getZeroes(CharUnits ZeroSize) const {
@@ -591,6 +598,11 @@ class ConstStructBuilder {
591598
bool Build(const InitListExpr *ILE, bool AllowOverwrite);
592599
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
593600
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
601+
bool DoZeroInitPadding(const ASTRecordLayout &Layout, unsigned FieldNo,
602+
const FieldDecl &Field, bool AllowOverwrite,
603+
CharUnits &FieldSize, CharUnits &SizeSoFar);
604+
bool DoZeroInitPadding(const ASTRecordLayout &Layout, bool AllowOverwrite,
605+
CharUnits &SizeSoFar);
594606
llvm::Constant *Finalize(QualType Ty);
595607
};
596608

@@ -715,6 +727,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
715727
if (CXXRD->getNumBases())
716728
return false;
717729

730+
const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
731+
CharUnits FieldSize = CharUnits::Zero();
732+
CharUnits SizeSoFar = CharUnits::Zero();
733+
718734
for (FieldDecl *Field : RD->fields()) {
719735
++FieldNo;
720736

@@ -732,8 +748,13 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
732748
const Expr *Init = nullptr;
733749
if (ElementNo < ILE->getNumInits())
734750
Init = ILE->getInit(ElementNo++);
735-
if (isa_and_nonnull<NoInitExpr>(Init))
751+
if (isa_and_nonnull<NoInitExpr>(Init)) {
752+
if (ZeroInitPadding &&
753+
!DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize,
754+
SizeSoFar))
755+
return false;
736756
continue;
757+
}
737758

738759
// Zero-sized fields are not emitted, but their initializers may still
739760
// prevent emission of this struct as a constant.
@@ -743,6 +764,11 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
743764
continue;
744765
}
745766

767+
if (ZeroInitPadding &&
768+
!DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize,
769+
SizeSoFar))
770+
return false;
771+
746772
// When emitting a DesignatedInitUpdateExpr, a nested InitListExpr
747773
// represents additional overwriting of our current constant value, and not
748774
// a new constant to emit independently.
@@ -768,6 +794,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
768794
if (!EltInit)
769795
return false;
770796

797+
if (ZeroInitPadding && FieldSize.isZero())
798+
SizeSoFar += CharUnits::fromQuantity(
799+
CGM.getDataLayout().getTypeAllocSize(EltInit->getType()));
800+
771801
if (!Field->isBitField()) {
772802
// Handle non-bitfield members.
773803
if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit,
@@ -785,6 +815,9 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
785815
}
786816
}
787817

818+
if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar))
819+
return false;
820+
788821
return true;
789822
}
790823

@@ -849,6 +882,9 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
849882

850883
unsigned FieldNo = 0;
851884
uint64_t OffsetBits = CGM.getContext().toBits(Offset);
885+
const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
886+
CharUnits FieldSize = CharUnits::Zero();
887+
CharUnits SizeSoFar = CharUnits::Zero();
852888

853889
bool AllowOverwrite = false;
854890
for (RecordDecl::field_iterator Field = RD->field_begin(),
@@ -870,6 +906,15 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
870906
if (!EltInit)
871907
return false;
872908

909+
if (ZeroInitPadding) {
910+
if (!DoZeroInitPadding(Layout, FieldNo, **Field, AllowOverwrite,
911+
FieldSize, SizeSoFar))
912+
return false;
913+
if (FieldSize.isZero())
914+
SizeSoFar += CharUnits::fromQuantity(
915+
CGM.getDataLayout().getTypeAllocSize(EltInit->getType()));
916+
}
917+
873918
if (!Field->isBitField()) {
874919
// Handle non-bitfield members.
875920
if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits,
@@ -886,7 +931,35 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
886931
return false;
887932
}
888933
}
934+
if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar))
935+
return false;
936+
937+
return true;
938+
}
889939

940+
bool ConstStructBuilder::DoZeroInitPadding(
941+
const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field,
942+
bool AllowOverwrite, CharUnits &FieldSize, CharUnits &SizeSoFar) {
943+
CharUnits Offset =
944+
CGM.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo));
945+
if (SizeSoFar < Offset)
946+
if (!AppendBytes(SizeSoFar, getPadding(CGM, Offset - SizeSoFar),
947+
AllowOverwrite))
948+
return false;
949+
FieldSize = CGM.getContext().getTypeSizeInChars(Field.getType());
950+
SizeSoFar = Offset + FieldSize;
951+
return true;
952+
}
953+
954+
bool ConstStructBuilder::DoZeroInitPadding(const ASTRecordLayout &Layout,
955+
bool AllowOverwrite,
956+
CharUnits &SizeSoFar) {
957+
CharUnits TotalSize = Layout.getSize();
958+
if (SizeSoFar < TotalSize)
959+
if (!AppendBytes(SizeSoFar, getPadding(CGM, TotalSize - SizeSoFar),
960+
AllowOverwrite))
961+
return false;
962+
SizeSoFar = TotalSize;
890963
return true;
891964
}
892965

@@ -1127,12 +1200,10 @@ class ConstExprEmitter
11271200

11281201
assert(CurSize <= TotalSize && "Union size mismatch!");
11291202
if (unsigned NumPadBytes = TotalSize - CurSize) {
1130-
llvm::Type *Ty = CGM.CharTy;
1131-
if (NumPadBytes > 1)
1132-
Ty = llvm::ArrayType::get(Ty, NumPadBytes);
1133-
1134-
Elts.push_back(llvm::UndefValue::get(Ty));
1135-
Types.push_back(Ty);
1203+
llvm::Constant *Padding =
1204+
getPadding(CGM, CharUnits::fromQuantity(NumPadBytes));
1205+
Elts.push_back(Padding);
1206+
Types.push_back(Padding->getType());
11361207
}
11371208

11381209
llvm::StructType *STy = llvm::StructType::get(VMContext, Types, false);

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,6 +1676,57 @@ class CodeGenModule : public CodeGenTypeCache {
16761676
MustTailCallUndefinedGlobals.insert(Global);
16771677
}
16781678

1679+
bool shouldZeroInitPadding() const {
1680+
// In C23 (N3096) $6.7.10:
1681+
// """
1682+
// If any object is initialized with an empty iniitializer, then it is
1683+
// subject to default initialization:
1684+
// - if it is an aggregate, every member is initialized (recursively)
1685+
// according to these rules, and any padding is initialized to zero bits;
1686+
// - if it is a union, the first named member is initialized (recursively)
1687+
// according to these rules, and any padding is initialized to zero bits.
1688+
//
1689+
// If the aggregate or union contains elements or members that are
1690+
// aggregates or unions, these rules apply recursively to the subaggregates
1691+
// or contained unions.
1692+
//
1693+
// If there are fewer initializers in a brace-enclosed list than there are
1694+
// elements or members of an aggregate, or fewer characters in a string
1695+
// literal used to initialize an array of known size than there are elements
1696+
// in the array, the remainder of the aggregate is subject to default
1697+
// initialization.
1698+
// """
1699+
//
1700+
// From my understanding, the standard is ambiguous in the following two
1701+
// areas:
1702+
// 1. For a union type with empty initializer, if the first named member is
1703+
// not the largest member, then the bytes comes after the first named member
1704+
// but before padding are left unspecified. An example is:
1705+
// union U { int a; long long b;};
1706+
// union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left
1707+
// unspecified.
1708+
//
1709+
// 2. It only mentions padding for empty initializer, but doesn't mention
1710+
// padding for a non empty initialization list. And if the aggregation or
1711+
// union contains elements or members that are aggregates or unions, and
1712+
// some are non empty initializers, while others are empty initiailizers,
1713+
// the padding initialization is unclear. An example is:
1714+
// struct S1 { int a; long long b; };
1715+
// struct S2 { char c; struct S1 s1; };
1716+
// // The values for paddings between s2.c and s2.s1.a, between s2.s1.a
1717+
// and s2.s1.b are unclear.
1718+
// struct S2 s2 = { 'c' };
1719+
//
1720+
// Here we choose to zero initiailize left bytes of a union type. Because
1721+
// projects like the Linux kernel are relying on this behavior. If we don't
1722+
// explicitly zero initialize them, the undef values can be optimized to
1723+
// return gabage data. We also choose to zero initialize paddings for
1724+
// aggregates and unions, no matter they are initialized by empty
1725+
// initializers or non empty initializers. This can provide a consistent
1726+
// behavior. So projects like the Linux kernel can rely on it.
1727+
return !getLangOpts().CPlusPlus;
1728+
}
1729+
16791730
private:
16801731
bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const;
16811732

clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ struct et7 {
88
52,
99
};
1010

11-
// CHECK: @yv7 ={{.*}} global %struct.et7 { [0 x float] zeroinitializer, i8 52 }
11+
// CHECK: @yv7 ={{.*}} global { [0 x float], i8, [3 x i8] } { [0 x float] zeroinitializer, i8 52, [3 x i8] zeroinitializer }

clang/test/CodeGen/2008-08-07-AlignPadding1.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ struct gc_generation {
2020

2121
#define GEN_HEAD(n) (&generations[n].head)
2222

23-
// The idea is that there are 6 undefs in this structure initializer to cover
23+
// The idea is that there are 6 zeroinitializers in this structure initializer to cover
2424
// the padding between elements.
25-
// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] undef }, i32 700, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }]
25+
// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] zeroinitializer }, i32 700, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }]
2626
/* linked lists of container objects */
2727
struct gc_generation generations[3] = {
2828
/* PyGC_Head, threshold, count */

clang/test/CodeGen/2009-06-14-anonymous-union-init.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ struct sysfs_dirent {
77
};
88
struct sysfs_dirent sysfs_root = { {}, 16877 };
99

10-
// CHECK: @sysfs_root = {{.*}}global %struct.sysfs_dirent { %union.anon zeroinitializer, i16 16877 }
10+
// CHECK: @sysfs_root = {{.*}}global { %union.anon, i16, [2 x i8] } { %union.anon zeroinitializer, i16 16877, [2 x i8] zeroinitializer }
1111

1212
struct Foo {
1313
union { struct empty {} x; };
@@ -16,4 +16,4 @@ struct Foo {
1616
struct Foo foo = { {}, 16877 };
1717

1818
// EMPTY: @foo = {{.*}}global %struct.Foo { i16 16877 }
19-
// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo { [4 x i8] undef, i16 16877 }
19+
// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo { [4 x i8] zeroinitializer, i16 16877 }

0 commit comments

Comments
 (0)