Skip to content

Commit 112b018

Browse files
committed
Reapply "[clang][CodeGen] Zero init unspecified fields in initializers in C" (llvm#109898)
This reverts commit d50eaac. Also fixes a bug calculating offsets for bit fields in the original patch.
1 parent 1c1bb77 commit 112b018

23 files changed

+730
-96
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5860,3 +5860,26 @@ specify the starting offset to begin embedding from. The resources is treated
58605860
as being empty if the specified offset is larger than the number of bytes in
58615861
the resource. The offset will be applied *before* any ``limit`` parameters are
58625862
applied.
5863+
5864+
Union and aggregate initialization in C
5865+
=======================================
5866+
5867+
In C23 (N2900), when an object is initialized from initializer ``= {}``, all
5868+
elements of arrays, all members of structs, and the first members of unions are
5869+
empty-initialized recursively. In addition, all padding bits are initialized to
5870+
zero.
5871+
5872+
Clang guarantees the following behaviors:
5873+
5874+
* ``1:`` Clang supports initializer ``= {}`` mentioned above in all C
5875+
standards.
5876+
5877+
* ``2:`` When unions are initialized from initializer ``= {}``, bytes outside
5878+
of the first members of unions are also initialized to zero.
5879+
5880+
* ``3:`` When unions, structures and arrays are initialized from initializer
5881+
``= { initializer-list }``, all members not explicitly initialized in
5882+
the initializer list are empty-initialized recursively. In addition, all
5883+
padding bits are initialized to zero.
5884+
5885+
Currently, the above extension only applies to C source code, not C++.

clang/lib/CodeGen/CGExprAgg.cpp

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "CGCXXABI.h"
1414
#include "CGObjCRuntime.h"
15+
#include "CGRecordLayout.h"
1516
#include "CodeGenFunction.h"
1617
#include "CodeGenModule.h"
1718
#include "ConstantEmitter.h"
@@ -1698,6 +1699,17 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
16981699
// Prepare a 'this' for CXXDefaultInitExprs.
16991700
CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress());
17001701

1702+
const bool ZeroInitPadding =
1703+
CGF.CGM.shouldZeroInitPadding() && !Dest.isZeroed();
1704+
const Address BaseLoc = Dest.getAddress().withElementType(CGF.Int8Ty);
1705+
auto DoZeroInitPadding = [&](CharUnits Offset, CharUnits Size) {
1706+
if (Size.isPositive()) {
1707+
Address Loc = CGF.Builder.CreateConstGEP(BaseLoc, Offset.getQuantity());
1708+
llvm::Constant *SizeVal = CGF.Builder.getInt64(Size.getQuantity());
1709+
CGF.Builder.CreateMemSet(Loc, CGF.Builder.getInt8(0), SizeVal, false);
1710+
}
1711+
};
1712+
17011713
if (record->isUnion()) {
17021714
// Only initialize one field of a union. The field itself is
17031715
// specified by the initializer list.
@@ -1722,16 +1734,27 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
17221734
if (NumInitElements) {
17231735
// Store the initializer into the field
17241736
EmitInitializationToLValue(InitExprs[0], FieldLoc);
1737+
if (ZeroInitPadding) {
1738+
CharUnits TotalSize =
1739+
Dest.getPreferredSize(CGF.getContext(), DestLV.getType());
1740+
CharUnits FieldSize =
1741+
CGF.getContext().getTypeSizeInChars(FieldLoc.getType());
1742+
DoZeroInitPadding(FieldSize, TotalSize - FieldSize);
1743+
}
17251744
} else {
17261745
// Default-initialize to null.
1727-
EmitNullInitializationToLValue(FieldLoc);
1746+
if (ZeroInitPadding)
1747+
EmitNullInitializationToLValue(DestLV);
1748+
else
1749+
EmitNullInitializationToLValue(FieldLoc);
17281750
}
1729-
17301751
return;
17311752
}
17321753

17331754
// Here we iterate over the fields; this makes it simpler to both
17341755
// default-initialize fields and skip over unnamed fields.
1756+
const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(record);
1757+
CharUnits SizeSoFar = CharUnits::Zero();
17351758
for (const auto *field : record->fields()) {
17361759
// We're done once we hit the flexible array member.
17371760
if (field->getType()->isIncompleteArrayType())
@@ -1748,6 +1771,26 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
17481771
CGF.getTypes().isZeroInitializable(ExprToVisit->getType()))
17491772
break;
17501773

1774+
if (ZeroInitPadding) {
1775+
uint64_t StartBitOffset = Layout.getFieldOffset(field->getFieldIndex());
1776+
CharUnits StartOffset =
1777+
CGF.getContext().toCharUnitsFromBits(StartBitOffset);
1778+
DoZeroInitPadding(SizeSoFar, StartOffset - SizeSoFar);
1779+
if (!field->isBitField()) {
1780+
CharUnits FieldSize =
1781+
CGF.getContext().getTypeSizeInChars(field->getType());
1782+
SizeSoFar = StartOffset + FieldSize;
1783+
} else {
1784+
const CGRecordLayout &RL =
1785+
CGF.getTypes().getCGRecordLayout(field->getParent());
1786+
const CGBitFieldInfo &Info = RL.getBitFieldInfo(field);
1787+
uint64_t EndBitOffset = StartBitOffset + Info.Size;
1788+
SizeSoFar = CGF.getContext().toCharUnitsFromBits(EndBitOffset);
1789+
if (EndBitOffset % CGF.getContext().getCharWidth() != 0) {
1790+
SizeSoFar++;
1791+
}
1792+
}
1793+
}
17511794

17521795
LValue LV = CGF.EmitLValueForFieldInitialization(DestLV, field);
17531796
// We never generate write-barries for initialized fields.
@@ -1774,6 +1817,11 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
17741817
}
17751818
}
17761819
}
1820+
if (ZeroInitPadding) {
1821+
CharUnits TotalSize =
1822+
Dest.getPreferredSize(CGF.getContext(), DestLV.getType());
1823+
DoZeroInitPadding(SizeSoFar, TotalSize - SizeSoFar);
1824+
}
17771825
}
17781826

17791827
void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,

clang/lib/CodeGen/CGExprConstant.cpp

Lines changed: 96 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,16 @@ using namespace CodeGen;
4242
namespace {
4343
class ConstExprEmitter;
4444

45+
llvm::Constant *getPadding(const CodeGenModule &CGM, CharUnits PadSize) {
46+
llvm::Type *Ty = CGM.CharTy;
47+
if (PadSize > CharUnits::One())
48+
Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity());
49+
if (CGM.shouldZeroInitPadding()) {
50+
return llvm::Constant::getNullValue(Ty);
51+
}
52+
return llvm::UndefValue::get(Ty);
53+
}
54+
4555
struct ConstantAggregateBuilderUtils {
4656
CodeGenModule &CGM;
4757

@@ -61,10 +71,7 @@ struct ConstantAggregateBuilderUtils {
6171
}
6272

6373
llvm::Constant *getPadding(CharUnits PadSize) const {
64-
llvm::Type *Ty = CGM.CharTy;
65-
if (PadSize > CharUnits::One())
66-
Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity());
67-
return llvm::UndefValue::get(Ty);
74+
return ::getPadding(CGM, PadSize);
6875
}
6976

7077
llvm::Constant *getZeroes(CharUnits ZeroSize) const {
@@ -591,6 +598,11 @@ class ConstStructBuilder {
591598
bool Build(const InitListExpr *ILE, bool AllowOverwrite);
592599
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
593600
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
601+
bool DoZeroInitPadding(const ASTRecordLayout &Layout, unsigned FieldNo,
602+
const FieldDecl &Field, bool AllowOverwrite,
603+
CharUnits &SizeSoFar, bool &ZeroFieldSize);
604+
bool DoZeroInitPadding(const ASTRecordLayout &Layout, bool AllowOverwrite,
605+
CharUnits SizeSoFar);
594606
llvm::Constant *Finalize(QualType Ty);
595607
};
596608

@@ -715,6 +727,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
715727
if (CXXRD->getNumBases())
716728
return false;
717729

730+
const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
731+
bool ZeroFieldSize = false;
732+
CharUnits SizeSoFar = CharUnits::Zero();
733+
718734
for (FieldDecl *Field : RD->fields()) {
719735
++FieldNo;
720736

@@ -732,8 +748,13 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
732748
const Expr *Init = nullptr;
733749
if (ElementNo < ILE->getNumInits())
734750
Init = ILE->getInit(ElementNo++);
735-
if (isa_and_nonnull<NoInitExpr>(Init))
751+
if (isa_and_nonnull<NoInitExpr>(Init)) {
752+
if (ZeroInitPadding &&
753+
!DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, SizeSoFar,
754+
ZeroFieldSize))
755+
return false;
736756
continue;
757+
}
737758

738759
// Zero-sized fields are not emitted, but their initializers may still
739760
// prevent emission of this struct as a constant.
@@ -743,6 +764,11 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
743764
continue;
744765
}
745766

767+
if (ZeroInitPadding &&
768+
!DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, SizeSoFar,
769+
ZeroFieldSize))
770+
return false;
771+
746772
// When emitting a DesignatedInitUpdateExpr, a nested InitListExpr
747773
// represents additional overwriting of our current constant value, and not
748774
// a new constant to emit independently.
@@ -768,6 +794,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
768794
if (!EltInit)
769795
return false;
770796

797+
if (ZeroInitPadding && ZeroFieldSize)
798+
SizeSoFar += CharUnits::fromQuantity(
799+
CGM.getDataLayout().getTypeAllocSize(EltInit->getType()));
800+
771801
if (!Field->isBitField()) {
772802
// Handle non-bitfield members.
773803
if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit,
@@ -785,6 +815,9 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
785815
}
786816
}
787817

818+
if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar))
819+
return false;
820+
788821
return true;
789822
}
790823

@@ -849,6 +882,9 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
849882

850883
unsigned FieldNo = 0;
851884
uint64_t OffsetBits = CGM.getContext().toBits(Offset);
885+
const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
886+
bool ZeroFieldSize = false;
887+
CharUnits SizeSoFar = CharUnits::Zero();
852888

853889
bool AllowOverwrite = false;
854890
for (RecordDecl::field_iterator Field = RD->field_begin(),
@@ -870,6 +906,15 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
870906
if (!EltInit)
871907
return false;
872908

909+
if (ZeroInitPadding) {
910+
if (!DoZeroInitPadding(Layout, FieldNo, **Field, AllowOverwrite,
911+
SizeSoFar, ZeroFieldSize))
912+
return false;
913+
if (ZeroFieldSize)
914+
SizeSoFar += CharUnits::fromQuantity(
915+
CGM.getDataLayout().getTypeAllocSize(EltInit->getType()));
916+
}
917+
873918
if (!Field->isBitField()) {
874919
// Handle non-bitfield members.
875920
if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits,
@@ -886,7 +931,49 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
886931
return false;
887932
}
888933
}
934+
if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar))
935+
return false;
936+
937+
return true;
938+
}
939+
940+
bool ConstStructBuilder::DoZeroInitPadding(
941+
const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field,
942+
bool AllowOverwrite, CharUnits &SizeSoFar, bool &ZeroFieldSize) {
943+
uint64_t StartBitOffset = Layout.getFieldOffset(FieldNo);
944+
CharUnits StartOffset = CGM.getContext().toCharUnitsFromBits(StartBitOffset);
945+
if (SizeSoFar < StartOffset)
946+
if (!AppendBytes(SizeSoFar, getPadding(CGM, StartOffset - SizeSoFar),
947+
AllowOverwrite))
948+
return false;
949+
950+
if (!Field.isBitField()) {
951+
CharUnits FieldSize = CGM.getContext().getTypeSizeInChars(Field.getType());
952+
SizeSoFar = StartOffset + FieldSize;
953+
ZeroFieldSize = FieldSize.isZero();
954+
} else {
955+
const CGRecordLayout &RL =
956+
CGM.getTypes().getCGRecordLayout(Field.getParent());
957+
const CGBitFieldInfo &Info = RL.getBitFieldInfo(&Field);
958+
uint64_t EndBitOffset = StartBitOffset + Info.Size;
959+
SizeSoFar = CGM.getContext().toCharUnitsFromBits(EndBitOffset);
960+
if (EndBitOffset % CGM.getContext().getCharWidth() != 0) {
961+
SizeSoFar++;
962+
}
963+
ZeroFieldSize = Info.Size == 0;
964+
}
965+
return true;
966+
}
889967

968+
bool ConstStructBuilder::DoZeroInitPadding(const ASTRecordLayout &Layout,
969+
bool AllowOverwrite,
970+
CharUnits SizeSoFar) {
971+
CharUnits TotalSize = Layout.getSize();
972+
if (SizeSoFar < TotalSize)
973+
if (!AppendBytes(SizeSoFar, getPadding(CGM, TotalSize - SizeSoFar),
974+
AllowOverwrite))
975+
return false;
976+
SizeSoFar = TotalSize;
890977
return true;
891978
}
892979

@@ -1127,12 +1214,10 @@ class ConstExprEmitter
11271214

11281215
assert(CurSize <= TotalSize && "Union size mismatch!");
11291216
if (unsigned NumPadBytes = TotalSize - CurSize) {
1130-
llvm::Type *Ty = CGM.CharTy;
1131-
if (NumPadBytes > 1)
1132-
Ty = llvm::ArrayType::get(Ty, NumPadBytes);
1133-
1134-
Elts.push_back(llvm::UndefValue::get(Ty));
1135-
Types.push_back(Ty);
1217+
llvm::Constant *Padding =
1218+
getPadding(CGM, CharUnits::fromQuantity(NumPadBytes));
1219+
Elts.push_back(Padding);
1220+
Types.push_back(Padding->getType());
11361221
}
11371222

11381223
llvm::StructType *STy = llvm::StructType::get(VMContext, Types, false);

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,6 +1676,57 @@ class CodeGenModule : public CodeGenTypeCache {
16761676
MustTailCallUndefinedGlobals.insert(Global);
16771677
}
16781678

1679+
bool shouldZeroInitPadding() const {
1680+
// In C23 (N3096) $6.7.10:
1681+
// """
1682+
// If any object is initialized with an empty iniitializer, then it is
1683+
// subject to default initialization:
1684+
// - if it is an aggregate, every member is initialized (recursively)
1685+
// according to these rules, and any padding is initialized to zero bits;
1686+
// - if it is a union, the first named member is initialized (recursively)
1687+
// according to these rules, and any padding is initialized to zero bits.
1688+
//
1689+
// If the aggregate or union contains elements or members that are
1690+
// aggregates or unions, these rules apply recursively to the subaggregates
1691+
// or contained unions.
1692+
//
1693+
// If there are fewer initializers in a brace-enclosed list than there are
1694+
// elements or members of an aggregate, or fewer characters in a string
1695+
// literal used to initialize an array of known size than there are elements
1696+
// in the array, the remainder of the aggregate is subject to default
1697+
// initialization.
1698+
// """
1699+
//
1700+
// From my understanding, the standard is ambiguous in the following two
1701+
// areas:
1702+
// 1. For a union type with empty initializer, if the first named member is
1703+
// not the largest member, then the bytes comes after the first named member
1704+
// but before padding are left unspecified. An example is:
1705+
// union U { int a; long long b;};
1706+
// union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left
1707+
// unspecified.
1708+
//
1709+
// 2. It only mentions padding for empty initializer, but doesn't mention
1710+
// padding for a non empty initialization list. And if the aggregation or
1711+
// union contains elements or members that are aggregates or unions, and
1712+
// some are non empty initializers, while others are empty initiailizers,
1713+
// the padding initialization is unclear. An example is:
1714+
// struct S1 { int a; long long b; };
1715+
// struct S2 { char c; struct S1 s1; };
1716+
// // The values for paddings between s2.c and s2.s1.a, between s2.s1.a
1717+
// and s2.s1.b are unclear.
1718+
// struct S2 s2 = { 'c' };
1719+
//
1720+
// Here we choose to zero initiailize left bytes of a union type. Because
1721+
// projects like the Linux kernel are relying on this behavior. If we don't
1722+
// explicitly zero initialize them, the undef values can be optimized to
1723+
// return gabage data. We also choose to zero initialize paddings for
1724+
// aggregates and unions, no matter they are initialized by empty
1725+
// initializers or non empty initializers. This can provide a consistent
1726+
// behavior. So projects like the Linux kernel can rely on it.
1727+
return !getLangOpts().CPlusPlus;
1728+
}
1729+
16791730
private:
16801731
bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const;
16811732

clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ struct et7 {
88
52,
99
};
1010

11-
// CHECK: @yv7 ={{.*}} global %struct.et7 { [0 x float] zeroinitializer, i8 52 }
11+
// CHECK: @yv7 ={{.*}} global { [0 x float], i8, [3 x i8] } { [0 x float] zeroinitializer, i8 52, [3 x i8] zeroinitializer }

clang/test/CodeGen/2008-08-07-AlignPadding1.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ struct gc_generation {
2020

2121
#define GEN_HEAD(n) (&generations[n].head)
2222

23-
// The idea is that there are 6 undefs in this structure initializer to cover
23+
// The idea is that there are 6 zeroinitializers in this structure initializer to cover
2424
// the padding between elements.
25-
// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] undef }, i32 700, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }]
25+
// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] zeroinitializer }, i32 700, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }]
2626
/* linked lists of container objects */
2727
struct gc_generation generations[3] = {
2828
/* PyGC_Head, threshold, count */

0 commit comments

Comments
 (0)