Skip to content

Commit 25e5206

Browse files
yabincDanielCChen
authored andcommitted
Reapply "[clang][CodeGen] Zero init unspecified fields in initializers in C" (llvm#109898) (llvm#110051)
This reverts commit d50eaac. Also fixes a bug calculating offsets for bit fields in the original patch.
1 parent 11ea135 commit 25e5206

23 files changed

+754
-96
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5883,3 +5883,26 @@ specify the starting offset to begin embedding from. The resources is treated
58835883
as being empty if the specified offset is larger than the number of bytes in
58845884
the resource. The offset will be applied *before* any ``limit`` parameters are
58855885
applied.
5886+
5887+
Union and aggregate initialization in C
5888+
=======================================
5889+
5890+
In C23 (N2900), when an object is initialized from initializer ``= {}``, all
5891+
elements of arrays, all members of structs, and the first members of unions are
5892+
empty-initialized recursively. In addition, all padding bits are initialized to
5893+
zero.
5894+
5895+
Clang guarantees the following behaviors:
5896+
5897+
* ``1:`` Clang supports initializer ``= {}`` mentioned above in all C
5898+
standards.
5899+
5900+
* ``2:`` When unions are initialized from initializer ``= {}``, bytes outside
5901+
of the first members of unions are also initialized to zero.
5902+
5903+
* ``3:`` When unions, structures and arrays are initialized from initializer
5904+
``= { initializer-list }``, all members not explicitly initialized in
5905+
the initializer list are empty-initialized recursively. In addition, all
5906+
padding bits are initialized to zero.
5907+
5908+
Currently, the above extension only applies to C source code, not C++.

clang/lib/CodeGen/CGExprAgg.cpp

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "CGCXXABI.h"
1414
#include "CGObjCRuntime.h"
15+
#include "CGRecordLayout.h"
1516
#include "CodeGenFunction.h"
1617
#include "CodeGenModule.h"
1718
#include "ConstantEmitter.h"
@@ -64,6 +65,9 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
6465
void withReturnValueSlot(const Expr *E,
6566
llvm::function_ref<RValue(ReturnValueSlot)> Fn);
6667

68+
void DoZeroInitPadding(uint64_t &PaddingStart, uint64_t PaddingEnd,
69+
const FieldDecl *NextField);
70+
6771
public:
6872
AggExprEmitter(CodeGenFunction &cgf, AggValueSlot Dest, bool IsResultUnused)
6973
: CGF(cgf), Builder(CGF.Builder), Dest(Dest),
@@ -1698,6 +1702,9 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
16981702
// Prepare a 'this' for CXXDefaultInitExprs.
16991703
CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress());
17001704

1705+
const bool ZeroInitPadding =
1706+
CGF.CGM.shouldZeroInitPadding() && !Dest.isZeroed();
1707+
17011708
if (record->isUnion()) {
17021709
// Only initialize one field of a union. The field itself is
17031710
// specified by the initializer list.
@@ -1722,16 +1729,27 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
17221729
if (NumInitElements) {
17231730
// Store the initializer into the field
17241731
EmitInitializationToLValue(InitExprs[0], FieldLoc);
1732+
if (ZeroInitPadding) {
1733+
uint64_t TotalSize = CGF.getContext().toBits(
1734+
Dest.getPreferredSize(CGF.getContext(), DestLV.getType()));
1735+
uint64_t FieldSize = CGF.getContext().getTypeSize(FieldLoc.getType());
1736+
DoZeroInitPadding(FieldSize, TotalSize, nullptr);
1737+
}
17251738
} else {
17261739
// Default-initialize to null.
1727-
EmitNullInitializationToLValue(FieldLoc);
1740+
if (ZeroInitPadding)
1741+
EmitNullInitializationToLValue(DestLV);
1742+
else
1743+
EmitNullInitializationToLValue(FieldLoc);
17281744
}
1729-
17301745
return;
17311746
}
17321747

17331748
// Here we iterate over the fields; this makes it simpler to both
17341749
// default-initialize fields and skip over unnamed fields.
1750+
const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(record);
1751+
uint64_t PaddingStart = 0;
1752+
17351753
for (const auto *field : record->fields()) {
17361754
// We're done once we hit the flexible array member.
17371755
if (field->getType()->isIncompleteArrayType())
@@ -1748,6 +1766,9 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
17481766
CGF.getTypes().isZeroInitializable(ExprToVisit->getType()))
17491767
break;
17501768

1769+
if (ZeroInitPadding)
1770+
DoZeroInitPadding(PaddingStart,
1771+
Layout.getFieldOffset(field->getFieldIndex()), field);
17511772

17521773
LValue LV = CGF.EmitLValueForFieldInitialization(DestLV, field);
17531774
// We never generate write-barries for initialized fields.
@@ -1774,6 +1795,54 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr(
17741795
}
17751796
}
17761797
}
1798+
if (ZeroInitPadding) {
1799+
uint64_t TotalSize = CGF.getContext().toBits(
1800+
Dest.getPreferredSize(CGF.getContext(), DestLV.getType()));
1801+
DoZeroInitPadding(PaddingStart, TotalSize, nullptr);
1802+
}
1803+
}
1804+
1805+
void AggExprEmitter::DoZeroInitPadding(uint64_t &PaddingStart,
1806+
uint64_t PaddingEnd,
1807+
const FieldDecl *NextField) {
1808+
1809+
auto InitBytes = [&](uint64_t StartBit, uint64_t EndBit) {
1810+
CharUnits Start = CGF.getContext().toCharUnitsFromBits(StartBit);
1811+
CharUnits End = CGF.getContext().toCharUnitsFromBits(EndBit);
1812+
Address Addr = Dest.getAddress().withElementType(CGF.CharTy);
1813+
if (!Start.isZero())
1814+
Addr = Builder.CreateConstGEP(Addr, Start.getQuantity());
1815+
llvm::Constant *SizeVal = Builder.getInt64((End - Start).getQuantity());
1816+
CGF.Builder.CreateMemSet(Addr, Builder.getInt8(0), SizeVal, false);
1817+
};
1818+
1819+
if (NextField != nullptr && NextField->isBitField()) {
1820+
// For bitfield, zero init StorageSize before storing the bits. So we don't
1821+
// need to handle big/little endian.
1822+
const CGRecordLayout &RL =
1823+
CGF.getTypes().getCGRecordLayout(NextField->getParent());
1824+
const CGBitFieldInfo &Info = RL.getBitFieldInfo(NextField);
1825+
uint64_t StorageStart = CGF.getContext().toBits(Info.StorageOffset);
1826+
if (StorageStart + Info.StorageSize > PaddingStart) {
1827+
if (StorageStart > PaddingStart)
1828+
InitBytes(PaddingStart, StorageStart);
1829+
Address Addr = Dest.getAddress();
1830+
if (!Info.StorageOffset.isZero())
1831+
Addr = Builder.CreateConstGEP(Addr.withElementType(CGF.CharTy),
1832+
Info.StorageOffset.getQuantity());
1833+
Addr = Addr.withElementType(
1834+
llvm::Type::getIntNTy(CGF.getLLVMContext(), Info.StorageSize));
1835+
Builder.CreateStore(Builder.getIntN(Info.StorageSize, 0), Addr);
1836+
PaddingStart = StorageStart + Info.StorageSize;
1837+
}
1838+
return;
1839+
}
1840+
1841+
if (PaddingStart < PaddingEnd)
1842+
InitBytes(PaddingStart, PaddingEnd);
1843+
if (NextField != nullptr)
1844+
PaddingStart =
1845+
PaddingEnd + CGF.getContext().getTypeSize(NextField->getType());
17771846
}
17781847

17791848
void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,

clang/lib/CodeGen/CGExprConstant.cpp

Lines changed: 96 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,16 @@ using namespace CodeGen;
4242
namespace {
4343
class ConstExprEmitter;
4444

45+
llvm::Constant *getPadding(const CodeGenModule &CGM, CharUnits PadSize) {
46+
llvm::Type *Ty = CGM.CharTy;
47+
if (PadSize > CharUnits::One())
48+
Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity());
49+
if (CGM.shouldZeroInitPadding()) {
50+
return llvm::Constant::getNullValue(Ty);
51+
}
52+
return llvm::UndefValue::get(Ty);
53+
}
54+
4555
struct ConstantAggregateBuilderUtils {
4656
CodeGenModule &CGM;
4757

@@ -61,10 +71,7 @@ struct ConstantAggregateBuilderUtils {
6171
}
6272

6373
llvm::Constant *getPadding(CharUnits PadSize) const {
64-
llvm::Type *Ty = CGM.CharTy;
65-
if (PadSize > CharUnits::One())
66-
Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity());
67-
return llvm::UndefValue::get(Ty);
74+
return ::getPadding(CGM, PadSize);
6875
}
6976

7077
llvm::Constant *getZeroes(CharUnits ZeroSize) const {
@@ -591,6 +598,11 @@ class ConstStructBuilder {
591598
bool Build(const InitListExpr *ILE, bool AllowOverwrite);
592599
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
593600
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
601+
bool DoZeroInitPadding(const ASTRecordLayout &Layout, unsigned FieldNo,
602+
const FieldDecl &Field, bool AllowOverwrite,
603+
CharUnits &SizeSoFar, bool &ZeroFieldSize);
604+
bool DoZeroInitPadding(const ASTRecordLayout &Layout, bool AllowOverwrite,
605+
CharUnits SizeSoFar);
594606
llvm::Constant *Finalize(QualType Ty);
595607
};
596608

@@ -715,6 +727,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
715727
if (CXXRD->getNumBases())
716728
return false;
717729

730+
const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
731+
bool ZeroFieldSize = false;
732+
CharUnits SizeSoFar = CharUnits::Zero();
733+
718734
for (FieldDecl *Field : RD->fields()) {
719735
++FieldNo;
720736

@@ -732,8 +748,13 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
732748
const Expr *Init = nullptr;
733749
if (ElementNo < ILE->getNumInits())
734750
Init = ILE->getInit(ElementNo++);
735-
if (isa_and_nonnull<NoInitExpr>(Init))
751+
if (isa_and_nonnull<NoInitExpr>(Init)) {
752+
if (ZeroInitPadding &&
753+
!DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, SizeSoFar,
754+
ZeroFieldSize))
755+
return false;
736756
continue;
757+
}
737758

738759
// Zero-sized fields are not emitted, but their initializers may still
739760
// prevent emission of this struct as a constant.
@@ -743,6 +764,11 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
743764
continue;
744765
}
745766

767+
if (ZeroInitPadding &&
768+
!DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, SizeSoFar,
769+
ZeroFieldSize))
770+
return false;
771+
746772
// When emitting a DesignatedInitUpdateExpr, a nested InitListExpr
747773
// represents additional overwriting of our current constant value, and not
748774
// a new constant to emit independently.
@@ -768,6 +794,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
768794
if (!EltInit)
769795
return false;
770796

797+
if (ZeroInitPadding && ZeroFieldSize)
798+
SizeSoFar += CharUnits::fromQuantity(
799+
CGM.getDataLayout().getTypeAllocSize(EltInit->getType()));
800+
771801
if (!Field->isBitField()) {
772802
// Handle non-bitfield members.
773803
if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit,
@@ -785,6 +815,9 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
785815
}
786816
}
787817

818+
if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar))
819+
return false;
820+
788821
return true;
789822
}
790823

@@ -849,6 +882,9 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
849882

850883
unsigned FieldNo = 0;
851884
uint64_t OffsetBits = CGM.getContext().toBits(Offset);
885+
const bool ZeroInitPadding = CGM.shouldZeroInitPadding();
886+
bool ZeroFieldSize = false;
887+
CharUnits SizeSoFar = CharUnits::Zero();
852888

853889
bool AllowOverwrite = false;
854890
for (RecordDecl::field_iterator Field = RD->field_begin(),
@@ -870,6 +906,15 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
870906
if (!EltInit)
871907
return false;
872908

909+
if (ZeroInitPadding) {
910+
if (!DoZeroInitPadding(Layout, FieldNo, **Field, AllowOverwrite,
911+
SizeSoFar, ZeroFieldSize))
912+
return false;
913+
if (ZeroFieldSize)
914+
SizeSoFar += CharUnits::fromQuantity(
915+
CGM.getDataLayout().getTypeAllocSize(EltInit->getType()));
916+
}
917+
873918
if (!Field->isBitField()) {
874919
// Handle non-bitfield members.
875920
if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits,
@@ -886,7 +931,49 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
886931
return false;
887932
}
888933
}
934+
if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar))
935+
return false;
936+
937+
return true;
938+
}
939+
940+
bool ConstStructBuilder::DoZeroInitPadding(
941+
const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field,
942+
bool AllowOverwrite, CharUnits &SizeSoFar, bool &ZeroFieldSize) {
943+
uint64_t StartBitOffset = Layout.getFieldOffset(FieldNo);
944+
CharUnits StartOffset = CGM.getContext().toCharUnitsFromBits(StartBitOffset);
945+
if (SizeSoFar < StartOffset)
946+
if (!AppendBytes(SizeSoFar, getPadding(CGM, StartOffset - SizeSoFar),
947+
AllowOverwrite))
948+
return false;
949+
950+
if (!Field.isBitField()) {
951+
CharUnits FieldSize = CGM.getContext().getTypeSizeInChars(Field.getType());
952+
SizeSoFar = StartOffset + FieldSize;
953+
ZeroFieldSize = FieldSize.isZero();
954+
} else {
955+
const CGRecordLayout &RL =
956+
CGM.getTypes().getCGRecordLayout(Field.getParent());
957+
const CGBitFieldInfo &Info = RL.getBitFieldInfo(&Field);
958+
uint64_t EndBitOffset = StartBitOffset + Info.Size;
959+
SizeSoFar = CGM.getContext().toCharUnitsFromBits(EndBitOffset);
960+
if (EndBitOffset % CGM.getContext().getCharWidth() != 0) {
961+
SizeSoFar++;
962+
}
963+
ZeroFieldSize = Info.Size == 0;
964+
}
965+
return true;
966+
}
889967

968+
bool ConstStructBuilder::DoZeroInitPadding(const ASTRecordLayout &Layout,
969+
bool AllowOverwrite,
970+
CharUnits SizeSoFar) {
971+
CharUnits TotalSize = Layout.getSize();
972+
if (SizeSoFar < TotalSize)
973+
if (!AppendBytes(SizeSoFar, getPadding(CGM, TotalSize - SizeSoFar),
974+
AllowOverwrite))
975+
return false;
976+
SizeSoFar = TotalSize;
890977
return true;
891978
}
892979

@@ -1127,12 +1214,10 @@ class ConstExprEmitter
11271214

11281215
assert(CurSize <= TotalSize && "Union size mismatch!");
11291216
if (unsigned NumPadBytes = TotalSize - CurSize) {
1130-
llvm::Type *Ty = CGM.CharTy;
1131-
if (NumPadBytes > 1)
1132-
Ty = llvm::ArrayType::get(Ty, NumPadBytes);
1133-
1134-
Elts.push_back(llvm::UndefValue::get(Ty));
1135-
Types.push_back(Ty);
1217+
llvm::Constant *Padding =
1218+
getPadding(CGM, CharUnits::fromQuantity(NumPadBytes));
1219+
Elts.push_back(Padding);
1220+
Types.push_back(Padding->getType());
11361221
}
11371222

11381223
llvm::StructType *STy = llvm::StructType::get(VMContext, Types, false);

0 commit comments

Comments
 (0)