Skip to content

Commit cb64639

Browse files
zoecarverhuixie90
authored and committed
[Builtin] Add __builtin_clear_padding
Adds `__builtin_clear_padding` to zero all padding bits of a struct. This builtin should match the behavior of those in NVCC and GCC (and MSVC?). There are some tests in this patch but hopefully we'll also get tests from other compilers (so all builtins can be as similar as possible). I'm planning to add support for unions, bitfields (both as members and members of sub-objects), and booleans as follow-up patches. Differential Revision: https://reviews.llvm.org/D87974 overlapping subobjects + opaque pointer union, rename, scalar types
1 parent 8dc8b9f commit cb64639

File tree

6 files changed

+1178
-0
lines changed

6 files changed

+1178
-0
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,12 @@ def IsConstantEvaluated : LangBuiltin<"CXX_LANG"> {
932932
let Prototype = "bool()";
933933
}
934934

935+
// Declares the builtin spelled `__builtin_clear_padding`, gated on the
// "CXX_LANG" language set. The variadic prototype leaves argument validation
// to the builtin's dedicated check in Sema.
def ClearPadding : LangBuiltin<"CXX_LANG"> {
936+
let Spellings = ["__builtin_clear_padding"];
937+
let Attributes = [NoThrow];
938+
let Prototype = "void(...)";
939+
}
940+
935941
// GCC exception builtins
936942
def EHReturn : Builtin {
937943
let Spellings = ["__builtin_eh_return"];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
#include "llvm/Support/ScopedPrinter.h"
6464
#include "llvm/TargetParser/AArch64TargetParser.h"
6565
#include "llvm/TargetParser/X86TargetParser.h"
66+
#include <algorithm>
6667
#include <optional>
6768
#include <sstream>
6869

@@ -2538,6 +2539,205 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
25382539
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
25392540
}
25402541

2542+
// Forward declaration: ClearPaddingStruct and ClearPaddingConstantArray call
// this function before its definition appears further down.
template <class T>
2543+
void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
2544+
size_t CurrentStartOffset,
2545+
size_t &RunningOffset, T &&WriteZeroAtOffset,
2546+
bool VisitVirtualBase);
2547+
2548+
template <class T>
2549+
void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
2550+
StructType *ST, size_t CurrentStartOffset,
2551+
size_t &RunningOffset, T &&WriteZeroAtOffset,
2552+
bool VisitVirtualBase) {
2553+
llvm::dbgs() << "clear padding struct: " << ST->getName().data() << '\n';
2554+
const auto &DL = CGF.CGM.getModule().getDataLayout();
2555+
auto *SL = DL.getStructLayout(ST);
2556+
auto *R = dyn_cast<CXXRecordDecl>(Ty->getAsRecordDecl());
2557+
if (!R) {
2558+
llvm::dbgs() << "Not a CXXRecordDecl\n";
2559+
return;
2560+
}
2561+
const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
2562+
if (ASTLayout.hasOwnVFPtr()) {
2563+
llvm::dbgs() << "vtable ptr. Incrementing RunningOffset from "
2564+
<< RunningOffset << " to "
2565+
<< RunningOffset + DL.getPointerSizeInBits() / 8 << '\n';
2566+
RunningOffset += DL.getPointerSizeInBits() / 8;
2567+
}
2568+
std::vector<std::pair<size_t, CXXBaseSpecifier>> Bases;
2569+
Bases.reserve(R->getNumBases());
2570+
// todo get vbases
2571+
for (auto Base : R->bases()) {
2572+
auto *BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
2573+
if (!Base.isVirtual()) {
2574+
auto Offset = static_cast<size_t>(
2575+
ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
2576+
Bases.emplace_back(Offset, Base);
2577+
}
2578+
}
2579+
2580+
auto VisitBases =
2581+
[&](std::vector<std::pair<size_t, CXXBaseSpecifier>> &BasesToVisit) {
2582+
std::sort(
2583+
BasesToVisit.begin(), BasesToVisit.end(),
2584+
[](const auto &P1, const auto &P2) { return P1.first < P2.first; });
2585+
for (const auto &Pair : BasesToVisit) {
2586+
// is it OK to use structured binding in clang? what is the language
2587+
// version?
2588+
auto Offset = Pair.first;
2589+
auto Base = Pair.second;
2590+
2591+
llvm::dbgs() << "visiting base at offset " << Offset << '\n';
2592+
// Recursively zero out base classes.
2593+
auto Index = SL->getElementContainingOffset(Offset);
2594+
Value *Idx = CGF.Builder.getSize(Index);
2595+
llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
2596+
Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
2597+
RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(),
2598+
CurrentStartOffset + Offset,
2599+
RunningOffset, WriteZeroAtOffset, false);
2600+
}
2601+
};
2602+
2603+
VisitBases(Bases);
2604+
2605+
size_t NumFields = std::distance(R->field_begin(), R->field_end());
2606+
std::vector<size_t> FieldOffsets;
2607+
FieldOffsets.reserve(NumFields);
2608+
auto CurrentField = R->field_begin();
2609+
for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
2610+
// Size needs to be in bytes so we can compare it later.
2611+
auto Offset = ASTLayout.getFieldOffset(I) / 8;
2612+
llvm::dbgs() << "visiting field at offset " << Offset << '\n';
2613+
auto Index = SL->getElementContainingOffset(Offset);
2614+
Value *Idx = CGF.Builder.getSize(Index);
2615+
llvm::Type *CurrentFieldType =
2616+
CGF.ConvertTypeForMem(CurrentField->getType());
2617+
Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
2618+
RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(),
2619+
CurrentStartOffset + Offset, RunningOffset,
2620+
WriteZeroAtOffset, true);
2621+
}
2622+
2623+
if (VisitVirtualBase) {
2624+
2625+
std::vector<std::pair<size_t, CXXBaseSpecifier>> VBases;
2626+
VBases.reserve(R->getNumVBases());
2627+
for (auto VBase : R->vbases()) {
2628+
auto *BaseRecord =
2629+
cast<CXXRecordDecl>(VBase.getType()->getAsRecordDecl());
2630+
auto Offset = static_cast<size_t>(
2631+
ASTLayout.getVBaseClassOffset(BaseRecord).getQuantity());
2632+
VBases.emplace_back(Offset, VBase);
2633+
}
2634+
2635+
VisitBases(VBases);
2636+
}
2637+
}
2638+
2639+
template <class T>
2640+
void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr,
2641+
llvm::Type *Type, ConstantArrayType const *AT,
2642+
size_t CurrentStartOffset, size_t &RunningOffset,
2643+
T &&WriteZeroAtOffset) {
2644+
llvm::dbgs() << "clear padding constant array\n";
2645+
for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
2646+
++ArrIndex) {
2647+
2648+
QualType ElementQualType = AT->getElementType();
2649+
2650+
auto *ElementRecord = ElementQualType->getAsRecordDecl();
2651+
if (!ElementRecord) {
2652+
llvm::dbgs() << "null!\n";
2653+
}
2654+
auto ElementAlign =
2655+
ElementRecord
2656+
? CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment()
2657+
: CGF.getContext().getTypeAlignInChars(ElementQualType);
2658+
2659+
Address FieldElementAddr{Ptr, Type, ElementAlign};
2660+
2661+
auto Element = CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
2662+
auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
2663+
auto AllocSize =
2664+
CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
2665+
llvm::dbgs() << "clearing array index! " << ArrIndex << '\n';
2666+
RecursivelyClearPaddingImpl(CGF, Element.getBasePointer(), ElementQualType,
2667+
CurrentStartOffset +
2668+
ArrIndex * AllocSize.getKnownMinValue(),
2669+
RunningOffset, WriteZeroAtOffset, true);
2670+
}
2671+
}
2672+
2673+
template <class T>
2674+
void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
2675+
size_t CurrentStartOffset,
2676+
size_t &RunningOffset, T &&WriteZeroAtOffset,
2677+
bool VisitVirtualBase) {
2678+
2679+
llvm::dbgs() << "clear padding before current [" << RunningOffset << ", "
2680+
<< CurrentStartOffset << ")\n";
2681+
for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
2682+
WriteZeroAtOffset(RunningOffset);
2683+
}
2684+
auto *Type = CGF.ConvertTypeForMem(Ty);
2685+
auto Size = CGF.CGM.getModule()
2686+
.getDataLayout()
2687+
.getTypeSizeInBits(Type)
2688+
.getKnownMinValue() /
2689+
8;
2690+
2691+
if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
2692+
ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset,
2693+
RunningOffset, WriteZeroAtOffset);
2694+
} else if (auto *ST = dyn_cast<StructType>(Type); ST && Ty->isRecordType()) {
2695+
ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset,
2696+
WriteZeroAtOffset, VisitVirtualBase);
2697+
} else if (Ty->isAtomicType()) {
2698+
RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(),
2699+
CurrentStartOffset, RunningOffset,
2700+
WriteZeroAtOffset, true);
2701+
} else {
2702+
llvm::dbgs() << "increment running offset from: " << RunningOffset << " to "
2703+
<< RunningOffset + Size << '\n';
2704+
RunningOffset =
2705+
std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size));
2706+
}
2707+
}
2708+
2709+
/// Emits IR that stores zero to every padding byte of the object of type
/// \p Ty that \p Ptr points to, including its tail padding.
///
/// \param CGF Code generation context used to emit IR.
/// \param Ptr Pointer to the object whose padding is cleared.
/// \param Ty  Clang type of the pointed-to object.
static void RecursivelyClearPadding(CodeGenFunction &CGF, Value *Ptr,
                                    QualType Ty) {
  auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
  auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);

  // Stores a single zero byte at the given offset from the start of the
  // object.
  // NOTE(review): the index is built with CGF.IntTy; confirm this is wide
  // enough for the largest object this builtin is expected to handle.
  auto WriteZeroAtOffset = [&](uint64_t Offset) {
    auto *Index = ConstantInt::get(CGF.IntTy, Offset);
    auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
    // A one-byte alignment stays one byte at every offset, so the store is
    // always 1-aligned.
    CGF.Builder.CreateAlignedStore(Zero, Element, CharUnits::One());
  };

  size_t RunningOffset = 0;
  RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset,
                              true);

  // Whatever lies between the last occupied byte and the allocation size of
  // the lowered type is tail padding.
  auto *Type = CGF.ConvertTypeForMem(Ty);
  const auto Size = CGF.CGM.getModule()
                        .getDataLayout()
                        .getTypeAllocSize(Type)
                        .getKnownMinValue();
  for (; RunningOffset < Size; ++RunningOffset)
    WriteZeroAtOffset(RunningOffset);
}
2740+
25412741
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
25422742
const CallExpr *E,
25432743
ReturnValueSlot ReturnValue) {
@@ -4462,6 +4662,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
44624662

44634663
return RValue::get(Ptr);
44644664
}
4665+
// __builtin_clear_padding(ptr): emit stores that zero the padding bytes of
// the object `ptr` points to. The call itself produces no value.
case Builtin::BI__builtin_clear_padding: {
4666+
const Expr *Op = E->getArg(0);
4667+
Value *Address = EmitScalarExpr(Op);
4668+
// The pointee type of the argument decides which layout is walked.
auto PointeeTy = Op->getType()->getPointeeType();
4669+
RecursivelyClearPadding(*this, Address, PointeeTy);
4670+
return RValue::get(nullptr);
4671+
}
44654672
case Builtin::BI__sync_fetch_and_add:
44664673
case Builtin::BI__sync_fetch_and_sub:
44674674
case Builtin::BI__sync_fetch_and_or:

clang/lib/Sema/SemaChecking.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,6 +2655,37 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
26552655
}
26562656
case Builtin::BI__builtin_launder:
26572657
return BuiltinLaunder(*this, TheCall);
2658+
// Semantic checks for __builtin_clear_padding: exactly one argument, which
// must be a pointer to a non-const, complete type. Any violation emits a
// diagnostic and yields ExprError().
case Builtin::BI__builtin_clear_padding: {
2659+
const auto numArgs = TheCall->getNumArgs();
2660+
// Reject calls without an argument.
if (numArgs < 1) {
2661+
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_one)
2662+
<< 0 /*function call*/ << "T*" << 0;
2663+
return ExprError();
2664+
}
2665+
// Reject calls with more than one argument.
if (numArgs > 1) {
2666+
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_many_args_one)
2667+
<< 0 /*function call*/ << "T*" << numArgs << 0;
2668+
return ExprError();
2669+
}
2670+
2671+
const Expr *PtrArg = TheCall->getArg(0);
2672+
const QualType PtrArgType = PtrArg->getType();
2673+
// The argument has to be a pointer.
// NOTE(review): the numeric stream arguments select wording inside the
// diagnostic; verify them against the diagnostic's definition.
if (!PtrArgType->isPointerType()) {
2674+
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
2675+
<< PtrArgType << "pointer" << 1 << 0 << 3 << 1 << PtrArgType
2676+
<< "pointer";
2677+
return ExprError();
2678+
}
2679+
// The builtin writes through the pointer, so a const pointee is rejected.
if (PtrArgType->getPointeeType().isConstQualified()) {
2680+
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
2681+
<< TheCall->getSourceRange() << 5 /*ConstUnknown*/;
2682+
return ExprError();
2683+
}
2684+
// The pointee must be a complete type.
if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
2685+
diag::err_typecheck_decl_incomplete_type))
2686+
return ExprError();
2687+
break;
2688+
}
26582689
case Builtin::BI__sync_fetch_and_add:
26592690
case Builtin::BI__sync_fetch_and_add_1:
26602691
case Builtin::BI__sync_fetch_and_add_2:
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
2+
3+
// Two chars with a padding byte after each: `b` is 2-byte aligned and the
// struct as a whole is 4-byte aligned.
struct alignas(4) Foo {
4+
char a;
5+
alignas(2) char b;
6+
};
7+
8+
// Same shape as Foo; used below as a member and as an array element.
struct alignas(4) Bar {
9+
char c;
10+
alignas(2) char d;
11+
};
12+
13+
// Combines a padded base class (Foo), a char followed by padding, and a
// padded member (Bar).
struct alignas(4) Baz : Foo {
14+
char e;
15+
Bar f;
16+
};
17+
18+
// Baz structure:
19+
// "a", PAD_1, "b", PAD_2, "e", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
20+
// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
21+
// %struct.Foo = type { i8, i8, i8, i8 }
22+
// %struct.Bar = type { i8, i8, i8, i8 }
23+
24+
// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
25+
// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
26+
// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
27+
// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
28+
// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
29+
30+
// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
31+
// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
32+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
33+
// CHECK: store i8 0, i8* [[PAD_1]]
34+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
35+
// CHECK: store i8 0, i8* [[PAD_2]]
36+
37+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
38+
// CHECK: store i8 0, i8* [[PAD_3]]
39+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
40+
// CHECK: store i8 0, i8* [[PAD_4]]
41+
// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
42+
// CHECK: store i8 0, i8* [[PAD_5]]
43+
44+
// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
45+
// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
46+
// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
47+
// CHECK: store i8 0, i8* [[PAD_6]]
48+
// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
49+
// CHECK: store i8 0, i8* [[PAD_7]]
50+
// CHECK: ret void
51+
// Clears padding in a struct with a padded base class and a padded member.
void testBaz(Baz *baz) {
52+
__builtin_clear_padding(baz);
53+
}
54+
55+
// An int followed by an 8-byte-aligned array of unspecified size, which
// leaves padding between `size` and `buf`.
struct UnsizedTail {
56+
int size;
57+
alignas(8) char buf[];
58+
59+
UnsizedTail(int size) : size(size) {}
60+
};
61+
62+
// UnsizedTail structure:
63+
// "size", PAD_1, PAD_2, PAD_3, PAD_4
64+
// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
65+
66+
// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
67+
// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
68+
// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
69+
// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
70+
// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
71+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
72+
// CHECK: store i8 0, i8* [[PAD_1]]
73+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
74+
// CHECK: store i8 0, i8* [[PAD_2]]
75+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
76+
// CHECK: store i8 0, i8* [[PAD_3]]
77+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
78+
// CHECK: store i8 0, i8* [[PAD_4]]
79+
// CHECK: ret void
80+
// Clears the padding that precedes an unsized trailing array.
void testUnsizedTail(UnsizedTail *u) {
81+
__builtin_clear_padding(u);
82+
}
83+
84+
// Wraps an array whose elements each contain padding.
struct ArrOfStructsWithPadding {
85+
Bar bars[2];
86+
};
87+
88+
// ArrOfStructsWithPadding structure:
89+
// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
90+
// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
91+
92+
// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
93+
// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
94+
// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
95+
// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
96+
// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
97+
// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
98+
// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
99+
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
100+
// CHECK: store i8 0, i8* [[PAD_1]]
101+
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 3
102+
// CHECK: store i8 0, i8* [[PAD_2]]
103+
// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
104+
// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
105+
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
106+
// CHECK: store i8 0, i8* [[PAD_3]]
107+
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
108+
// CHECK: store i8 0, i8* [[PAD_4]]
109+
// CHECK: ret void
110+
// Clears the padding inside every element of an array member.
void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
111+
__builtin_clear_padding(arr);
112+
}

0 commit comments

Comments
 (0)