Skip to content

Commit b47928a

Browse files
SC llvm team, SC llvm team
authored and committed
Merged main:237adfca4ef8 into amd-gfx:5c6a35ee21ea
Local branch amd-gfx 5c6a35e Merged main:dbd00c3b5d8a into amd-gfx:bf826207d2df Remote branch main 237adfc [OpenMP] Rework handling of global ctor/dtors in OpenMP (llvm#71739)
2 parents 5c6a35e + 237adfc commit b47928a

File tree

83 files changed

+1396
-349
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+1396
-349
lines changed

clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,13 @@ void IncludeSorter::addInclude(StringRef FileName, bool IsAngled,
134134
int Offset = findNextLine(SourceMgr->getCharacterData(EndLocation));
135135

136136
// Record the relevant location information for this inclusion directive.
137-
IncludeLocations[FileName].push_back(
137+
auto &IncludeLocation = IncludeLocations[FileName];
138+
IncludeLocation.push_back(
138139
SourceRange(HashLocation, EndLocation.getLocWithOffset(Offset)));
139-
SourceLocations.push_back(IncludeLocations[FileName].back());
140+
SourceLocations.push_back(IncludeLocation.back());
140141

141142
// Stop if this inclusion is a duplicate.
142-
if (IncludeLocations[FileName].size() > 1)
143+
if (IncludeLocation.size() > 1)
143144
return;
144145

145146
// Add the included file's name to the appropriate bucket.

clang/include/clang/Basic/LangOptions.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,9 @@ class LangOptions : public LangOptionsBase {
597597
return !requiresStrictPrototypes() && !OpenCL;
598598
}
599599

600+
/// Returns true if the language supports calling the 'atexit' function.
/// This is false only when both OpenMP and OpenMPIsTargetDevice are set;
/// every other combination of the two options yields true.
601+
bool hasAtExit() const { return !(OpenMP && OpenMPIsTargetDevice); }
602+
600603
/// Returns true if implicit int is part of the language requirements.
601604
bool isImplicitIntRequired() const { return !CPlusPlus && !C99; }
602605

clang/lib/Basic/Targets/BPF.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,37 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
2929
MacroBuilder &Builder) const {
3030
Builder.defineMacro("__bpf__");
3131
Builder.defineMacro("__BPF__");
32+
33+
// Derive __BPF_CPU_VERSION__ and the __BPF_FEATURE_* macros from the
// configured CPU name.
std::string CPU = getTargetOpts().CPU;
34+
// "probe" is reported as version 0 and defines no feature macros.
if (CPU == "probe") {
35+
Builder.defineMacro("__BPF_CPU_VERSION__", "0");
36+
return;
37+
}
38+
// An unset CPU, "generic" and "v1" are all reported as version 1, again
// without any feature macros.
if (CPU.empty() || CPU == "generic" || CPU == "v1") {
39+
Builder.defineMacro("__BPF_CPU_VERSION__", "1");
40+
return;
41+
}
42+
43+
// Any other name has its first character dropped and the remainder is used
// verbatim as the version string (e.g. "v2" -> "2").
std::string CpuVerNumStr = CPU.substr(1);
44+
Builder.defineMacro("__BPF_CPU_VERSION__", CpuVerNumStr);
45+
46+
// NOTE(review): std::stoi throws if the remainder does not start with a
// number; this presumably relies on the CPU name having already been
// validated against ValidCPUNames -- confirm.
int CpuVerNum = std::stoi(CpuVerNumStr);
47+
// The feature macros are cumulative: each version threshold adds to the
// macros defined by the lower thresholds.
if (CpuVerNum >= 2)
48+
Builder.defineMacro("__BPF_FEATURE_JMP_EXT");
49+
50+
if (CpuVerNum >= 3) {
51+
Builder.defineMacro("__BPF_FEATURE_JMP32");
52+
Builder.defineMacro("__BPF_FEATURE_ALU32");
53+
}
54+
55+
if (CpuVerNum >= 4) {
56+
Builder.defineMacro("__BPF_FEATURE_LDSX");
57+
Builder.defineMacro("__BPF_FEATURE_MOVSX");
58+
Builder.defineMacro("__BPF_FEATURE_BSWAP");
59+
Builder.defineMacro("__BPF_FEATURE_SDIV_SMOD");
60+
Builder.defineMacro("__BPF_FEATURE_GOTOL");
61+
Builder.defineMacro("__BPF_FEATURE_ST");
62+
}
3263
}
3364

3465
static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2",

clang/lib/CodeGen/CGDeclCXX.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,15 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD,
327327
registerGlobalDtorWithAtExit(dtorStub);
328328
}
329329

330+
/// Register a global destructor using the LLVM 'llvm.global_dtors' global.
/// \param VD Variable declaration forwarded to createAtExitStub.
/// \param Dtor Destructor callee forwarded to createAtExitStub.
/// \param Addr Address constant forwarded to createAtExitStub.
331+
void CodeGenFunction::registerGlobalDtorWithLLVM(const VarDecl &VD,
332+
llvm::FunctionCallee Dtor,
333+
llvm::Constant *Addr) {
334+
// Create a function which calls the destructor.
335+
llvm::Function *dtorStub = createAtExitStub(VD, Dtor, Addr);
336+
// Hand the stub to the module; Priority and IsDtorAttrFunc are left at
// AddGlobalDtor's default arguments.
CGM.AddGlobalDtor(dtorStub);
337+
}
338+
330339
void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) {
331340
// extern "C" int atexit(void (*f)(void));
332341
assert(dtorStub->getType() ==
@@ -519,10 +528,6 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
519528
D->hasAttr<CUDASharedAttr>()))
520529
return;
521530

522-
if (getLangOpts().OpenMP &&
523-
getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))
524-
return;
525-
526531
// Check if we've already initialized this decl.
527532
auto I = DelayedCXXInitPosition.find(D);
528533
if (I != DelayedCXXInitPosition.end() && I->second == ~0U)

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 0 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -1747,136 +1747,6 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
17471747
return nullptr;
17481748
}
17491749

1750-
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1751-
llvm::GlobalVariable *Addr,
1752-
bool PerformInit) {
1753-
if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1754-
!CGM.getLangOpts().OpenMPIsTargetDevice)
1755-
return false;
1756-
std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1757-
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1758-
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1759-
((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1760-
*Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1761-
HasRequiresUnifiedSharedMemory))
1762-
return CGM.getLangOpts().OpenMPIsTargetDevice;
1763-
VD = VD->getDefinition(CGM.getContext());
1764-
assert(VD && "Unknown VarDecl");
1765-
1766-
if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1767-
return CGM.getLangOpts().OpenMPIsTargetDevice;
1768-
1769-
QualType ASTTy = VD->getType();
1770-
SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1771-
1772-
// Produce the unique prefix to identify the new target regions. We use
1773-
// the source location of the variable declaration which we know to not
1774-
// conflict with any target region.
1775-
llvm::TargetRegionEntryInfo EntryInfo =
1776-
getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
1777-
SmallString<128> Buffer, Out;
1778-
OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
1779-
1780-
const Expr *Init = VD->getAnyInitializer();
1781-
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1782-
llvm::Constant *Ctor;
1783-
llvm::Constant *ID;
1784-
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1785-
// Generate function that re-emits the declaration's initializer into
1786-
// the threadprivate copy of the variable VD
1787-
CodeGenFunction CtorCGF(CGM);
1788-
1789-
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1790-
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1791-
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1792-
FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1793-
llvm::GlobalValue::WeakODRLinkage);
1794-
Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1795-
if (CGM.getTriple().isAMDGCN())
1796-
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1797-
auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1798-
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1799-
FunctionArgList(), Loc, Loc);
1800-
auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1801-
llvm::Constant *AddrInAS0 = Addr;
1802-
if (Addr->getAddressSpace() != 0)
1803-
AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1804-
Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1805-
CtorCGF.EmitAnyExprToMem(Init,
1806-
Address(AddrInAS0, Addr->getValueType(),
1807-
CGM.getContext().getDeclAlign(VD)),
1808-
Init->getType().getQualifiers(),
1809-
/*IsInitializer=*/true);
1810-
CtorCGF.FinishFunction();
1811-
Ctor = Fn;
1812-
ID = Fn;
1813-
} else {
1814-
Ctor = new llvm::GlobalVariable(
1815-
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1816-
llvm::GlobalValue::PrivateLinkage,
1817-
llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1818-
ID = Ctor;
1819-
}
1820-
1821-
// Register the information for the entry associated with the constructor.
1822-
Out.clear();
1823-
auto CtorEntryInfo = EntryInfo;
1824-
CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
1825-
OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1826-
CtorEntryInfo, Ctor, ID,
1827-
llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
1828-
}
1829-
if (VD->getType().isDestructedType() != QualType::DK_none) {
1830-
llvm::Constant *Dtor;
1831-
llvm::Constant *ID;
1832-
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1833-
// Generate function that emits destructor call for the threadprivate
1834-
// copy of the variable VD
1835-
CodeGenFunction DtorCGF(CGM);
1836-
1837-
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1838-
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1839-
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1840-
FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1841-
llvm::GlobalValue::WeakODRLinkage);
1842-
Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1843-
if (CGM.getTriple().isAMDGCN())
1844-
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1845-
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1846-
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1847-
FunctionArgList(), Loc, Loc);
1848-
// Create a scope with an artificial location for the body of this
1849-
// function.
1850-
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1851-
llvm::Constant *AddrInAS0 = Addr;
1852-
if (Addr->getAddressSpace() != 0)
1853-
AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1854-
Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1855-
DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1856-
CGM.getContext().getDeclAlign(VD)),
1857-
ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1858-
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1859-
DtorCGF.FinishFunction();
1860-
Dtor = Fn;
1861-
ID = Fn;
1862-
} else {
1863-
Dtor = new llvm::GlobalVariable(
1864-
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1865-
llvm::GlobalValue::PrivateLinkage,
1866-
llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1867-
ID = Dtor;
1868-
}
1869-
// Register the information for the entry associated with the destructor.
1870-
Out.clear();
1871-
auto DtorEntryInfo = EntryInfo;
1872-
DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
1873-
OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1874-
DtorEntryInfo, Dtor, ID,
1875-
llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
1876-
}
1877-
return CGM.getLangOpts().OpenMPIsTargetDevice;
1878-
}
1879-
18801750
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
18811751
llvm::GlobalValue *GV) {
18821752
std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,14 +1089,6 @@ class CGOpenMPRuntime {
10891089
SourceLocation Loc, bool PerformInit,
10901090
CodeGenFunction *CGF = nullptr);
10911091

1092-
/// Emit a code for initialization of declare target variable.
1093-
/// \param VD Declare target variable.
1094-
/// \param Addr Address of the global variable \a VD.
1095-
/// \param PerformInit true if initialization expression is not constant.
1096-
virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD,
1097-
llvm::GlobalVariable *Addr,
1098-
bool PerformInit);
1099-
11001092
/// Emit code for handling declare target functions in the runtime.
11011093
/// \param FD Declare target function.
11021094
/// \param Addr Address of the global \a FD.

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4536,6 +4536,11 @@ class CodeGenFunction : public CodeGenTypeCache {
45364536
void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::FunctionCallee fn,
45374537
llvm::Constant *addr);
45384538

4539+
/// Registers the dtor using 'llvm.global_dtors' for platforms that do not
4540+
/// support an 'atexit()' function.
4541+
void registerGlobalDtorWithLLVM(const VarDecl &D, llvm::FunctionCallee fn,
4542+
llvm::Constant *addr);
4543+
45394544
/// Call atexit() with function dtorStub.
45404545
void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub);
45414546

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1570,6 +1570,13 @@ class CodeGenModule : public CodeGenTypeCache {
15701570
const VarDecl *D,
15711571
ForDefinition_t IsForDefinition = NotForDefinition);
15721572

1573+
// FIXME: Hardcoding priority here is gross.
1574+
void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
1575+
unsigned LexOrder = ~0U,
1576+
llvm::Constant *AssociatedData = nullptr);
1577+
void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
1578+
bool IsDtorAttrFunc = false);
1579+
15731580
private:
15741581
llvm::Constant *GetOrCreateLLVMFunction(
15751582
StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
@@ -1641,13 +1648,6 @@ class CodeGenModule : public CodeGenTypeCache {
16411648
void EmitPointerToInitFunc(const VarDecl *VD, llvm::GlobalVariable *Addr,
16421649
llvm::Function *InitFunc, InitSegAttr *ISA);
16431650

1644-
// FIXME: Hardcoding priority here is gross.
1645-
void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
1646-
unsigned LexOrder = ~0U,
1647-
llvm::Constant *AssociatedData = nullptr);
1648-
void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
1649-
bool IsDtorAttrFunc = false);
1650-
16511651
/// EmitCtorList - Generates a global array of functions and priorities using
16521652
/// the given list and name. This array will have appending linkage and is
16531653
/// suitable for use as a LLVM constructor or destructor array. Clears Fns.

clang/lib/CodeGen/ItaniumCXXABI.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2794,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
27942794
if (D.isNoDestroy(CGM.getContext()))
27952795
return;
27962796

2797+
// OpenMP offloading supports C++ constructors and destructors but we do not
2798+
// always have 'atexit' available. Instead lower these to use the LLVM global
2799+
// destructors which we can handle directly in the runtime. Note that this is
2800+
// not strictly 1-to-1 with using `atexit` because we no longer tear down
2801+
// globals in reverse order of when they were constructed.
2802+
if (!CGM.getLangOpts().hasAtExit() && !D.isStaticLocal())
2803+
return CGF.registerGlobalDtorWithLLVM(D, dtor, addr);
2804+
27972805
// emitGlobalDtorWithCXAAtExit will emit a call to either __cxa_thread_atexit
27982806
// or __cxa_atexit depending on whether this VarDecl is a thread-local storage
27992807
// or not. CXAAtExit controls only __cxa_atexit, so use it if it is enabled.

0 commit comments

Comments
 (0)