Skip to content

Commit 06704ab

Browse files
jhuber6 authored and ronlieb committed
[OpenMP] Rework handling of global ctor/dtors in OpenMP (llvm#71739)
Summary: This patch reworks how we handle global constructors in OpenMP. Previously, we emitted individual kernels that were all registered and called individually. In order to provide more generic support, this patch moves all handling of this to the target backend and the runtime plugin. This has the benefit of supporting the GNU extensions for constructors and destructors, removing a class of failures related to shared library destruction order, and allowing targets other than OpenMP to use the same support without needing to change the frontend. This is primarily done by calling kernels that the backend emits to iterate over a list of ctor / dtor functions. For x64, this is automatic and we get it for free with the standard `dlopen` handling. For AMDGPU, we emit `amdgcn.device.init` and `amdgcn.device.fini` functions which handle everything automatically and simply need to be called. For NVPTX, a patch llvm#71549 provides the kernels to call, but the runtime needs to set up the array manually by pulling out all the known constructor / destructor functions. One concession that this patch requires is the change that for GPU targets in OpenMP offloading we will use `llvm.global_dtors` instead of using `atexit`. This is because `atexit` is a separate runtime function that does not mesh well with the handling we're trying to do here. This should be equivalent in all cases except for cases where we would need to destruct manually such as: ``` struct S { ~S() { foo(); } }; void foo() { static S s; } ``` However, this is broken in many other ways on the GPU, so it is not regressing any support, simply increasing the scope of what we can handle. This changes the handling of ctors / dtors. This patch now outputs an informational message regarding the deprecation if the old format is used. This will be completely removed in a later release. Depends on: llvm#71549 Change-Id: I99d449b4ca8c590a99fbd84774c673a4d49300a4
1 parent d9610da commit 06704ab

File tree

21 files changed

+328
-218
lines changed

21 files changed

+328
-218
lines changed

clang/include/clang/Basic/LangOptions.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,9 @@ class LangOptions : public LangOptionsBase {
597597
return !requiresStrictPrototypes() && !OpenCL;
598598
}
599599

600+
/// Returns true if the language supports calling the 'atexit' function.
601+
bool hasAtExit() const { return !(OpenMP && OpenMPIsTargetDevice); }
602+
600603
/// Returns true if implicit int is part of the language requirements.
601604
bool isImplicitIntRequired() const { return !CPlusPlus && !C99; }
602605

clang/lib/CodeGen/CGDeclCXX.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,15 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD,
327327
registerGlobalDtorWithAtExit(dtorStub);
328328
}
329329

330+
/// Register a global destructor using the LLVM 'llvm.global_dtors' global.
331+
void CodeGenFunction::registerGlobalDtorWithLLVM(const VarDecl &VD,
332+
llvm::FunctionCallee Dtor,
333+
llvm::Constant *Addr) {
334+
// Create a function which calls the destructor.
335+
llvm::Function *dtorStub = createAtExitStub(VD, Dtor, Addr);
336+
CGM.AddGlobalDtor(dtorStub);
337+
}
338+
330339
void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) {
331340
// extern "C" int atexit(void (*f)(void));
332341
assert(dtorStub->getType() ==
@@ -519,10 +528,6 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
519528
D->hasAttr<CUDASharedAttr>()))
520529
return;
521530

522-
if (getLangOpts().OpenMP &&
523-
getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))
524-
return;
525-
526531
// Check if we've already initialized this decl.
527532
auto I = DelayedCXXInitPosition.find(D);
528533
if (I != DelayedCXXInitPosition.end() && I->second == ~0U)

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 0 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -1768,136 +1768,6 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
17681768
return nullptr;
17691769
}
17701770

1771-
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1772-
llvm::GlobalVariable *Addr,
1773-
bool PerformInit) {
1774-
if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1775-
!CGM.getLangOpts().OpenMPIsTargetDevice)
1776-
return false;
1777-
std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1778-
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1779-
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1780-
((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1781-
*Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1782-
HasRequiresUnifiedSharedMemory))
1783-
return CGM.getLangOpts().OpenMPIsTargetDevice;
1784-
VD = VD->getDefinition(CGM.getContext());
1785-
assert(VD && "Unknown VarDecl");
1786-
1787-
if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1788-
return CGM.getLangOpts().OpenMPIsTargetDevice;
1789-
1790-
QualType ASTTy = VD->getType();
1791-
SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1792-
1793-
// Produce the unique prefix to identify the new target regions. We use
1794-
// the source location of the variable declaration which we know to not
1795-
// conflict with any target region.
1796-
llvm::TargetRegionEntryInfo EntryInfo =
1797-
getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
1798-
SmallString<128> Buffer, Out;
1799-
OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
1800-
1801-
const Expr *Init = VD->getAnyInitializer();
1802-
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1803-
llvm::Constant *Ctor;
1804-
llvm::Constant *ID;
1805-
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1806-
// Generate function that re-emits the declaration's initializer into
1807-
// the threadprivate copy of the variable VD
1808-
CodeGenFunction CtorCGF(CGM);
1809-
1810-
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1811-
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1812-
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1813-
FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1814-
llvm::GlobalValue::WeakODRLinkage);
1815-
Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1816-
if (CGM.getTriple().isAMDGCN())
1817-
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1818-
auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1819-
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1820-
FunctionArgList(), Loc, Loc);
1821-
auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1822-
llvm::Constant *AddrInAS0 = Addr;
1823-
if (Addr->getAddressSpace() != 0)
1824-
AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1825-
Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1826-
CtorCGF.EmitAnyExprToMem(Init,
1827-
Address(AddrInAS0, Addr->getValueType(),
1828-
CGM.getContext().getDeclAlign(VD)),
1829-
Init->getType().getQualifiers(),
1830-
/*IsInitializer=*/true);
1831-
CtorCGF.FinishFunction();
1832-
Ctor = Fn;
1833-
ID = Fn;
1834-
} else {
1835-
Ctor = new llvm::GlobalVariable(
1836-
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1837-
llvm::GlobalValue::PrivateLinkage,
1838-
llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1839-
ID = Ctor;
1840-
}
1841-
1842-
// Register the information for the entry associated with the constructor.
1843-
Out.clear();
1844-
auto CtorEntryInfo = EntryInfo;
1845-
CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
1846-
OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1847-
CtorEntryInfo, Ctor, ID,
1848-
llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
1849-
}
1850-
if (VD->getType().isDestructedType() != QualType::DK_none) {
1851-
llvm::Constant *Dtor;
1852-
llvm::Constant *ID;
1853-
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1854-
// Generate function that emits destructor call for the threadprivate
1855-
// copy of the variable VD
1856-
CodeGenFunction DtorCGF(CGM);
1857-
1858-
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1859-
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1860-
llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1861-
FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1862-
llvm::GlobalValue::WeakODRLinkage);
1863-
Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1864-
if (CGM.getTriple().isAMDGCN())
1865-
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1866-
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1867-
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1868-
FunctionArgList(), Loc, Loc);
1869-
// Create a scope with an artificial location for the body of this
1870-
// function.
1871-
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1872-
llvm::Constant *AddrInAS0 = Addr;
1873-
if (Addr->getAddressSpace() != 0)
1874-
AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1875-
Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1876-
DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1877-
CGM.getContext().getDeclAlign(VD)),
1878-
ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1879-
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1880-
DtorCGF.FinishFunction();
1881-
Dtor = Fn;
1882-
ID = Fn;
1883-
} else {
1884-
Dtor = new llvm::GlobalVariable(
1885-
CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1886-
llvm::GlobalValue::PrivateLinkage,
1887-
llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1888-
ID = Dtor;
1889-
}
1890-
// Register the information for the entry associated with the destructor.
1891-
Out.clear();
1892-
auto DtorEntryInfo = EntryInfo;
1893-
DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
1894-
OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1895-
DtorEntryInfo, Dtor, ID,
1896-
llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
1897-
}
1898-
return CGM.getLangOpts().OpenMPIsTargetDevice;
1899-
}
1900-
19011771
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
19021772
llvm::GlobalValue *GV) {
19031773
std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,14 +1090,6 @@ class CGOpenMPRuntime {
10901090
SourceLocation Loc, bool PerformInit,
10911091
CodeGenFunction *CGF = nullptr);
10921092

1093-
/// Emit a code for initialization of declare target variable.
1094-
/// \param VD Declare target variable.
1095-
/// \param Addr Address of the global variable \a VD.
1096-
/// \param PerformInit true if initialization expression is not constant.
1097-
virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD,
1098-
llvm::GlobalVariable *Addr,
1099-
bool PerformInit);
1100-
11011093
/// Emit code for handling declare target functions in the runtime.
11021094
/// \param FD Declare target function.
11031095
/// \param Addr Address of the global \a FD.

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4605,6 +4605,11 @@ class CodeGenFunction : public CodeGenTypeCache {
46054605
void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::FunctionCallee fn,
46064606
llvm::Constant *addr);
46074607

4608+
/// Registers the dtor using 'llvm.global_dtors' for platforms that do not
4609+
/// support an 'atexit()' function.
4610+
void registerGlobalDtorWithLLVM(const VarDecl &D, llvm::FunctionCallee fn,
4611+
llvm::Constant *addr);
4612+
46084613
/// Call atexit() with function dtorStub.
46094614
void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub);
46104615

clang/lib/CodeGen/CodeGenModule.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1939,6 +1939,13 @@ class CodeGenModule : public CodeGenTypeCache {
19391939
const VarDecl *D,
19401940
ForDefinition_t IsForDefinition = NotForDefinition);
19411941

1942+
// FIXME: Hardcoding priority here is gross.
1943+
void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
1944+
unsigned LexOrder = ~0U,
1945+
llvm::Constant *AssociatedData = nullptr);
1946+
void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
1947+
bool IsDtorAttrFunc = false);
1948+
19421949
private:
19431950
llvm::Constant *GetOrCreateLLVMFunction(
19441951
StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
@@ -2010,13 +2017,6 @@ class CodeGenModule : public CodeGenTypeCache {
20102017
void EmitPointerToInitFunc(const VarDecl *VD, llvm::GlobalVariable *Addr,
20112018
llvm::Function *InitFunc, InitSegAttr *ISA);
20122019

2013-
// FIXME: Hardcoding priority here is gross.
2014-
void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
2015-
unsigned LexOrder = ~0U,
2016-
llvm::Constant *AssociatedData = nullptr);
2017-
void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
2018-
bool IsDtorAttrFunc = false);
2019-
20202020
/// EmitCtorList - Generates a global array of functions and priorities using
20212021
/// the given list and name. This array will have appending linkage and is
20222022
/// suitable for use as a LLVM constructor or destructor array. Clears Fns.

clang/lib/CodeGen/ItaniumCXXABI.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2794,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
27942794
if (D.isNoDestroy(CGM.getContext()))
27952795
return;
27962796

2797+
// OpenMP offloading supports C++ constructors and destructors but we do not
2798+
// always have 'atexit' available. Instead lower these to use the LLVM global
2799+
// destructors which we can handle directly in the runtime. Note that this is
2800+
// not strictly 1-to-1 with using `atexit` because we no longer tear down
2801+
// globals in reverse order of when they were constructed.
2802+
if (!CGM.getLangOpts().hasAtExit() && !D.isStaticLocal())
2803+
return CGF.registerGlobalDtorWithLLVM(D, dtor, addr);
2804+
27972805
// emitGlobalDtorWithCXAAtExit will emit a call to either __cxa_thread_atexit
27982806
// or __cxa_atexit depending on whether this VarDecl is a thread-local storage
27992807
// or not. CXAAtExit controls only __cxa_atexit, so use it if it is enabled.

clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
3838

3939

4040
#pragma omp end declare target
41-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fabsf_f32_l14_ctor
41+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
4242
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
4343
// CHECK-NEXT: entry:
4444
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -52,7 +52,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
5252
// CHECK-NEXT: ret void
5353
//
5454
//
55-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fabs_f32_l15_ctor
55+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
5656
// CHECK-SAME: () #[[ATTR0]] {
5757
// CHECK-NEXT: entry:
5858
// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -72,7 +72,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
7272
// CHECK-NEXT: ret void
7373
//
7474
//
75-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_sinf_f32_l17_ctor
75+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
7676
// CHECK-SAME: () #[[ATTR0]] {
7777
// CHECK-NEXT: entry:
7878
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -86,7 +86,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
8686
// CHECK-NEXT: ret void
8787
//
8888
//
89-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_sin_f32_l18_ctor
89+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
9090
// CHECK-SAME: () #[[ATTR0]] {
9191
// CHECK-NEXT: entry:
9292
// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -106,7 +106,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
106106
// CHECK-NEXT: ret void
107107
//
108108
//
109-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_cosf_f32_l20_ctor
109+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
110110
// CHECK-SAME: () #[[ATTR0]] {
111111
// CHECK-NEXT: entry:
112112
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -120,7 +120,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
120120
// CHECK-NEXT: ret void
121121
//
122122
//
123-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_cos_f32_l21_ctor
123+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.5
124124
// CHECK-SAME: () #[[ATTR0]] {
125125
// CHECK-NEXT: entry:
126126
// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -140,7 +140,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
140140
// CHECK-NEXT: ret void
141141
//
142142
//
143-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fmaf_f32_l23_ctor
143+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.6
144144
// CHECK-SAME: () #[[ATTR0]] {
145145
// CHECK-NEXT: entry:
146146
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -162,7 +162,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
162162
// CHECK-NEXT: ret void
163163
//
164164
//
165-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fma_f32_l24_ctor
165+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.7
166166
// CHECK-SAME: () #[[ATTR0]] {
167167
// CHECK-NEXT: entry:
168168
// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -198,7 +198,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
198198
// CHECK-NEXT: ret void
199199
//
200200
//
201-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_min_f32_l27_ctor
201+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.8
202202
// CHECK-SAME: () #[[ATTR0]] {
203203
// CHECK-NEXT: entry:
204204
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -216,7 +216,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
216216
// CHECK-NEXT: ret void
217217
//
218218
//
219-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_max_f32_l28_ctor
219+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.9
220220
// CHECK-SAME: () #[[ATTR0]] {
221221
// CHECK-NEXT: entry:
222222
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -234,23 +234,23 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
234234
// CHECK-NEXT: ret void
235235
//
236236
//
237-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fmin_f32_l30_ctor
237+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.10
238238
// CHECK-SAME: () #[[ATTR0]] {
239239
// CHECK-NEXT: entry:
240240
// CHECK-NEXT: [[CALL:%.*]] = call noundef float @_Z4fminff(float noundef 2.000000e+00, float noundef -4.000000e+00) #[[ATTR4:[0-9]+]]
241241
// CHECK-NEXT: store float [[CALL]], ptr addrspacecast (ptr addrspace(1) @_ZL18constexpr_fmin_f32 to ptr), align 4
242242
// CHECK-NEXT: ret void
243243
//
244244
//
245-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fmax_f32_l31_ctor
245+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.11
246246
// CHECK-SAME: () #[[ATTR0]] {
247247
// CHECK-NEXT: entry:
248248
// CHECK-NEXT: [[CALL:%.*]] = call noundef float @_Z4fmaxff(float noundef 2.000000e+00, float noundef -4.000000e+00) #[[ATTR4]]
249249
// CHECK-NEXT: store float [[CALL]], ptr addrspacecast (ptr addrspace(1) @_ZL18constexpr_fmax_f32 to ptr), align 4
250250
// CHECK-NEXT: ret void
251251
//
252252
//
253-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fminf_f32_l33_ctor
253+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.12
254254
// CHECK-SAME: () #[[ATTR0]] {
255255
// CHECK-NEXT: entry:
256256
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -268,7 +268,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
268268
// CHECK-NEXT: ret void
269269
//
270270
//
271-
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fmaxf_f32_l34_ctor
271+
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.13
272272
// CHECK-SAME: () #[[ATTR0]] {
273273
// CHECK-NEXT: entry:
274274
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
@@ -285,3 +285,23 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
285285
// CHECK-NEXT: store float [[TMP2]], ptr addrspacecast (ptr addrspace(1) @_ZL19constexpr_fmaxf_f32 to ptr), align 4
286286
// CHECK-NEXT: ret void
287287
//
288+
//
289+
// CHECK-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_amdgcn_openmp_device_math_constexpr.cpp
290+
// CHECK-SAME: () #[[ATTR0]] {
291+
// CHECK-NEXT: entry:
292+
// CHECK-NEXT: call void @__cxx_global_var_init()
293+
// CHECK-NEXT: call void @__cxx_global_var_init.1()
294+
// CHECK-NEXT: call void @__cxx_global_var_init.2()
295+
// CHECK-NEXT: call void @__cxx_global_var_init.3()
296+
// CHECK-NEXT: call void @__cxx_global_var_init.4()
297+
// CHECK-NEXT: call void @__cxx_global_var_init.5()
298+
// CHECK-NEXT: call void @__cxx_global_var_init.6()
299+
// CHECK-NEXT: call void @__cxx_global_var_init.7()
300+
// CHECK-NEXT: call void @__cxx_global_var_init.8()
301+
// CHECK-NEXT: call void @__cxx_global_var_init.9()
302+
// CHECK-NEXT: call void @__cxx_global_var_init.10()
303+
// CHECK-NEXT: call void @__cxx_global_var_init.11()
304+
// CHECK-NEXT: call void @__cxx_global_var_init.12()
305+
// CHECK-NEXT: call void @__cxx_global_var_init.13()
306+
// CHECK-NEXT: ret void
307+
//

0 commit comments

Comments
 (0)