@@ -1972,6 +1972,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
1972
1972
return ;
1973
1973
if (const RecordDecl *GlobalizedVarsRecord = I->getSecond ().GlobalRecord ) {
1974
1974
QualType GlobalRecTy = CGM.getContext ().getRecordType (GlobalizedVarsRecord);
1975
+ QualType SecGlobalRecTy;
1975
1976
1976
1977
// Recover pointer to this function's global record. The runtime will
1977
1978
// handle the specifics of the allocation of the memory.
@@ -1986,11 +1987,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
1986
1987
llvm::PointerType *GlobalRecPtrTy =
1987
1988
CGF.ConvertTypeForMem (GlobalRecTy)->getPointerTo ();
1988
1989
llvm::Value *GlobalRecCastAddr;
1990
+ llvm::Value *IsTTD = nullptr ;
1989
1991
if (WithSPMDCheck ||
1990
1992
getExecutionMode () == CGOpenMPRuntimeNVPTX::EM_Unknown) {
1991
1993
llvm::BasicBlock *ExitBB = CGF.createBasicBlock (" .exit" );
1992
1994
llvm::BasicBlock *SPMDBB = CGF.createBasicBlock (" .spmd" );
1993
1995
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock (" .non-spmd" );
1996
+ if (I->getSecond ().SecondaryGlobalRecord .hasValue ()) {
1997
+ llvm::Value *RTLoc = emitUpdateLocation (CGF, Loc);
1998
+ llvm::Value *ThreadID = getThreadID (CGF, Loc);
1999
+ llvm::Value *PL = CGF.EmitRuntimeCall (
2000
+ createNVPTXRuntimeFunction (OMPRTL_NVPTX__kmpc_parallel_level),
2001
+ {RTLoc, ThreadID});
2002
+ IsTTD = Bld.CreateIsNull (PL);
2003
+ }
1994
2004
llvm::Value *IsSPMD = Bld.CreateIsNotNull (CGF.EmitNounwindRuntimeCall (
1995
2005
createNVPTXRuntimeFunction (OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
1996
2006
Bld.CreateCondBr (IsSPMD, SPMDBB, NonSPMDBB);
@@ -2003,11 +2013,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
2003
2013
// There is no need to emit line number for unconditional branch.
2004
2014
(void )ApplyDebugLocation::CreateEmpty (CGF);
2005
2015
CGF.EmitBlock (NonSPMDBB);
2016
+ llvm::Value *Size = llvm::ConstantInt::get (CGM.SizeTy , GlobalRecordSize);
2017
+ if (const RecordDecl *SecGlobalizedVarsRecord =
2018
+ I->getSecond ().SecondaryGlobalRecord .getValueOr (nullptr )) {
2019
+ SecGlobalRecTy =
2020
+ CGM.getContext ().getRecordType (SecGlobalizedVarsRecord);
2021
+
2022
+ // Recover pointer to this function's global record. The runtime will
2023
+ // handle the specifics of the allocation of the memory.
2024
+ // Use actual memory size of the record including the padding
2025
+ // for alignment purposes.
2026
+ unsigned Alignment =
2027
+ CGM.getContext ().getTypeAlignInChars (SecGlobalRecTy).getQuantity ();
2028
+ unsigned GlobalRecordSize =
2029
+ CGM.getContext ().getTypeSizeInChars (SecGlobalRecTy).getQuantity ();
2030
+ GlobalRecordSize = llvm::alignTo (GlobalRecordSize, Alignment);
2031
+ Size = Bld.CreateSelect (
2032
+ IsTTD, llvm::ConstantInt::get (CGM.SizeTy , GlobalRecordSize), Size);
2033
+ }
2006
2034
// TODO: allow the usage of shared memory to be controlled by
2007
2035
// the user, for now, default to global.
2008
2036
llvm::Value *GlobalRecordSizeArg[] = {
2009
- llvm::ConstantInt::get (CGM.SizeTy , GlobalRecordSize),
2010
- CGF.Builder .getInt16 (/* UseSharedMemory=*/ 0 )};
2037
+ Size, CGF.Builder .getInt16 (/* UseSharedMemory=*/ 0 )};
2011
2038
llvm::Value *GlobalRecValue =
2012
2039
CGF.EmitRuntimeCall (createNVPTXRuntimeFunction (
2013
2040
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
@@ -2042,6 +2069,17 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
2042
2069
2043
2070
// Emit the "global alloca" which is a GEP from the global declaration
2044
2071
// record using the pointer returned by the runtime.
2072
+ LValue SecBase;
2073
+ decltype (I->getSecond ().LocalVarData )::const_iterator SecIt;
2074
+ if (IsTTD) {
2075
+ SecIt = I->getSecond ().SecondaryLocalVarData ->begin ();
2076
+ llvm::PointerType *SecGlobalRecPtrTy =
2077
+ CGF.ConvertTypeForMem (SecGlobalRecTy)->getPointerTo ();
2078
+ SecBase = CGF.MakeNaturalAlignPointeeAddrLValue (
2079
+ Bld.CreatePointerBitCastOrAddrSpaceCast (
2080
+ I->getSecond ().GlobalRecordAddr , SecGlobalRecPtrTy),
2081
+ SecGlobalRecTy);
2082
+ }
2045
2083
for (auto &Rec : I->getSecond ().LocalVarData ) {
2046
2084
bool EscapedParam = I->getSecond ().EscapedParameters .count (Rec.first );
2047
2085
llvm::Value *ParValue;
@@ -2055,23 +2093,32 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
2055
2093
// Emit VarAddr based on lane-id if required.
2056
2094
QualType VarTy;
2057
2095
if (Rec.second .IsOnePerTeam ) {
2058
- Rec.second .PrivateAddr = VarAddr.getAddress ();
2059
2096
VarTy = Rec.second .FD ->getType ();
2060
2097
} else {
2061
2098
llvm::Value *Ptr = CGF.Builder .CreateInBoundsGEP (
2062
2099
VarAddr.getAddress ().getPointer (),
2063
2100
{Bld.getInt32 (0 ), getNVPTXLaneID (CGF)});
2064
- Rec.second .PrivateAddr =
2065
- Address (Ptr, CGM.getContext ().getDeclAlign (Rec.first ));
2066
2101
VarTy =
2067
2102
Rec.second .FD ->getType ()->castAsArrayTypeUnsafe ()->getElementType ();
2068
- VarAddr = CGF.MakeAddrLValue (Rec.second .PrivateAddr , VarTy,
2069
- AlignmentSource::Decl);
2103
+ VarAddr = CGF.MakeAddrLValue (
2104
+ Address (Ptr, CGM.getContext ().getDeclAlign (Rec.first )), VarTy,
2105
+ AlignmentSource::Decl);
2070
2106
}
2107
+ Rec.second .PrivateAddr = VarAddr.getAddress ();
2071
2108
if (WithSPMDCheck ||
2072
- getExecutionMode () == CGOpenMPRuntimeNVPTX::EM_Unknown) {
2109
+ getExecutionMode () == CGOpenMPRuntimeNVPTX::EM_Unknown) {
2073
2110
assert (I->getSecond ().IsInSPMDModeFlag &&
2074
2111
" Expected unknown execution mode or required SPMD check." );
2112
+ if (IsTTD) {
2113
+ assert (SecIt->second .IsOnePerTeam &&
2114
+ " Secondary glob data must be one per team." );
2115
+ LValue SecVarAddr = CGF.EmitLValueForField (SecBase, SecIt->second .FD );
2116
+ VarAddr.setAddress (
2117
+ Address (Bld.CreateSelect (IsTTD, SecVarAddr.getPointer (),
2118
+ VarAddr.getPointer ()),
2119
+ VarAddr.getAlignment ()));
2120
+ Rec.second .PrivateAddr = VarAddr.getAddress ();
2121
+ }
2075
2122
Address GlobalPtr = Rec.second .PrivateAddr ;
2076
2123
Address LocalAddr = CGF.CreateMemTemp (VarTy, Rec.second .FD ->getName ());
2077
2124
Rec.second .PrivateAddr = Address (
@@ -2084,6 +2131,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
2084
2131
CGF.EmitStoreOfScalar (ParValue, VarAddr);
2085
2132
I->getSecond ().MappedParams ->setVarAddr (CGF, VD, VarAddr.getAddress ());
2086
2133
}
2134
+ ++SecIt;
2087
2135
}
2088
2136
}
2089
2137
for (const ValueDecl *VD : I->getSecond ().EscapedVariableLengthDecls ) {
@@ -4115,6 +4163,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
4115
4163
Data.insert (
4116
4164
std::make_pair (VD, MappedVarData (FD, IsInTargetMasterThreadRegion)));
4117
4165
}
4166
+ if (!IsInTargetMasterThreadRegion && !NeedToDelayGlobalization &&
4167
+ !IsInParallelRegion) {
4168
+ CheckVarsEscapingDeclContext VarChecker (CGF);
4169
+ VarChecker.Visit (Body);
4170
+ I->getSecond ().SecondaryGlobalRecord =
4171
+ VarChecker.getGlobalizedRecord (/* IsInTargetMasterThreadRegion=*/ true );
4172
+ I->getSecond ().SecondaryLocalVarData .emplace ();
4173
+ DeclToAddrMapTy &Data = I->getSecond ().SecondaryLocalVarData .getValue ();
4174
+ for (const ValueDecl *VD : VarChecker.getEscapedDecls ()) {
4175
+ assert (VD->isCanonicalDecl () && " Expected canonical declaration" );
4176
+ const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar (VD);
4177
+ Data.insert (std::make_pair (
4178
+ VD, MappedVarData (FD, /* IsInTargetMasterThreadRegion=*/ true )));
4179
+ }
4180
+ }
4118
4181
if (!NeedToDelayGlobalization) {
4119
4182
emitGenericVarsProlog (CGF, D->getBeginLoc (), /* WithSPMDCheck=*/ true );
4120
4183
struct GlobalizationScope final : EHScopeStack::Cleanup {
0 commit comments