Skip to content

Commit 90b54fa

Browse files
committed
[OPENMP50]Codegen for use_device_addr clauses.
Summary: Added codegen for use_device_addr clause. The components of the list items are mapped as a kind of RETURN components and then the returned base address is used instead of the real address of the base declaration used in the use_device_addr expressions. Reviewers: jdoerfert Subscribers: yaxunl, guansong, sstefan1, cfe-commits, caomhin Tags: #clang Differential Revision: https://reviews.llvm.org/D80730
1 parent 69bdfb0 commit 90b54fa

File tree

6 files changed

+433
-54
lines changed

6 files changed

+433
-54
lines changed

clang/lib/AST/OpenMPClause.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,8 +1207,8 @@ OMPUseDevicePtrClause *OMPUseDevicePtrClause::Create(
12071207
Sizes.NumComponents = getComponentsTotalNumber(ComponentLists);
12081208

12091209
// We need to allocate:
1210-
// 3 x NumVars x Expr* - we have an original list expression for each clause
1211-
// list entry and an equal number of private copies and inits.
1210+
// NumVars x Expr* - we have an original list expression for each clause
1211+
// list entry.
12121212
// NumUniqueDeclarations x ValueDecl* - unique base declarations associated
12131213
// with each component list.
12141214
// (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 130 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -7031,7 +7031,7 @@ class MappableExprsHandler {
70317031
OMP_MAP_TARGET_PARAM = 0x20,
70327032
/// Signal that the runtime library has to return the device pointer
70337033
/// in the current position for the data being mapped. Used when we have the
7034-
/// use_device_ptr clause.
7034+
/// use_device_ptr or use_device_addr clause.
70357035
OMP_MAP_RETURN_PARAM = 0x40,
70367036
/// This flag signals that the reference being passed is a pointer to
70377037
/// private data.
@@ -7099,26 +7099,30 @@ class MappableExprsHandler {
70997099
ArrayRef<OpenMPMapModifierKind> MapModifiers;
71007100
bool ReturnDevicePointer = false;
71017101
bool IsImplicit = false;
7102+
bool ForDeviceAddr = false;
71027103

71037104
MapInfo() = default;
71047105
MapInfo(
71057106
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
71067107
OpenMPMapClauseKind MapType,
7107-
ArrayRef<OpenMPMapModifierKind> MapModifiers,
7108-
bool ReturnDevicePointer, bool IsImplicit)
7108+
ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
7109+
bool IsImplicit, bool ForDeviceAddr = false)
71097110
: Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7110-
ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
7111+
ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7112+
ForDeviceAddr(ForDeviceAddr) {}
71117113
};
71127114

7113-
/// If use_device_ptr is used on a pointer which is a struct member and there
7114-
/// is no map information about it, then emission of that entry is deferred
7115-
/// until the whole struct has been processed.
7115+
/// If use_device_ptr or use_device_addr is used on a decl which is a struct
7116+
/// member and there is no map information about it, then emission of that
7117+
/// entry is deferred until the whole struct has been processed.
71167118
struct DeferredDevicePtrEntryTy {
71177119
const Expr *IE = nullptr;
71187120
const ValueDecl *VD = nullptr;
7121+
bool ForDeviceAddr = false;
71197122

7120-
DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
7121-
: IE(IE), VD(VD) {}
7123+
DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7124+
bool ForDeviceAddr)
7125+
: IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
71227126
};
71237127

71247128
/// The target directive from where the mappable clauses were extracted. It
@@ -7306,13 +7310,12 @@ class MappableExprsHandler {
73067310
/// \a IsFirstComponent should be set to true if the provided set of
73077311
/// components is the first associated with a capture.
73087312
void generateInfoForComponentList(
7309-
OpenMPMapClauseKind MapType,
7310-
ArrayRef<OpenMPMapModifierKind> MapModifiers,
7313+
OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
73117314
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
73127315
MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
73137316
MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
73147317
StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7315-
bool IsImplicit,
7318+
bool IsImplicit, bool ForDeviceAddr = false,
73167319
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
73177320
OverlappedElements = llvm::None) const {
73187321
// The following summarizes what has to be generated for each map and the
@@ -7623,8 +7626,8 @@ class MappableExprsHandler {
76237626
// If this component is a pointer inside the base struct then we don't
76247627
// need to create any entry for it - it will be combined with the object
76257628
// it is pointing to into a single PTR_AND_OBJ entry.
7626-
bool IsMemberPointer =
7627-
IsPointer && EncounteredME &&
7629+
bool IsMemberPointerOrAddr =
7630+
(IsPointer || ForDeviceAddr) && EncounteredME &&
76287631
(dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
76297632
EncounteredME);
76307633
if (!OverlappedElements.empty()) {
@@ -7691,7 +7694,7 @@ class MappableExprsHandler {
76917694
break;
76927695
}
76937696
llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7694-
if (!IsMemberPointer) {
7697+
if (!IsMemberPointerOrAddr) {
76957698
BasePointers.push_back(BP.getPointer());
76967699
Pointers.push_back(LB.getPointer());
76977700
Sizes.push_back(
@@ -7952,17 +7955,18 @@ class MappableExprsHandler {
79527955

79537956
// Helper function to fill the information map for the different supported
79547957
// clauses.
7955-
auto &&InfoGen = [&Info](
7956-
const ValueDecl *D,
7957-
OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7958-
OpenMPMapClauseKind MapType,
7959-
ArrayRef<OpenMPMapModifierKind> MapModifiers,
7960-
bool ReturnDevicePointer, bool IsImplicit) {
7961-
const ValueDecl *VD =
7962-
D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7963-
Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7964-
IsImplicit);
7965-
};
7958+
auto &&InfoGen =
7959+
[&Info](const ValueDecl *D,
7960+
OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7961+
OpenMPMapClauseKind MapType,
7962+
ArrayRef<OpenMPMapModifierKind> MapModifiers,
7963+
bool ReturnDevicePointer, bool IsImplicit,
7964+
bool ForDeviceAddr = false) {
7965+
const ValueDecl *VD =
7966+
D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7967+
Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7968+
IsImplicit, ForDeviceAddr);
7969+
};
79667970

79677971
assert(CurDir.is<const OMPExecutableDirective *>() &&
79687972
"Expect a executable directive");
@@ -8032,7 +8036,7 @@ class MappableExprsHandler {
80328036
// partial struct.
80338037
InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
80348038
/*ReturnDevicePointer=*/false, C->isImplicit());
8035-
DeferredInfo[nullptr].emplace_back(IE, VD);
8039+
DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
80368040
} else {
80378041
llvm::Value *Ptr =
80388042
CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
@@ -8044,6 +8048,70 @@ class MappableExprsHandler {
80448048
}
80458049
}
80468050

8051+
// Look at the use_device_addr clause information and mark the existing map
8052+
// entries as such. If there is no map information for an entry in the
8053+
// use_device_addr list, we create one with map type 'alloc' and zero size
8054+
// section. It is the user fault if that was not mapped before. If there is
8055+
// no map information and the pointer is a struct member, then we defer the
8056+
// emission of that entry until the whole struct has been processed.
8057+
llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8058+
for (const auto *C :
8059+
CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8060+
for (const auto L : C->component_lists()) {
8061+
assert(!L.second.empty() && "Not expecting empty list of components!");
8062+
const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8063+
if (!Processed.insert(VD).second)
8064+
continue;
8065+
VD = cast<ValueDecl>(VD->getCanonicalDecl());
8066+
const Expr *IE = L.second.back().getAssociatedExpression();
8067+
// If the first component is a member expression, we have to look into
8068+
// 'this', which maps to null in the map of map information. Otherwise
8069+
// look directly for the information.
8070+
auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8071+
8072+
// We potentially have map information for this declaration already.
8073+
// Look for the first set of components that refer to it.
8074+
if (It != Info.end()) {
8075+
auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8076+
return MI.Components.back().getAssociatedDeclaration() == VD;
8077+
});
8078+
// If we found a map entry, signal that the pointer has to be returned
8079+
// and move on to the next declaration.
8080+
if (CI != It->second.end()) {
8081+
CI->ReturnDevicePointer = true;
8082+
continue;
8083+
}
8084+
}
8085+
8086+
// We didn't find any match in our map information - generate a zero
8087+
// size array section - if the pointer is a struct member we defer this
8088+
// action until the whole struct has been processed.
8089+
if (isa<MemberExpr>(IE)) {
8090+
// Insert the pointer into Info to be processed by
8091+
// generateInfoForComponentList. Because it is a member pointer
8092+
// without a pointee, no entry will be generated for it, therefore
8093+
// we need to generate one after the whole struct has been processed.
8094+
// Nonetheless, generateInfoForComponentList must be called to take
8095+
// the pointer into account for the calculation of the range of the
8096+
// partial struct.
8097+
InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8098+
/*ReturnDevicePointer=*/false, C->isImplicit(),
8099+
/*ForDeviceAddr=*/true);
8100+
DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8101+
} else {
8102+
llvm::Value *Ptr;
8103+
if (IE->isGLValue())
8104+
Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8105+
else
8106+
Ptr = CGF.EmitScalarExpr(IE);
8107+
BasePointers.emplace_back(Ptr, VD);
8108+
Pointers.push_back(Ptr);
8109+
Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8110+
Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8111+
}
8112+
}
8113+
}
8114+
80478115
for (const auto &M : Info) {
80488116
// We need to know when we generate information for the first component
80498117
// associated with a capture, because the mapping flags depend on it.
@@ -8062,10 +8130,10 @@ class MappableExprsHandler {
80628130

80638131
// Remember the current base pointer index.
80648132
unsigned CurrentBasePointersIdx = CurBasePointers.size();
8065-
generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8066-
CurBasePointers, CurPointers, CurSizes,
8067-
CurTypes, PartialStruct,
8068-
IsFirstComponentList, L.IsImplicit);
8133+
generateInfoForComponentList(
8134+
L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8135+
CurPointers, CurSizes, CurTypes, PartialStruct,
8136+
IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
80698137

80708138
// If this entry relates with a device pointer, set the relevant
80718139
// declaration and add the 'return pointer' flag.
@@ -8085,21 +8153,35 @@ class MappableExprsHandler {
80858153
}
80868154

80878155
// Append any pending zero-length pointers which are struct members and
8088-
// used with use_device_ptr.
8156+
// used with use_device_ptr or use_device_addr.
80898157
auto CI = DeferredInfo.find(M.first);
80908158
if (CI != DeferredInfo.end()) {
80918159
for (const DeferredDevicePtrEntryTy &L : CI->second) {
8092-
llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8093-
llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8094-
this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8160+
llvm::Value *BasePtr;
8161+
llvm::Value *Ptr;
8162+
if (L.ForDeviceAddr) {
8163+
if (L.IE->isGLValue())
8164+
Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8165+
else
8166+
Ptr = this->CGF.EmitScalarExpr(L.IE);
8167+
BasePtr = Ptr;
8168+
// Entry is RETURN_PARAM. Also, set the placeholder value
8169+
// MEMBER_OF=FFFF so that the entry is later updated with the
8170+
// correct value of MEMBER_OF.
8171+
CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8172+
} else {
8173+
BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8174+
Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8175+
L.IE->getExprLoc());
8176+
// Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8177+
// value MEMBER_OF=FFFF so that the entry is later updated with the
8178+
// correct value of MEMBER_OF.
8179+
CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8180+
OMP_MAP_MEMBER_OF);
8181+
}
80958182
CurBasePointers.emplace_back(BasePtr, L.VD);
80968183
CurPointers.push_back(Ptr);
80978184
CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8098-
// Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8099-
// value MEMBER_OF=FFFF so that the entry is later updated with the
8100-
// correct value of MEMBER_OF.
8101-
CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8102-
OMP_MAP_MEMBER_OF);
81038185
}
81048186
}
81058187

@@ -8168,10 +8250,10 @@ class MappableExprsHandler {
81688250
for (const MapInfo &L : M.second) {
81698251
assert(!L.Components.empty() &&
81708252
"Not expecting declaration with no component lists.");
8171-
generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8172-
CurBasePointers, CurPointers, CurSizes,
8173-
CurTypes, PartialStruct,
8174-
IsFirstComponentList, L.IsImplicit);
8253+
generateInfoForComponentList(
8254+
L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8255+
CurPointers, CurSizes, CurTypes, PartialStruct,
8256+
IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
81758257
IsFirstComponentList = false;
81768258
}
81778259

@@ -8437,10 +8519,10 @@ class MappableExprsHandler {
84378519
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
84388520
OverlappedComponents = Pair.getSecond();
84398521
bool IsFirstComponentList = true;
8440-
generateInfoForComponentList(MapType, MapModifiers, Components,
8441-
BasePointers, Pointers, Sizes, Types,
8442-
PartialStruct, IsFirstComponentList,
8443-
IsImplicit, OverlappedComponents);
8522+
generateInfoForComponentList(
8523+
MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
8524+
Types, PartialStruct, IsFirstComponentList, IsImplicit,
8525+
/*ForDeviceAddr=*/false, OverlappedComponents);
84448526
}
84458527
// Go through other elements without overlapped elements.
84468528
bool IsFirstComponentList = OverlappedData.empty();

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ using namespace clang;
3131
using namespace CodeGen;
3232
using namespace llvm::omp;
3333

34+
static const VarDecl *getBaseDecl(const Expr *Ref);
35+
3436
namespace {
3537
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
3638
/// for captured expressions.
@@ -220,6 +222,12 @@ class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
220222
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
221223
CGF.EmitVarDecl(*OED);
222224
}
225+
} else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
226+
for (const Expr *E : UDP->varlists()) {
227+
const Decl *D = getBaseDecl(E);
228+
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
229+
CGF.EmitVarDecl(*OED);
230+
}
223231
}
224232
}
225233
if (!isOpenMPSimdDirective(S.getDirectiveKind()))
@@ -5804,9 +5812,8 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
58045812
}
58055813

58065814
void CodeGenFunction::EmitOMPUseDevicePtrClause(
5807-
const OMPClause &NC, OMPPrivateScope &PrivateScope,
5815+
const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
58085816
const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
5809-
const auto &C = cast<OMPUseDevicePtrClause>(NC);
58105817
auto OrigVarIt = C.varlist_begin();
58115818
auto InitIt = C.inits().begin();
58125819
for (const Expr *PvtVarIt : C.private_copies()) {
@@ -5867,6 +5874,60 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause(
58675874
}
58685875
}
58695876

5877+
static const VarDecl *getBaseDecl(const Expr *Ref) {
5878+
const Expr *Base = Ref->IgnoreParenImpCasts();
5879+
while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
5880+
Base = OASE->getBase()->IgnoreParenImpCasts();
5881+
while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
5882+
Base = ASE->getBase()->IgnoreParenImpCasts();
5883+
return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
5884+
}
5885+
5886+
void CodeGenFunction::EmitOMPUseDeviceAddrClause(
5887+
const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
5888+
const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
5889+
llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
5890+
for (const Expr *Ref : C.varlists()) {
5891+
const VarDecl *OrigVD = getBaseDecl(Ref);
5892+
if (!Processed.insert(OrigVD).second)
5893+
continue;
5894+
// In order to identify the right initializer we need to match the
5895+
// declaration used by the mapping logic. In some cases we may get
5896+
// OMPCapturedExprDecl that refers to the original declaration.
5897+
const ValueDecl *MatchingVD = OrigVD;
5898+
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
5899+
// OMPCapturedExprDecl are used to privative fields of the current
5900+
// structure.
5901+
const auto *ME = cast<MemberExpr>(OED->getInit());
5902+
assert(isa<CXXThisExpr>(ME->getBase()) &&
5903+
"Base should be the current struct!");
5904+
MatchingVD = ME->getMemberDecl();
5905+
}
5906+
5907+
// If we don't have information about the current list item, move on to
5908+
// the next one.
5909+
auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
5910+
if (InitAddrIt == CaptureDeviceAddrMap.end())
5911+
continue;
5912+
5913+
Address PrivAddr = InitAddrIt->getSecond();
5914+
// For declrefs and variable length array need to load the pointer for
5915+
// correct mapping, since the pointer to the data was passed to the runtime.
5916+
if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
5917+
MatchingVD->getType()->isArrayType())
5918+
PrivAddr =
5919+
EmitLoadOfPointer(PrivAddr, getContext()
5920+
.getPointerType(OrigVD->getType())
5921+
->castAs<PointerType>());
5922+
llvm::Type *RealTy =
5923+
ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
5924+
->getPointerTo();
5925+
PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
5926+
5927+
(void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
5928+
}
5929+
}
5930+
58705931
// Generate the instructions for '#pragma omp target data' directive.
58715932
void CodeGenFunction::EmitOMPTargetDataDirective(
58725933
const OMPTargetDataDirective &S) {
@@ -5911,6 +5972,9 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
59115972
for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
59125973
CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
59135974
Info.CaptureDeviceAddrMap);
5975+
for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
5976+
CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
5977+
Info.CaptureDeviceAddrMap);
59145978
(void)PrivateScope.Privatize();
59155979
RCG(CGF);
59165980
} else {

0 commit comments

Comments
 (0)