@@ -923,8 +923,30 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
923
923
if (!IsSPMD)
924
924
emitGenericVarsEpilog (CGF);
925
925
926
+ // This is temporary until we remove the fixed-size buffer.
927
+ ASTContext &C = CGM.getContext ();
928
+ RecordDecl *StaticRD = C.buildImplicitRecord (
929
+ " _openmp_teams_reduction_type_$_" , RecordDecl::TagKind::TTK_Union);
930
+ StaticRD->startDefinition ();
931
+ for (const RecordDecl *TeamReductionRec : TeamsReductions) {
932
+ QualType RecTy = C.getRecordType (TeamReductionRec);
933
+ auto *Field = FieldDecl::Create (
934
+ C, StaticRD, SourceLocation (), SourceLocation (), nullptr , RecTy,
935
+ C.getTrivialTypeSourceInfo (RecTy, SourceLocation ()),
936
+ /* BW=*/ nullptr , /* Mutable=*/ false ,
937
+ /* InitStyle=*/ ICIS_NoInit);
938
+ Field->setAccess (AS_public);
939
+ StaticRD->addDecl (Field);
940
+ }
941
+ StaticRD->completeDefinition ();
942
+ QualType StaticTy = C.getRecordType (StaticRD);
943
+ llvm::Type *LLVMReductionsBufferTy =
944
+ CGM.getTypes ().ConvertTypeForMem (StaticTy);
945
+ const auto &DL = CGM.getModule ().getDataLayout ();
946
+ uint64_t BufferSize =
947
+ DL.getTypeAllocSize (LLVMReductionsBufferTy).getFixedValue ();
926
948
CGBuilderTy &Bld = CGF.Builder ;
927
- OMPBuilder.createTargetDeinit (Bld);
949
+ OMPBuilder.createTargetDeinit (Bld, BufferSize );
928
950
}
929
951
930
952
void CGOpenMPRuntimeGPU::emitSPMDKernel (const OMPExecutableDirective &D,
@@ -3193,15 +3215,10 @@ void CGOpenMPRuntimeGPU::emitReduction(
3193
3215
CGM.getContext (), PrivatesReductions, std::nullopt, VarFieldMap,
3194
3216
C.getLangOpts ().OpenMPCUDAReductionBufNum );
3195
3217
TeamsReductions.push_back (TeamReductionRec);
3196
- if (!KernelTeamsReductionPtr) {
3197
- KernelTeamsReductionPtr = new llvm::GlobalVariable (
3198
- CGM.getModule (), CGM.VoidPtrTy , /* isConstant=*/ true ,
3199
- llvm::GlobalValue::InternalLinkage, nullptr ,
3200
- " _openmp_teams_reductions_buffer_$_$ptr" );
3201
- }
3202
- llvm::Value *GlobalBufferPtr = CGF.EmitLoadOfScalar (
3203
- Address (KernelTeamsReductionPtr, CGF.VoidPtrTy , CGM.getPointerAlign ()),
3204
- /* Volatile=*/ false , C.getPointerType (C.VoidPtrTy ), Loc);
3218
+ auto *KernelTeamsReductionPtr = CGF.EmitRuntimeCall (
3219
+ OMPBuilder.getOrCreateRuntimeFunction (
3220
+ CGM.getModule (), OMPRTL___kmpc_reduction_get_fixed_buffer),
3221
+ {}, " _openmp_teams_reductions_buffer_$_$ptr" );
3205
3222
llvm::Value *GlobalToBufferCpyFn = ::emitListToGlobalCopyFunction (
3206
3223
CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
3207
3224
llvm::Value *GlobalToBufferRedFn = ::emitListToGlobalReduceFunction (
@@ -3216,7 +3233,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
3216
3233
llvm::Value *Args[] = {
3217
3234
RTLoc,
3218
3235
ThreadId,
3219
- GlobalBufferPtr ,
3236
+ KernelTeamsReductionPtr ,
3220
3237
CGF.Builder .getInt32 (C.getLangOpts ().OpenMPCUDAReductionBufNum ),
3221
3238
RL,
3222
3239
ShuffleAndReduceFn,
@@ -3859,42 +3876,6 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
3859
3876
CGOpenMPRuntime::processRequiresDirective (D);
3860
3877
}
3861
3878
3862
- void CGOpenMPRuntimeGPU::clear () {
3863
-
3864
- if (!TeamsReductions.empty ()) {
3865
- ASTContext &C = CGM.getContext ();
3866
- RecordDecl *StaticRD = C.buildImplicitRecord (
3867
- " _openmp_teams_reduction_type_$_" , RecordDecl::TagKind::TTK_Union);
3868
- StaticRD->startDefinition ();
3869
- for (const RecordDecl *TeamReductionRec : TeamsReductions) {
3870
- QualType RecTy = C.getRecordType (TeamReductionRec);
3871
- auto *Field = FieldDecl::Create (
3872
- C, StaticRD, SourceLocation (), SourceLocation (), nullptr , RecTy,
3873
- C.getTrivialTypeSourceInfo (RecTy, SourceLocation ()),
3874
- /* BW=*/ nullptr , /* Mutable=*/ false ,
3875
- /* InitStyle=*/ ICIS_NoInit);
3876
- Field->setAccess (AS_public);
3877
- StaticRD->addDecl (Field);
3878
- }
3879
- StaticRD->completeDefinition ();
3880
- QualType StaticTy = C.getRecordType (StaticRD);
3881
- llvm::Type *LLVMReductionsBufferTy =
3882
- CGM.getTypes ().ConvertTypeForMem (StaticTy);
3883
- // FIXME: nvlink does not handle weak linkage correctly (object with the
3884
- // different size are reported as erroneous).
3885
- // Restore CommonLinkage as soon as nvlink is fixed.
3886
- auto *GV = new llvm::GlobalVariable (
3887
- CGM.getModule (), LLVMReductionsBufferTy,
3888
- /* isConstant=*/ false , llvm::GlobalValue::InternalLinkage,
3889
- llvm::Constant::getNullValue (LLVMReductionsBufferTy),
3890
- " _openmp_teams_reductions_buffer_$_" );
3891
- KernelTeamsReductionPtr->setInitializer (
3892
- llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast (GV,
3893
- CGM.VoidPtrTy ));
3894
- }
3895
- CGOpenMPRuntime::clear ();
3896
- }
3897
-
3898
3879
llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads (CodeGenFunction &CGF) {
3899
3880
CGBuilderTy &Bld = CGF.Builder ;
3900
3881
llvm::Module *M = &CGF.CGM .getModule ();
0 commit comments