@@ -803,8 +803,30 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
803
803
if (!IsSPMD)
804
804
emitGenericVarsEpilog (CGF);
805
805
806
+ // This is temporary until we remove the fixed-size buffer.
807
+ ASTContext &C = CGM.getContext ();
808
+ RecordDecl *StaticRD = C.buildImplicitRecord (
809
+ " _openmp_teams_reduction_type_$_" , RecordDecl::TagKind::TTK_Union);
810
+ StaticRD->startDefinition ();
811
+ for (const RecordDecl *TeamReductionRec : TeamsReductions) {
812
+ QualType RecTy = C.getRecordType (TeamReductionRec);
813
+ auto *Field = FieldDecl::Create (
814
+ C, StaticRD, SourceLocation (), SourceLocation (), nullptr , RecTy,
815
+ C.getTrivialTypeSourceInfo (RecTy, SourceLocation ()),
816
+ /* BW=*/ nullptr , /* Mutable=*/ false ,
817
+ /* InitStyle=*/ ICIS_NoInit);
818
+ Field->setAccess (AS_public);
819
+ StaticRD->addDecl (Field);
820
+ }
821
+ StaticRD->completeDefinition ();
822
+ QualType StaticTy = C.getRecordType (StaticRD);
823
+ llvm::Type *LLVMReductionsBufferTy =
824
+ CGM.getTypes ().ConvertTypeForMem (StaticTy);
825
+ const auto &DL = CGM.getModule ().getDataLayout ();
826
+ uint64_t BufferSize =
827
+ DL.getTypeAllocSize (LLVMReductionsBufferTy).getFixedValue ();
806
828
CGBuilderTy &Bld = CGF.Builder ;
807
- OMPBuilder.createTargetDeinit (Bld);
829
+ OMPBuilder.createTargetDeinit (Bld, BufferSize );
808
830
}
809
831
810
832
void CGOpenMPRuntimeGPU::emitSPMDKernel (const OMPExecutableDirective &D,
@@ -2998,15 +3020,10 @@ void CGOpenMPRuntimeGPU::emitReduction(
2998
3020
CGM.getContext (), PrivatesReductions, std::nullopt, VarFieldMap,
2999
3021
C.getLangOpts ().OpenMPCUDAReductionBufNum );
3000
3022
TeamsReductions.push_back (TeamReductionRec);
3001
- if (!KernelTeamsReductionPtr) {
3002
- KernelTeamsReductionPtr = new llvm::GlobalVariable (
3003
- CGM.getModule (), CGM.VoidPtrTy , /* isConstant=*/ true ,
3004
- llvm::GlobalValue::InternalLinkage, nullptr ,
3005
- " _openmp_teams_reductions_buffer_$_$ptr" );
3006
- }
3007
- llvm::Value *GlobalBufferPtr = CGF.EmitLoadOfScalar (
3008
- Address (KernelTeamsReductionPtr, CGF.VoidPtrTy , CGM.getPointerAlign ()),
3009
- /* Volatile=*/ false , C.getPointerType (C.VoidPtrTy ), Loc);
3023
+ auto *KernelTeamsReductionPtr = CGF.EmitRuntimeCall (
3024
+ OMPBuilder.getOrCreateRuntimeFunction (
3025
+ CGM.getModule (), OMPRTL___kmpc_reduction_get_fixed_buffer),
3026
+ {}, " _openmp_teams_reductions_buffer_$_$ptr" );
3010
3027
llvm::Value *GlobalToBufferCpyFn = ::emitListToGlobalCopyFunction (
3011
3028
CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
3012
3029
llvm::Value *GlobalToBufferRedFn = ::emitListToGlobalReduceFunction (
@@ -3021,7 +3038,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
3021
3038
llvm::Value *Args[] = {
3022
3039
RTLoc,
3023
3040
ThreadId,
3024
- GlobalBufferPtr ,
3041
+ KernelTeamsReductionPtr ,
3025
3042
CGF.Builder .getInt32 (C.getLangOpts ().OpenMPCUDAReductionBufNum ),
3026
3043
RL,
3027
3044
ShuffleAndReduceFn,
@@ -3654,42 +3671,6 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
3654
3671
CGOpenMPRuntime::processRequiresDirective (D);
3655
3672
}
3656
3673
3657
- void CGOpenMPRuntimeGPU::clear () {
3658
-
3659
- if (!TeamsReductions.empty ()) {
3660
- ASTContext &C = CGM.getContext ();
3661
- RecordDecl *StaticRD = C.buildImplicitRecord (
3662
- " _openmp_teams_reduction_type_$_" , RecordDecl::TagKind::TTK_Union);
3663
- StaticRD->startDefinition ();
3664
- for (const RecordDecl *TeamReductionRec : TeamsReductions) {
3665
- QualType RecTy = C.getRecordType (TeamReductionRec);
3666
- auto *Field = FieldDecl::Create (
3667
- C, StaticRD, SourceLocation (), SourceLocation (), nullptr , RecTy,
3668
- C.getTrivialTypeSourceInfo (RecTy, SourceLocation ()),
3669
- /* BW=*/ nullptr , /* Mutable=*/ false ,
3670
- /* InitStyle=*/ ICIS_NoInit);
3671
- Field->setAccess (AS_public);
3672
- StaticRD->addDecl (Field);
3673
- }
3674
- StaticRD->completeDefinition ();
3675
- QualType StaticTy = C.getRecordType (StaticRD);
3676
- llvm::Type *LLVMReductionsBufferTy =
3677
- CGM.getTypes ().ConvertTypeForMem (StaticTy);
3678
- // FIXME: nvlink does not handle weak linkage correctly (object with the
3679
- // different size are reported as erroneous).
3680
- // Restore CommonLinkage as soon as nvlink is fixed.
3681
- auto *GV = new llvm::GlobalVariable (
3682
- CGM.getModule (), LLVMReductionsBufferTy,
3683
- /* isConstant=*/ false , llvm::GlobalValue::InternalLinkage,
3684
- llvm::Constant::getNullValue (LLVMReductionsBufferTy),
3685
- " _openmp_teams_reductions_buffer_$_" );
3686
- KernelTeamsReductionPtr->setInitializer (
3687
- llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast (GV,
3688
- CGM.VoidPtrTy ));
3689
- }
3690
- CGOpenMPRuntime::clear ();
3691
- }
3692
-
3693
3674
llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads (CodeGenFunction &CGF) {
3694
3675
CGBuilderTy &Bld = CGF.Builder ;
3695
3676
llvm::Module *M = &CGF.CGM .getModule ();
0 commit comments