@@ -4907,11 +4907,255 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4907
4907
}
4908
4908
}
4909
4909
4910
+ static std::string generateUniqueName(CodeGenModule &CGM,
4911
+ llvm::StringRef Prefix, const Expr *Ref);
4912
+
4913
+ void CGOpenMPRuntime::emitPrivateReduction(
4914
+ CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4915
+ const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4916
+
4917
+ // Create a shared global variable (__shared_reduction_var) to accumulate the
4918
+ // final result.
4919
+ //
4920
+ // Call __kmpc_barrier to synchronize threads before initialization.
4921
+ //
4922
+ // The master thread (thread_id == 0) initializes __shared_reduction_var
4923
+ // with the identity value or initializer.
4924
+ //
4925
+ // Call __kmpc_barrier to synchronize before combining.
4926
+ // For each i:
4927
+ // - Thread enters critical section.
4928
+ // - Reads its private value from LHSExprs[i].
4929
+ // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4930
+ // Privates[i]).
4931
+ // - Exits critical section.
4932
+ //
4933
+ // Call __kmpc_barrier after combining.
4934
+ //
4935
+ // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4936
+ //
4937
+ // Final __kmpc_barrier to synchronize after broadcasting
4938
+ QualType PrivateType = Privates->getType();
4939
+ llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4940
+
4941
+ const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4942
+ std::string ReductionVarNameStr;
4943
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4944
+ ReductionVarNameStr =
4945
+ generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4946
+ else
4947
+ ReductionVarNameStr = "unnamed_priv_var";
4948
+
4949
+ // Create an internal shared variable
4950
+ std::string SharedName =
4951
+ CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
4952
+ llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4953
+ LLVMType, ".omp.reduction." + SharedName);
4954
+
4955
+ SharedVar->setAlignment(
4956
+ llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4957
+
4958
+ Address SharedResult =
4959
+ CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4960
+
4961
+ llvm::Value *ThreadId = getThreadID(CGF, Loc);
4962
+ llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4963
+ llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4964
+
4965
+ llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
4966
+ llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
4967
+
4968
+ llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
4969
+ ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
4970
+ CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
4971
+
4972
+ CGF.EmitBlock(InitBB);
4973
+
4974
+ auto EmitSharedInit = [&]() {
4975
+ if (UDR) { // Check if it's a User-Defined Reduction
4976
+ if (const Expr *UDRInitExpr = UDR->getInitializer()) {
4977
+ std::pair<llvm::Function *, llvm::Function *> FnPair =
4978
+ getUserDefinedReduction(UDR);
4979
+ llvm::Function *InitializerFn = FnPair.second;
4980
+ if (InitializerFn) {
4981
+ if (const auto *CE =
4982
+ dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
4983
+ const auto *OutDRE = cast<DeclRefExpr>(
4984
+ cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
4985
+ ->getSubExpr());
4986
+ const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
4987
+
4988
+ CodeGenFunction::OMPPrivateScope LocalScope(CGF);
4989
+ LocalScope.addPrivate(OutVD, SharedResult);
4990
+
4991
+ (void)LocalScope.Privatize();
4992
+ if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
4993
+ CE->getCallee()->IgnoreParenImpCasts())) {
4994
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(
4995
+ CGF, OVE, RValue::get(InitializerFn));
4996
+ CGF.EmitIgnoredExpr(CE);
4997
+ } else {
4998
+ CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
4999
+ PrivateType.getQualifiers(),
5000
+ /*IsInitializer=*/true);
5001
+ }
5002
+ } else {
5003
+ CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5004
+ PrivateType.getQualifiers(),
5005
+ /*IsInitializer=*/true);
5006
+ }
5007
+ } else {
5008
+ CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5009
+ PrivateType.getQualifiers(),
5010
+ /*IsInitializer=*/true);
5011
+ }
5012
+ } else {
5013
+ // EmitNullInitialization handles default construction for C++ classes
5014
+ // and zeroing for scalars, which is a reasonable default.
5015
+ CGF.EmitNullInitialization(SharedResult, PrivateType);
5016
+ }
5017
+ return; // UDR initialization handled
5018
+ }
5019
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5020
+ if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5021
+ if (const Expr *InitExpr = VD->getInit()) {
5022
+ CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5023
+ PrivateType.getQualifiers(), true);
5024
+ return;
5025
+ }
5026
+ }
5027
+ }
5028
+ CGF.EmitNullInitialization(SharedResult, PrivateType);
5029
+ };
5030
+ EmitSharedInit();
5031
+ CGF.Builder.CreateBr(InitEndBB);
5032
+ CGF.EmitBlock(InitEndBB);
5033
+
5034
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5035
+ CGM.getModule(), OMPRTL___kmpc_barrier),
5036
+ BarrierArgs);
5037
+
5038
+ const Expr *ReductionOp = ReductionOps;
5039
+ const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5040
+ LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5041
+ LValue LHSLV = CGF.EmitLValue(Privates);
5042
+
5043
+ auto EmitCriticalReduction = [&](auto ReductionGen) {
5044
+ std::string CriticalName = getName({"reduction_critical"});
5045
+ emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5046
+ };
5047
+
5048
+ if (CurrentUDR) {
5049
+ // Handle user-defined reduction.
5050
+ auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5051
+ Action.Enter(CGF);
5052
+ std::pair<llvm::Function *, llvm::Function *> FnPair =
5053
+ getUserDefinedReduction(CurrentUDR);
5054
+ if (FnPair.first) {
5055
+ if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5056
+ const auto *OutDRE = cast<DeclRefExpr>(
5057
+ cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5058
+ ->getSubExpr());
5059
+ const auto *InDRE = cast<DeclRefExpr>(
5060
+ cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5061
+ ->getSubExpr());
5062
+ CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5063
+ LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5064
+ SharedLV.getAddress());
5065
+ LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5066
+ LHSLV.getAddress());
5067
+ (void)LocalScope.Privatize();
5068
+ emitReductionCombiner(CGF, ReductionOp);
5069
+ }
5070
+ }
5071
+ };
5072
+ EmitCriticalReduction(ReductionGen);
5073
+ } else {
5074
+ // Handle built-in reduction operations.
5075
+ #ifndef NDEBUG
5076
+ const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5077
+ if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5078
+ ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5079
+
5080
+ const Expr *AssignRHS = nullptr;
5081
+ if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5082
+ if (BinOp->getOpcode() == BO_Assign)
5083
+ AssignRHS = BinOp->getRHS();
5084
+ } else if (const auto *OpCall =
5085
+ dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5086
+ if (OpCall->getOperator() == OO_Equal)
5087
+ AssignRHS = OpCall->getArg(1);
5088
+ }
5089
+
5090
+ assert(AssignRHS &&
5091
+ "Private Variable Reduction : Invalid ReductionOp expression");
5092
+ #endif
5093
+
5094
+ auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5095
+ Action.Enter(CGF);
5096
+ const auto *OmpOutDRE =
5097
+ dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5098
+ const auto *OmpInDRE =
5099
+ dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5100
+ assert(
5101
+ OmpOutDRE && OmpInDRE &&
5102
+ "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5103
+ const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5104
+ const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5105
+ CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5106
+ LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5107
+ LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5108
+ (void)LocalScope.Privatize();
5109
+ // Emit the actual reduction operation
5110
+ CGF.EmitIgnoredExpr(ReductionOp);
5111
+ };
5112
+ EmitCriticalReduction(ReductionGen);
5113
+ }
5114
+
5115
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5116
+ CGM.getModule(), OMPRTL___kmpc_barrier),
5117
+ BarrierArgs);
5118
+
5119
+ // Broadcast final result
5120
+ bool IsAggregate = PrivateType->isAggregateType();
5121
+ LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5122
+ llvm::Value *FinalResultVal = nullptr;
5123
+ Address FinalResultAddr = Address::invalid();
5124
+
5125
+ if (IsAggregate)
5126
+ FinalResultAddr = SharedResult;
5127
+ else
5128
+ FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5129
+
5130
+ LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5131
+ if (IsAggregate) {
5132
+ CGF.EmitAggregateCopy(TargetLHSLV,
5133
+ CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5134
+ PrivateType, AggValueSlot::DoesNotOverlap, false);
5135
+ } else {
5136
+ CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5137
+ }
5138
+ // Final synchronization barrier
5139
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5140
+ CGM.getModule(), OMPRTL___kmpc_barrier),
5141
+ BarrierArgs);
5142
+
5143
+ // Combiner with original list item
5144
+ auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5145
+ PrePostActionTy &Action) {
5146
+ Action.Enter(CGF);
5147
+ emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5148
+ cast<DeclRefExpr>(LHSExprs),
5149
+ cast<DeclRefExpr>(RHSExprs));
5150
+ };
5151
+ EmitCriticalReduction(OriginalListCombiner);
5152
+ }
5153
+
4910
5154
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4911
- ArrayRef<const Expr *> Privates ,
4912
- ArrayRef<const Expr *> LHSExprs ,
4913
- ArrayRef<const Expr *> RHSExprs ,
4914
- ArrayRef<const Expr *> ReductionOps ,
5155
+ ArrayRef<const Expr *> OrgPrivates ,
5156
+ ArrayRef<const Expr *> OrgLHSExprs ,
5157
+ ArrayRef<const Expr *> OrgRHSExprs ,
5158
+ ArrayRef<const Expr *> OrgReductionOps ,
4915
5159
ReductionOptionsTy Options) {
4916
5160
if (!CGF.HaveInsertPoint())
4917
5161
return;
@@ -4958,10 +5202,10 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4958
5202
4959
5203
if (SimpleReduction) {
4960
5204
CodeGenFunction::RunCleanupsScope Scope(CGF);
4961
- const auto *IPriv = Privates .begin();
4962
- const auto *ILHS = LHSExprs .begin();
4963
- const auto *IRHS = RHSExprs .begin();
4964
- for (const Expr *E : ReductionOps ) {
5205
+ const auto *IPriv = OrgPrivates .begin();
5206
+ const auto *ILHS = OrgLHSExprs .begin();
5207
+ const auto *IRHS = OrgRHSExprs .begin();
5208
+ for (const Expr *E : OrgReductionOps ) {
4965
5209
emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4966
5210
cast<DeclRefExpr>(*IRHS));
4967
5211
++IPriv;
@@ -4971,6 +5215,26 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4971
5215
return;
4972
5216
}
4973
5217
5218
+ // Select the shared (non-private) reduction variables via IsPrivateVarReduction.
5219
+ // Only keep entries where the corresponding variable is not private.
5220
+ SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5221
+ FilteredRHSExprs, FilteredReductionOps;
5222
+ for (unsigned I : llvm::seq<unsigned>(
5223
+ std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5224
+ if (!Options.IsPrivateVarReduction[I]) {
5225
+ FilteredPrivates.emplace_back(OrgPrivates[I]);
5226
+ FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5227
+ FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5228
+ FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5229
+ }
5230
+ }
5231
+ // Wrap filtered vectors in ArrayRef for downstream shared reduction
5232
+ // processing.
5233
+ ArrayRef<const Expr *> Privates = FilteredPrivates;
5234
+ ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5235
+ ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5236
+ ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5237
+
4974
5238
// 1. Build a list of reduction variables.
4975
5239
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4976
5240
auto Size = RHSExprs.size();
@@ -5162,7 +5426,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5162
5426
} else {
5163
5427
// Emit as a critical region.
5164
5428
auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5165
- const Expr *, const Expr *) {
5429
+ const Expr *, const Expr *) {
5166
5430
CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5167
5431
std::string Name = RT.getName({"atomic_reduction"});
5168
5432
RT.emitCriticalRegion(
@@ -5209,6 +5473,16 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5209
5473
5210
5474
CGF.EmitBranch(DefaultBB);
5211
5475
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5476
+ assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5477
+ "PrivateVarReduction: Privates size mismatch");
5478
+ assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5479
+ "PrivateVarReduction: ReductionOps size mismatch");
5480
+ for (unsigned I : llvm::seq<unsigned>(
5481
+ std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5482
+ if (Options.IsPrivateVarReduction[I])
5483
+ emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5484
+ OrgRHSExprs[I], OrgReductionOps[I]);
5485
+ }
5212
5486
}
5213
5487
5214
5488
/// Generates unique name for artificial threadprivate variables.
0 commit comments