Skip to content

Commit 9335af1

Browse files
author
Chandra Ghale
committed
multiple reduced value change
1 parent 0ca2f86 commit 9335af1

File tree

3 files changed

+110
-94
lines changed

3 files changed

+110
-94
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 75 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -4900,15 +4900,8 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
49004900
}
49014901

49024902
void CGOpenMPRuntime::emitPrivateReduction(
4903-
CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
4904-
ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
4905-
ArrayRef<const Expr *> ReductionOps) {
4906-
if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
4907-
return;
4908-
4909-
if (LHSExprs.size() != Privates.size() ||
4910-
LHSExprs.size() != ReductionOps.size())
4911-
return;
4903+
CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4904+
const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
49124905

49134906
// Create a shared global variable (__shared_reduction_var) to accumulate the
49144907
// final result.
@@ -4931,15 +4924,15 @@ void CGOpenMPRuntime::emitPrivateReduction(
49314924
// Each thread copies __shared_reduction_var[i] back to LHSExprs[i].
49324925
//
49334926
// Final __kmpc_barrier to synchronize after broadcasting
4934-
QualType PrivateType = Privates[0]->getType();
4927+
QualType PrivateType = Privates->getType();
49354928
llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
49364929

49374930
llvm::Constant *InitVal = nullptr;
4938-
const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps[0]);
4931+
const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
49394932
// Determine the initial value for the shared reduction variable
49404933
if (!UDR) {
49414934
InitVal = llvm::Constant::getNullValue(LLVMType);
4942-
if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates[0])) {
4935+
if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
49434936
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
49444937
const Expr *InitExpr = VD->getInit();
49454938
if (InitExpr && !PrivateType->isAggregateType() &&
@@ -4956,11 +4949,18 @@ void CGOpenMPRuntime::emitPrivateReduction(
49564949
} else {
49574950
InitVal = llvm::Constant::getNullValue(LLVMType);
49584951
}
4952+
std::string ReductionVarNameStr;
4953+
if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts())) {
4954+
ReductionVarNameStr = DRE->getDecl()->getNameAsString();
4955+
} else {
4956+
ReductionVarNameStr = "unnamed_priv_var";
4957+
}
49594958

49604959
// Create an internal shared variable
4961-
std::string SharedName = getName({"internal_private_var"});
4960+
std::string SharedName =
4961+
CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
49624962
llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
4963-
CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
4963+
CGM.getModule(), LLVMType, false, llvm::GlobalValue::InternalLinkage,
49644964
InitVal, ".omp.reduction." + SharedName, nullptr,
49654965
llvm::GlobalVariable::NotThreadLocal);
49664966

@@ -4996,7 +4996,7 @@ void CGOpenMPRuntime::emitPrivateReduction(
49964996
}
49974997
return; // UDR initialization handled
49984998
}
4999-
if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates[0])) {
4999+
if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
50005000
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
50015001
const Expr *InitExpr = VD->getInit();
50025002
if (InitExpr && (PrivateType->isAggregateType() ||
@@ -5021,47 +5021,45 @@ void CGOpenMPRuntime::emitPrivateReduction(
50215021
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
50225022
CGM.getModule(), OMPRTL___kmpc_barrier),
50235023
BarrierArgs);
5024-
for (unsigned I :
5025-
llvm::seq<unsigned>(std::min(ReductionOps.size(), LHSExprs.size()))) {
50265024

5027-
const Expr *ReductionOp = ReductionOps[I];
5028-
const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5029-
LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5030-
LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
5025+
const Expr *ReductionOp = ReductionOps;
5026+
const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5027+
LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5028+
LValue LHSLV = CGF.EmitLValue(LHSExprs);
50315029

5032-
auto EmitCriticalReduction = [&](auto ReductionGen) {
5033-
std::string CriticalName = getName({"reduction_critical"});
5034-
emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5035-
};
5030+
auto EmitCriticalReduction = [&](auto ReductionGen) {
5031+
std::string CriticalName = getName({"reduction_critical"});
5032+
emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5033+
};
50365034

5037-
if (CurrentUDR) {
5038-
// Handle user-defined reduction.
5039-
auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5040-
Action.Enter(CGF);
5041-
std::pair<llvm::Function *, llvm::Function *> FnPair =
5042-
getUserDefinedReduction(CurrentUDR);
5043-
if (FnPair.first) {
5044-
if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5045-
const auto *OutDRE = cast<DeclRefExpr>(
5046-
cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5047-
->getSubExpr());
5048-
const auto *InDRE = cast<DeclRefExpr>(
5049-
cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5050-
->getSubExpr());
5051-
CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5052-
LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5053-
SharedLV.getAddress());
5054-
LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5055-
LHSLV.getAddress());
5056-
(void)LocalScope.Privatize();
5057-
emitReductionCombiner(CGF, ReductionOp);
5058-
}
5035+
if (CurrentUDR) {
5036+
// Handle user-defined reduction.
5037+
auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5038+
Action.Enter(CGF);
5039+
std::pair<llvm::Function *, llvm::Function *> FnPair =
5040+
getUserDefinedReduction(CurrentUDR);
5041+
if (FnPair.first) {
5042+
if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5043+
const auto *OutDRE = cast<DeclRefExpr>(
5044+
cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5045+
->getSubExpr());
5046+
const auto *InDRE = cast<DeclRefExpr>(
5047+
cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5048+
->getSubExpr());
5049+
CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5050+
LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5051+
SharedLV.getAddress());
5052+
LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5053+
LHSLV.getAddress());
5054+
(void)LocalScope.Privatize();
5055+
emitReductionCombiner(CGF, ReductionOp);
50595056
}
5060-
};
5061-
EmitCriticalReduction(ReductionGen);
5062-
continue;
5063-
}
5064-
// Handle built-in reduction operations.
5057+
}
5058+
};
5059+
EmitCriticalReduction(ReductionGen);
5060+
}
5061+
// Handle built-in reduction operations.
5062+
else {
50655063
const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
50665064
if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
50675065
ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
@@ -5077,7 +5075,7 @@ void CGOpenMPRuntime::emitPrivateReduction(
50775075
}
50785076

50795077
if (!AssignRHS)
5080-
continue;
5078+
return;
50815079

50825080
const Expr *CombinerExpr = AssignRHS->IgnoreParenImpCasts();
50835081
if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(CombinerExpr))
@@ -5086,9 +5084,9 @@ void CGOpenMPRuntime::emitPrivateReduction(
50865084
auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
50875085
Action.Enter(CGF);
50885086
const auto *OmpOutDRE =
5089-
dyn_cast<DeclRefExpr>(LHSExprs[I]->IgnoreParenImpCasts());
5087+
dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
50905088
const auto *OmpInDRE =
5091-
dyn_cast<DeclRefExpr>(RHSExprs[I]->IgnoreParenImpCasts());
5089+
dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
50925090
if (!OmpOutDRE || !OmpInDRE)
50935091
return;
50945092
const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
@@ -5109,24 +5107,22 @@ void CGOpenMPRuntime::emitPrivateReduction(
51095107

51105108
// Broadcast final result
51115109
bool IsAggregate = PrivateType->isAggregateType();
5112-
LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5110+
LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
51135111
llvm::Value *FinalResultVal = nullptr;
51145112
Address FinalResultAddr = Address::invalid();
51155113

51165114
if (IsAggregate)
51175115
FinalResultAddr = SharedResult;
51185116
else
5119-
FinalResultVal = CGF.EmitLoadOfScalar(SharedLV, Loc);
5120-
5121-
for (unsigned I : llvm::seq<unsigned>(Privates.size())) {
5122-
LValue TargetLHSLV = CGF.EmitLValue(LHSExprs[I]);
5123-
if (IsAggregate) {
5124-
CGF.EmitAggregateCopy(TargetLHSLV,
5125-
CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5126-
PrivateType, AggValueSlot::DoesNotOverlap, false);
5127-
} else {
5128-
CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5129-
}
5117+
FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5118+
5119+
LValue TargetLHSLV = CGF.EmitLValue(LHSExprs);
5120+
if (IsAggregate) {
5121+
CGF.EmitAggregateCopy(TargetLHSLV,
5122+
CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5123+
PrivateType, AggValueSlot::DoesNotOverlap, false);
5124+
} else {
5125+
CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
51305126
}
51315127
// Final synchronization barrier
51325128
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
@@ -5436,8 +5432,18 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
54365432

54375433
CGF.EmitBranch(DefaultBB);
54385434
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5439-
if (Options.IsPrivateVarReduction)
5440-
emitPrivateReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps);
5435+
if (Options.IsPrivateVarReduction) {
5436+
if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
5437+
return;
5438+
if (LHSExprs.size() != Privates.size() ||
5439+
LHSExprs.size() != ReductionOps.size())
5440+
return;
5441+
for (unsigned I :
5442+
llvm::seq<unsigned>(std::min(ReductionOps.size(), LHSExprs.size()))) {
5443+
emitPrivateReduction(CGF, Loc, Privates[I], LHSExprs[I], RHSExprs[I],
5444+
ReductionOps[I]);
5445+
}
5446+
}
54415447
}
54425448

54435449
/// Generates unique name for artificial threadprivate variables.

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1212,10 +1212,8 @@ class CGOpenMPRuntime {
12121212
/// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
12131213
/// or 'operator binop(LHS, RHS)'.
12141214
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc,
1215-
ArrayRef<const Expr *> Privates,
1216-
ArrayRef<const Expr *> LHSExprs,
1217-
ArrayRef<const Expr *> RHSExprs,
1218-
ArrayRef<const Expr *> ReductionOps);
1215+
const Expr *Privates, const Expr *LHSExprs,
1216+
const Expr *RHSExprs, const Expr *ReductionOps);
12191217

12201218
/// Emit a code for reduction clause. Next code should be emitted for
12211219
/// reduction:

clang/test/OpenMP/for_private_reduction_codegen.cpp

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,20 @@ int main(void) {
6868
}
6969
return 0;
7070
}
71+
7172
//.
7273
// CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
7374
// CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
7475
// CHECK: @.gomp_critical_user_.reduction.var = common global [8 x i32] zeroinitializer, align 8
7576
// CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 18, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
7677
// CHECK: @[[GLOB3:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
7778
// CHECK: @.gomp_critical_user_.atomic_reduction.var = common global [8 x i32] zeroinitializer, align 8
78-
// CHECK: @.omp.reduction..internal_private_var = common global %class.Sum zeroinitializer, align 4
79+
// CHECK: @.omp.reduction..internal_pivate_.result = internal global %class.Sum zeroinitializer, align 4
7980
// CHECK: @.gomp_critical_user_.reduction_critical.var = common global [8 x i32] zeroinitializer, align 8
8081
// CHECK: @[[GLOB4:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
81-
// CHECK: @.omp.reduction..internal_private_var.1 = common global i32 0, align 4
82-
// CHECK: @.omp.reduction..internal_private_var.2 = common global i32 0, align 4
82+
// CHECK: @.omp.reduction..internal_pivate_.sum_v = internal global i32 0, align 4
83+
// CHECK: @.omp.reduction..internal_pivate_.sum_v.1 = internal global i32 0, align 4
84+
// CHECK: @.omp.reduction..internal_pivate_.prod_v = internal global i32 1, align 4
8385
//.
8486
// CHECK-LABEL: define dso_local void @_Z8func_redv(
8587
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -232,15 +234,15 @@ int main(void) {
232234
// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP2]], 0
233235
// CHECK-NEXT: br i1 [[TMP13]], label %[[INIT:.*]], label %[[INIT_END:.*]]
234236
// CHECK: [[INIT]]:
235-
// CHECK-NEXT: call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 dereferenceable(4) @.omp.reduction..internal_private_var, i32 noundef 0)
237+
// CHECK-NEXT: call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 dereferenceable(4) @.omp.reduction..internal_pivate_.result, i32 noundef 0)
236238
// CHECK-NEXT: br label %[[INIT_END]]
237239
// CHECK: [[INIT_END]]:
238240
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
239241
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction_critical.var)
240-
// CHECK-NEXT: call void @.omp_combiner.(ptr noundef @.omp.reduction..internal_private_var, ptr noundef [[RESULT]])
242+
// CHECK-NEXT: call void @.omp_combiner.(ptr noundef @.omp.reduction..internal_pivate_.result, ptr noundef [[RESULT]])
241243
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction_critical.var)
242244
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
243-
// CHECK-NEXT: [[TMP14:%.*]] = load [[CLASS_SUM]], ptr @.omp.reduction..internal_private_var, align 4
245+
// CHECK-NEXT: [[TMP14:%.*]] = load [[CLASS_SUM]], ptr @.omp.reduction..internal_pivate_.result, align 4
244246
// CHECK-NEXT: store [[CLASS_SUM]] [[TMP14]], ptr [[RESULT]], align 4
245247
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
246248
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]])
@@ -462,18 +464,18 @@ int main(void) {
462464
// CHECK-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP0]], 0
463465
// CHECK-NEXT: br i1 [[TMP28]], label %[[INIT:.*]], label %[[INIT_END:.*]]
464466
// CHECK: [[INIT]]:
465-
// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 @.omp.reduction..internal_private_var.1, i8 0, i64 4, i1 false)
467+
// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 @.omp.reduction..internal_pivate_.sum_v, i8 0, i64 4, i1 false)
466468
// CHECK-NEXT: br label %[[INIT_END]]
467469
// CHECK: [[INIT_END]]:
468470
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
469471
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
470-
// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr @.omp.reduction..internal_private_var.1, align 4
472+
// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.sum_v, align 4
471473
// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP7]], align 4
472474
// CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
473-
// CHECK-NEXT: store i32 [[ADD12]], ptr @.omp.reduction..internal_private_var.1, align 4
475+
// CHECK-NEXT: store i32 [[ADD12]], ptr @.omp.reduction..internal_pivate_.sum_v, align 4
474476
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
475477
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
476-
// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr @.omp.reduction..internal_private_var.1, align 4
478+
// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.sum_v, align 4
477479
// CHECK-NEXT: store i32 [[TMP31]], ptr [[TMP7]], align 4
478480
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
479481
// CHECK-NEXT: br label %[[OMP_PRECOND_END]]
@@ -666,26 +668,36 @@ int main(void) {
666668
// CHECK-NEXT: [[TMP47:%.*]] = icmp eq i32 [[TMP0]], 0
667669
// CHECK-NEXT: br i1 [[TMP47]], label %[[INIT:.*]], label %[[INIT_END:.*]]
668670
// CHECK: [[INIT]]:
669-
// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 @.omp.reduction..internal_private_var.2, i8 0, i64 4, i1 false)
671+
// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 @.omp.reduction..internal_pivate_.sum_v.1, i8 0, i64 4, i1 false)
670672
// CHECK-NEXT: br label %[[INIT_END]]
671673
// CHECK: [[INIT_END]]:
672674
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
673675
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
674-
// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr @.omp.reduction..internal_private_var.2, align 4
676+
// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.sum_v.1, align 4
675677
// CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP9]], align 4
676678
// CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP48]], [[TMP49]]
677-
// CHECK-NEXT: store i32 [[ADD21]], ptr @.omp.reduction..internal_private_var.2, align 4
679+
// CHECK-NEXT: store i32 [[ADD21]], ptr @.omp.reduction..internal_pivate_.sum_v.1, align 4
678680
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
681+
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
682+
// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.sum_v.1, align 4
683+
// CHECK-NEXT: store i32 [[TMP50]], ptr [[TMP9]], align 4
684+
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
685+
// CHECK-NEXT: [[TMP51:%.*]] = icmp eq i32 [[TMP0]], 0
686+
// CHECK-NEXT: br i1 [[TMP51]], label %[[INIT22:.*]], label %[[INIT_END23:.*]]
687+
// CHECK: [[INIT22]]:
688+
// CHECK-NEXT: store i32 1, ptr @.omp.reduction..internal_pivate_.prod_v, align 4
689+
// CHECK-NEXT: br label %[[INIT_END23]]
690+
// CHECK: [[INIT_END23]]:
691+
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
679692
// CHECK-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
680-
// CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr @.omp.reduction..internal_private_var.2, align 4
681-
// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP10]], align 4
682-
// CHECK-NEXT: [[MUL22:%.*]] = mul nsw i32 [[TMP50]], [[TMP51]]
683-
// CHECK-NEXT: store i32 [[MUL22]], ptr @.omp.reduction..internal_private_var.2, align 4
693+
// CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.prod_v, align 4
694+
// CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP10]], align 4
695+
// CHECK-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]]
696+
// CHECK-NEXT: store i32 [[MUL24]], ptr @.omp.reduction..internal_pivate_.prod_v, align 4
684697
// CHECK-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP0]], ptr @.gomp_critical_user_.reduction_critical.var)
685698
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
686-
// CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr @.omp.reduction..internal_private_var.2, align 4
687-
// CHECK-NEXT: store i32 [[TMP52]], ptr [[TMP9]], align 4
688-
// CHECK-NEXT: store i32 [[TMP52]], ptr [[TMP10]], align 4
699+
// CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr @.omp.reduction..internal_pivate_.prod_v, align 4
700+
// CHECK-NEXT: store i32 [[TMP54]], ptr [[TMP10]], align 4
689701
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP0]])
690702
// CHECK-NEXT: br label %[[OMP_PRECOND_END]]
691703
// CHECK: [[OMP_PRECOND_END]]:
@@ -755,7 +767,7 @@ int main(void) {
755767
// CHECK-NEXT: store i32 0, ptr [[SUM_V_EXT]], align 4
756768
// CHECK-NEXT: store i32 1, ptr [[PROD_V_EXT]], align 4
757769
// CHECK-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP0]], i32 4)
758-
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @main.omp_outlined.3, ptr [[V]], ptr [[SUM_V_EXT]], ptr [[PROD_V_EXT]])
770+
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @main.omp_outlined.2, ptr [[V]], ptr [[SUM_V_EXT]], ptr [[PROD_V_EXT]])
759771
// CHECK-NEXT: ret i32 0
760772
//
761773
//

0 commit comments

Comments
 (0)