@@ -705,9 +705,8 @@ enum OpenMPRTLFunction {
705
705
// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
706
706
// *vec);
707
707
OMPRTL__kmpc_doacross_wait,
708
- // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
709
- // *data);
710
- OMPRTL__kmpc_task_reduction_init,
708
+ // Call to void *__kmpc_taskred_init(int gtid, int num_data, void *data);
709
+ OMPRTL__kmpc_taskred_init,
711
710
// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
712
711
// *d);
713
712
OMPRTL__kmpc_task_reduction_get_th_data,
@@ -981,27 +980,37 @@ void ReductionCodeGen::emitAggregateInitialization(
981
980
}
982
981
983
982
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
983
+ ArrayRef<const Expr *> Origs,
984
984
ArrayRef<const Expr *> Privates,
985
985
ArrayRef<const Expr *> ReductionOps) {
986
986
ClausesData.reserve(Shareds.size());
987
987
SharedAddresses.reserve(Shareds.size());
988
988
Sizes.reserve(Shareds.size());
989
989
BaseDecls.reserve(Shareds.size());
990
- auto IPriv = Privates.begin();
991
- auto IRed = ReductionOps.begin();
990
+ const auto *IOrig = Origs.begin();
991
+ const auto *IPriv = Privates.begin();
992
+ const auto *IRed = ReductionOps.begin();
992
993
for (const Expr *Ref : Shareds) {
993
- ClausesData.emplace_back(Ref, *IPriv, *IRed);
994
+ ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
995
+ std::advance(IOrig, 1);
994
996
std::advance(IPriv, 1);
995
997
std::advance(IRed, 1);
996
998
}
997
999
}
998
1000
999
- void ReductionCodeGen::emitSharedLValue (CodeGenFunction &CGF, unsigned N) {
1000
- assert(SharedAddresses.size() == N &&
1001
+ void ReductionCodeGen::emitSharedOrigLValue (CodeGenFunction &CGF, unsigned N) {
1002
+ assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
1001
1003
"Number of generated lvalues must be exactly N.");
1002
- LValue First = emitSharedLValue(CGF, ClausesData[N].Ref );
1003
- LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref );
1004
+ LValue First = emitSharedLValue(CGF, ClausesData[N].Shared );
1005
+ LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared );
1004
1006
SharedAddresses.emplace_back(First, Second);
1007
+ if (ClausesData[N].Shared == ClausesData[N].Ref) {
1008
+ OrigAddresses.emplace_back(First, Second);
1009
+ } else {
1010
+ LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
1011
+ LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
1012
+ OrigAddresses.emplace_back(First, Second);
1013
+ }
1005
1014
}
1006
1015
1007
1016
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
@@ -2318,14 +2327,12 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
2318
2327
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2319
2328
break;
2320
2329
}
2321
- case OMPRTL__kmpc_task_reduction_init: {
2322
- // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2323
- // *data);
2330
+ case OMPRTL__kmpc_taskred_init: {
2331
+ // Build void *__kmpc_taskred_init(int gtid, int num_data, void *data);
2324
2332
llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2325
2333
auto *FnTy =
2326
2334
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2327
- RTLFn =
2328
- CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2335
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskred_init");
2329
2336
break;
2330
2337
}
2331
2338
case OMPRTL__kmpc_task_reduction_get_th_data: {
@@ -6546,7 +6553,7 @@ static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6546
6553
6547
6554
/// Emits reduction initializer function:
6548
6555
/// \code
6549
- /// void @.red_init(void* %arg) {
6556
+ /// void @.red_init(void* %arg, void* %orig) {
6550
6557
/// %0 = bitcast void* %arg to <type>*
6551
6558
/// store <type> <init>, <type>* %0
6552
6559
/// ret void
@@ -6556,10 +6563,15 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6556
6563
SourceLocation Loc,
6557
6564
ReductionCodeGen &RCG, unsigned N) {
6558
6565
ASTContext &C = CGM.getContext();
6566
+ QualType VoidPtrTy = C.VoidPtrTy;
6567
+ VoidPtrTy.addRestrict();
6559
6568
FunctionArgList Args;
6560
- ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C. VoidPtrTy,
6569
+ ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
6561
6570
ImplicitParamDecl::Other);
6571
+ ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
6572
+ ImplicitParamDecl::Other);
6562
6573
Args.emplace_back(&Param);
6574
+ Args.emplace_back(&ParamOrig);
6563
6575
const auto &FnInfo =
6564
6576
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6565
6577
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
@@ -6584,28 +6596,25 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6584
6596
CGM.getContext().getSizeType(), Loc);
6585
6597
}
6586
6598
RCG.emitAggregateType(CGF, N, Size);
6587
- LValue SharedLVal ;
6599
+ LValue OrigLVal ;
6588
6600
// If initializer uses initializer from declare reduction construct, emit a
6589
6601
// pointer to the address of the original reduction item (required by reduction
6590
6602
// initializer)
6591
6603
if (RCG.usesReductionInitializer(N)) {
6592
- Address SharedAddr =
6593
- CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6594
- CGF, CGM.getContext().VoidPtrTy,
6595
- generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6604
+ Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
6596
6605
SharedAddr = CGF.EmitLoadOfPointer(
6597
6606
SharedAddr,
6598
6607
CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6599
- SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6608
+ OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6600
6609
} else {
6601
- SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6610
+ OrigLVal = CGF.MakeNaturalAlignAddrLValue(
6602
6611
llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6603
6612
CGM.getContext().VoidPtrTy);
6604
6613
}
6605
6614
// Emit the initializer:
6606
6615
// %0 = bitcast void* %arg to <type>*
6607
6616
// store <type> <init>, <type>* %0
6608
- RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal ,
6617
+ RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal ,
6609
6618
[](CodeGenFunction &) { return false; });
6610
6619
CGF.FinishFunction();
6611
6620
return Fn;
@@ -6745,18 +6754,20 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6745
6754
return nullptr;
6746
6755
6747
6756
// Build typedef struct:
6748
- // kmp_task_red_input {
6757
+ // kmp_taskred_input {
6749
6758
// void *reduce_shar; // shared reduction item
6759
+ // void *reduce_orig; // original reduction item used for initialization
6750
6760
// size_t reduce_size; // size of data item
6751
6761
// void *reduce_init; // data initialization routine
6752
6762
// void *reduce_fini; // data finalization routine
6753
6763
// void *reduce_comb; // data combiner routine
6754
6764
// kmp_task_red_flags_t flags; // flags for additional info from compiler
6755
- // } kmp_task_red_input_t ;
6765
+ // } kmp_taskred_input_t ;
6756
6766
ASTContext &C = CGM.getContext();
6757
- RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t ");
6767
+ RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t ");
6758
6768
RD->startDefinition();
6759
6769
const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6770
+ const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6760
6771
const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6761
6772
const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6762
6773
const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
@@ -6771,8 +6782,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6771
6782
RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6772
6783
// kmp_taskred_input_t .rd_input.[Size];
6773
6784
Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6774
- ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies ,
6775
- Data.ReductionOps);
6785
+ ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionVars ,
6786
+ Data.ReductionCopies, Data. ReductionOps);
6776
6787
for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6777
6788
// kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
6778
6789
llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
@@ -6784,20 +6795,24 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6784
6795
LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6785
6796
// ElemLVal.reduce_shar = &Shareds[Cnt];
6786
6797
LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6787
- RCG.emitSharedLValue (CGF, Cnt);
6798
+ RCG.emitSharedOrigLValue (CGF, Cnt);
6788
6799
llvm::Value *CastedShared =
6789
6800
CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6790
6801
CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6802
+ // ElemLVal.reduce_orig = &Origs[Cnt];
6803
+ LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6804
+ llvm::Value *CastedOrig =
6805
+ CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6806
+ CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6791
6807
RCG.emitAggregateType(CGF, Cnt);
6792
6808
llvm::Value *SizeValInChars;
6793
6809
llvm::Value *SizeVal;
6794
6810
std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6795
- // We use delayed creation/initialization for VLAs, array sections and
6796
- // custom reduction initializations. It is required because runtime does not
6797
- // provide the way to pass the sizes of VLAs/array sections to
6798
- // initializer/combiner/finalizer functions and does not pass the pointer to
6799
- // original reduction item to the initializer. Instead threadprivate global
6800
- // variables are used to store these values and use them in the functions.
6811
+ // We use delayed creation/initialization for VLAs and array sections. It is
6812
+ // required because runtime does not provide the way to pass the sizes of
6813
+ // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6814
+ // threadprivate global variables are used to store these values and use
6815
+ // them in the functions.
6801
6816
bool DelayedCreation = !!SizeVal;
6802
6817
SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6803
6818
/*isSigned=*/false);
@@ -6808,7 +6823,6 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6808
6823
llvm::Value *InitAddr =
6809
6824
CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6810
6825
CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6811
- DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6812
6826
// ElemLVal.reduce_fini = fini;
6813
6827
LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6814
6828
llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
@@ -6832,16 +6846,15 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6832
6846
CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6833
6847
FlagsLVal.getType());
6834
6848
}
6835
- // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6836
- // *data);
6849
+ // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6837
6850
llvm::Value *Args[] = {
6838
6851
CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6839
6852
/*isSigned=*/true),
6840
6853
llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6841
6854
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6842
6855
CGM.VoidPtrTy)};
6843
- return CGF.EmitRuntimeCall(
6844
- createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6856
+ return CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskred_init),
6857
+ Args);
6845
6858
}
6846
6859
6847
6860
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
@@ -6859,16 +6872,6 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6859
6872
generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6860
6873
CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6861
6874
}
6862
- // Store address of the original reduction item if custom initializer is used.
6863
- if (RCG.usesReductionInitializer(N)) {
6864
- Address SharedAddr = getAddrOfArtificialThreadPrivate(
6865
- CGF, CGM.getContext().VoidPtrTy,
6866
- generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6867
- CGF.Builder.CreateStore(
6868
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6869
- RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6870
- SharedAddr, /*IsVolatile=*/false);
6871
- }
6872
6875
}
6873
6876
6874
6877
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
0 commit comments