Skip to content

Commit 01a9b40

Browse files
jdoerfert authored and ronlieb committed
[OpenMP][FIX] Ensure device reduction geps work for multi-var reductions
Previous patch: "Split the reduction buffer size into two components" needs the current one to pass 552.pep and 452.ep. If we have more than one reduction variable, we need to be consistent w.r.t. indexing. In 3de645e we broke this, as the buffer type was reduced to a singleton but the index computation was not adjusted to account for that offset. This fixes it by interleaving the reduction variables properly in an array-of-struct style. We can revert it back to struct-of-array in a follow-up if it turns out to be a problem. I doubt it, since half the accesses should benefit from the locality this layout offers and only the other half were consecutive before. Change-Id: I6866a8422f87dc1fdc5d71db426d3a2a5912a2c3
1 parent a4a5bbf commit 01a9b40

File tree

5 files changed

+237
-221
lines changed

5 files changed

+237
-221
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,11 @@ static RecordDecl *buildRecordForGlobalizedVars(
158158
Field->addAttr(*I);
159159
}
160160
} else {
161-
llvm::APInt ArraySize(32, BufSize);
162-
Type = C.getConstantArrayType(Type, ArraySize, nullptr,
163-
ArraySizeModifier::Normal, 0);
161+
if (BufSize > 1) {
162+
llvm::APInt ArraySize(32, BufSize);
163+
Type = C.getConstantArrayType(Type, ArraySize, nullptr,
164+
ArraySizeModifier::Normal, 0);
165+
}
164166
Field = FieldDecl::Create(
165167
C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
166168
C.getTrivialTypeSourceInfo(Type, SourceLocation()),
@@ -2400,8 +2402,7 @@ static llvm::Value *emitListToGlobalCopyFunction(
24002402
llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
24012403
CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
24022404
LLVMReductionsBufferTy->getPointerTo());
2403-
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
2404-
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
2405+
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
24052406
/*Volatile=*/false, C.IntTy,
24062407
Loc)};
24072408
unsigned Idx = 0;
@@ -2419,12 +2420,12 @@ static llvm::Value *emitListToGlobalCopyFunction(
24192420
const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
24202421
// Global = Buffer.VD[Idx];
24212422
const FieldDecl *FD = VarFieldMap.lookup(VD);
2423+
llvm::Value *BufferPtr =
2424+
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
24222425
LValue GlobLVal = CGF.EmitLValueForField(
2423-
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
2426+
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
24242427
Address GlobAddr = GlobLVal.getAddress(CGF);
2425-
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
2426-
GlobAddr.getPointer(), Idxs);
2427-
GlobLVal.setAddress(Address(BufferPtr,
2428+
GlobLVal.setAddress(Address(GlobAddr.getPointer(),
24282429
CGF.ConvertTypeForMem(Private->getType()),
24292430
GlobAddr.getAlignment()));
24302431
switch (CGF.getEvaluationKind(Private->getType())) {
@@ -2511,8 +2512,7 @@ static llvm::Value *emitListToGlobalReduceFunction(
25112512
Address ReductionList =
25122513
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
25132514
auto IPriv = Privates.begin();
2514-
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
2515-
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
2515+
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
25162516
/*Volatile=*/false, C.IntTy,
25172517
Loc)};
25182518
unsigned Idx = 0;
@@ -2521,12 +2521,13 @@ static llvm::Value *emitListToGlobalReduceFunction(
25212521
// Global = Buffer.VD[Idx];
25222522
const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
25232523
const FieldDecl *FD = VarFieldMap.lookup(VD);
2524+
llvm::Value *BufferPtr =
2525+
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
25242526
LValue GlobLVal = CGF.EmitLValueForField(
2525-
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
2527+
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
25262528
Address GlobAddr = GlobLVal.getAddress(CGF);
2527-
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
2528-
GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs);
2529-
CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy);
2529+
CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false,
2530+
C.VoidPtrTy);
25302531
if ((*IPriv)->getType()->isVariablyModifiedType()) {
25312532
// Store array size.
25322533
++Idx;
@@ -2608,8 +2609,7 @@ static llvm::Value *emitGlobalToListCopyFunction(
26082609
CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
26092610
LLVMReductionsBufferTy->getPointerTo());
26102611

2611-
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
2612-
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
2612+
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
26132613
/*Volatile=*/false, C.IntTy,
26142614
Loc)};
26152615
unsigned Idx = 0;
@@ -2627,12 +2627,12 @@ static llvm::Value *emitGlobalToListCopyFunction(
26272627
const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
26282628
// Global = Buffer.VD[Idx];
26292629
const FieldDecl *FD = VarFieldMap.lookup(VD);
2630+
llvm::Value *BufferPtr =
2631+
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
26302632
LValue GlobLVal = CGF.EmitLValueForField(
2631-
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
2633+
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
26322634
Address GlobAddr = GlobLVal.getAddress(CGF);
2633-
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
2634-
GlobAddr.getPointer(), Idxs);
2635-
GlobLVal.setAddress(Address(BufferPtr,
2635+
GlobLVal.setAddress(Address(GlobAddr.getPointer(),
26362636
CGF.ConvertTypeForMem(Private->getType()),
26372637
GlobAddr.getAlignment()));
26382638
switch (CGF.getEvaluationKind(Private->getType())) {
@@ -2719,8 +2719,7 @@ static llvm::Value *emitGlobalToListReduceFunction(
27192719
Address ReductionList =
27202720
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
27212721
auto IPriv = Privates.begin();
2722-
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
2723-
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
2722+
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
27242723
/*Volatile=*/false, C.IntTy,
27252724
Loc)};
27262725
unsigned Idx = 0;
@@ -2729,12 +2728,13 @@ static llvm::Value *emitGlobalToListReduceFunction(
27292728
// Global = Buffer.VD[Idx];
27302729
const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
27312730
const FieldDecl *FD = VarFieldMap.lookup(VD);
2731+
llvm::Value *BufferPtr =
2732+
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
27322733
LValue GlobLVal = CGF.EmitLValueForField(
2733-
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
2734+
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
27342735
Address GlobAddr = GlobLVal.getAddress(CGF);
2735-
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
2736-
GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs);
2737-
CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy);
2736+
CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false,
2737+
C.VoidPtrTy);
27382738
if ((*IPriv)->getType()->isVariablyModifiedType()) {
27392739
// Store array size.
27402740
++Idx;

0 commit comments

Comments
 (0)