Skip to content

Commit cb7ae2d

Browse files
committed
[OpenMPIRBuilder] Add support for distribute-parallel-for/do constructs
This patch adds codegen for the `__kmpc_dist_for_static_init_4u` and `__kmpc_dist_for_static_init_8u` runtime calls, which are used to support worksharing a single loop across teams and threads. This can be used to implement support for the composite `distribute parallel for/do` construct.
1 parent 8ecbf35 commit cb7ae2d

File tree

1 file changed

+30
-4
lines changed

1 file changed

+30
-4
lines changed

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4130,6 +4130,23 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
41304130
return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
41314131
}
41324132

4133+
// Returns an LLVM function to call for initializing loop bounds using OpenMP
4134+
// static scheduling for composite `distribute parallel for` depending on
4135+
// `type`. Only i32 and i64 are supported by the runtime. Always interpret
4136+
// integers as unsigned similarly to CanonicalLoopInfo.
4137+
static FunctionCallee
4138+
getKmpcDistForStaticInitForType(Type *Ty, Module &M,
4139+
OpenMPIRBuilder &OMPBuilder) {
4140+
unsigned Bitwidth = Ty->getIntegerBitWidth();
4141+
if (Bitwidth == 32)
4142+
return OMPBuilder.getOrCreateRuntimeFunction(
4143+
M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4144+
if (Bitwidth == 64)
4145+
return OMPBuilder.getOrCreateRuntimeFunction(
4146+
M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4147+
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
4148+
}
4149+
41334150
// Returns an LLVM function to call for initializing loop bounds using OpenMP
41344151
// static scheduling depending on `type`. Only i32 and i64 are supported by the
41354152
// runtime. Always interpret integers as unsigned similarly to
@@ -4164,7 +4181,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
41644181
// Declare useful OpenMP runtime functions.
41654182
Value *IV = CLI->getIndVar();
41664183
Type *IVTy = IV->getType();
4167-
FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
4184+
FunctionCallee StaticInit =
4185+
LoopType == WorksharingLoopType::DistributeForStaticLoop
4186+
? getKmpcDistForStaticInitForType(IVTy, M, *this)
4187+
: getKmpcForStaticInitForType(IVTy, M, *this);
41684188
FunctionCallee StaticFini =
41694189
getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
41704190

@@ -4200,9 +4220,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
42004220

42014221
// Call the "init" function and update the trip count of the loop with the
42024222
// value it produced.
4203-
Builder.CreateCall(StaticInit,
4204-
{SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
4205-
PUpperBound, PStride, One, Zero});
4223+
SmallVector<Value *, 10> Args(
4224+
{SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4225+
if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4226+
Value *PDistUpperBound =
4227+
Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound");
4228+
Args.push_back(PDistUpperBound);
4229+
}
4230+
Args.append({PStride, One, Zero});
4231+
Builder.CreateCall(StaticInit, Args);
42064232
Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
42074233
Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
42084234
Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);

0 commit comments

Comments
 (0)