@@ -4130,6 +4130,23 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
4130
4130
return createCanonicalLoop (LoopLoc, BodyGen, TripCount, Name);
4131
4131
}
4132
4132
4133
+ // Returns an LLVM function to call for initializing loop bounds using OpenMP
4134
+ // static scheduling for composite `distribute parallel for` depending on
4135
+ // `type`. Only i32 and i64 are supported by the runtime. Always interpret
4136
+ // integers as unsigned similarly to CanonicalLoopInfo.
4137
+ static FunctionCallee
4138
+ getKmpcDistForStaticInitForType (Type *Ty, Module &M,
4139
+ OpenMPIRBuilder &OMPBuilder) {
4140
+ unsigned Bitwidth = Ty->getIntegerBitWidth ();
4141
+ if (Bitwidth == 32 )
4142
+ return OMPBuilder.getOrCreateRuntimeFunction (
4143
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4144
+ if (Bitwidth == 64 )
4145
+ return OMPBuilder.getOrCreateRuntimeFunction (
4146
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4147
+ llvm_unreachable (" unknown OpenMP loop iterator bitwidth" );
4148
+ }
4149
+
4133
4150
// Returns an LLVM function to call for initializing loop bounds using OpenMP
4134
4151
// static scheduling depending on `type`. Only i32 and i64 are supported by the
4135
4152
// runtime. Always interpret integers as unsigned similarly to
@@ -4164,7 +4181,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4164
4181
// Declare useful OpenMP runtime functions.
4165
4182
Value *IV = CLI->getIndVar ();
4166
4183
Type *IVTy = IV->getType ();
4167
- FunctionCallee StaticInit = getKmpcForStaticInitForType (IVTy, M, *this );
4184
+ FunctionCallee StaticInit =
4185
+ LoopType == WorksharingLoopType::DistributeForStaticLoop
4186
+ ? getKmpcDistForStaticInitForType (IVTy, M, *this )
4187
+ : getKmpcForStaticInitForType (IVTy, M, *this );
4168
4188
FunctionCallee StaticFini =
4169
4189
getOrCreateRuntimeFunction (M, omp::OMPRTL___kmpc_for_static_fini);
4170
4190
@@ -4200,9 +4220,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4200
4220
4201
4221
// Call the "init" function and update the trip count of the loop with the
4202
4222
// value it produced.
4203
- Builder.CreateCall (StaticInit,
4204
- {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
4205
- PUpperBound, PStride, One, Zero});
4223
+ SmallVector<Value *, 10 > Args (
4224
+ {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4225
+ if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4226
+ Value *PDistUpperBound =
4227
+ Builder.CreateAlloca (IVTy, nullptr , " p.distupperbound" );
4228
+ Args.push_back (PDistUpperBound);
4229
+ }
4230
+ Args.append ({PStride, One, Zero});
4231
+ Builder.CreateCall (StaticInit, Args);
4206
4232
Value *LowerBound = Builder.CreateLoad (IVTy, PLowerBound);
4207
4233
Value *InclusiveUpperBound = Builder.CreateLoad (IVTy, PUpperBound);
4208
4234
Value *TripCountMinusOne = Builder.CreateSub (InclusiveUpperBound, LowerBound);
0 commit comments