@@ -999,6 +999,118 @@ CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
999
999
return createCanonicalLoop (Builder.saveIP (), BodyGen, TripCount);
1000
1000
}
1001
1001
1002
+ // Returns an LLVM function to call for initializing loop bounds using OpenMP
1003
+ // static scheduling depending on `type`. Only i32 and i64 are supported by the
1004
+ // runtime. Always interpret integers as unsigned similarly to
1005
+ // CanonicalLoopInfo.
1006
+ static FunctionCallee getKmpcForStaticInitForType (Type *Ty, Module &M,
1007
+ OpenMPIRBuilder &OMPBuilder) {
1008
+ unsigned Bitwidth = Ty->getIntegerBitWidth ();
1009
+ if (Bitwidth == 32 )
1010
+ return OMPBuilder.getOrCreateRuntimeFunction (
1011
+ M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1012
+ if (Bitwidth == 64 )
1013
+ return OMPBuilder.getOrCreateRuntimeFunction (
1014
+ M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1015
+ llvm_unreachable (" unknown OpenMP loop iterator bitwidth" );
1016
+ }
1017
+
1018
+ // Sets the number of loop iterations to the given value. This value must be
1019
+ // valid in the condition block (i.e., defined in the preheader) and is
1020
+ // interpreted as an unsigned integer.
1021
+ void setCanonicalLoopTripCount (CanonicalLoopInfo *CLI, Value *TripCount) {
1022
+ Instruction *CmpI = &CLI->getCond ()->front ();
1023
+ assert (isa<CmpInst>(CmpI) && " First inst must compare IV with TripCount" );
1024
+ CmpI->setOperand (1 , TripCount);
1025
+ CLI->assertOK ();
1026
+ }
1027
+
1028
+ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop (
1029
+ const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1030
+ InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
1031
+ // Set up the source location value for OpenMP runtime.
1032
+ if (!updateToLocation (Loc))
1033
+ return nullptr ;
1034
+
1035
+ Constant *SrcLocStr = getOrCreateSrcLocStr (Loc);
1036
+ Value *SrcLoc = getOrCreateIdent (SrcLocStr);
1037
+
1038
+ // Declare useful OpenMP runtime functions.
1039
+ Value *IV = CLI->getIndVar ();
1040
+ Type *IVTy = IV->getType ();
1041
+ FunctionCallee StaticInit = getKmpcForStaticInitForType (IVTy, M, *this );
1042
+ FunctionCallee StaticFini =
1043
+ getOrCreateRuntimeFunction (M, omp::OMPRTL___kmpc_for_static_fini);
1044
+
1045
+ // Allocate space for computed loop bounds as expected by the "init" function.
1046
+ Builder.restoreIP (AllocaIP);
1047
+ Type *I32Type = Type::getInt32Ty (M.getContext ());
1048
+ Value *PLastIter = Builder.CreateAlloca (I32Type, nullptr , " p.lastiter" );
1049
+ Value *PLowerBound = Builder.CreateAlloca (IVTy, nullptr , " p.lowerbound" );
1050
+ Value *PUpperBound = Builder.CreateAlloca (IVTy, nullptr , " p.upperbound" );
1051
+ Value *PStride = Builder.CreateAlloca (IVTy, nullptr , " p.stride" );
1052
+
1053
+ // At the end of the preheader, prepare for calling the "init" function by
1054
+ // storing the current loop bounds into the allocated space. A canonical loop
1055
+ // always iterates from 0 to trip-count with step 1. Note that "init" expects
1056
+ // and produces an inclusive upper bound.
1057
+ Builder.SetInsertPoint (CLI->getPreheader ()->getTerminator ());
1058
+ Constant *Zero = ConstantInt::get (IVTy, 0 );
1059
+ Constant *One = ConstantInt::get (IVTy, 1 );
1060
+ Builder.CreateStore (Zero, PLowerBound);
1061
+ Value *UpperBound = Builder.CreateSub (CLI->getTripCount (), One);
1062
+ Builder.CreateStore (UpperBound, PUpperBound);
1063
+ Builder.CreateStore (One, PStride);
1064
+
1065
+ if (!Chunk)
1066
+ Chunk = One;
1067
+
1068
+ Value *ThreadNum = getOrCreateThreadID (SrcLoc);
1069
+
1070
+ // TODO: extract scheduling type and map it to OMP constant. This is curently
1071
+ // happening in kmp.h and its ilk and needs to be moved to OpenMP.td first.
1072
+ constexpr int StaticSchedType = 34 ;
1073
+ Constant *SchedulingType = ConstantInt::get (I32Type, StaticSchedType);
1074
+
1075
+ // Call the "init" function and update the trip count of the loop with the
1076
+ // value it produced.
1077
+ Builder.CreateCall (StaticInit,
1078
+ {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1079
+ PUpperBound, PStride, One, Chunk});
1080
+ Value *LowerBound = Builder.CreateLoad (PLowerBound);
1081
+ Value *InclusiveUpperBound = Builder.CreateLoad (PUpperBound);
1082
+ Value *TripCountMinusOne = Builder.CreateSub (InclusiveUpperBound, LowerBound);
1083
+ Value *TripCount = Builder.CreateAdd (TripCountMinusOne, One);
1084
+ setCanonicalLoopTripCount (CLI, TripCount);
1085
+
1086
+ // Update all uses of the induction variable except the one in the condition
1087
+ // block that compares it with the actual upper bound, and the increment in
1088
+ // the latch block.
1089
+ // TODO: this can eventually move to CanonicalLoopInfo or to a new
1090
+ // CanonicalLoopInfoUpdater interface.
1091
+ Builder.SetInsertPoint (CLI->getBody (), CLI->getBody ()->getFirstInsertionPt ());
1092
+ Value *UpdatedIV = Builder.CreateAdd (IV, LowerBound);
1093
+ IV->replaceUsesWithIf (UpdatedIV, [&](Use &U) {
1094
+ auto *Instr = dyn_cast<Instruction>(U.getUser ());
1095
+ return !Instr ||
1096
+ (Instr->getParent () != CLI->getCond () &&
1097
+ Instr->getParent () != CLI->getLatch () && Instr != UpdatedIV);
1098
+ });
1099
+
1100
+ // In the "exit" block, call the "fini" function.
1101
+ Builder.SetInsertPoint (CLI->getExit (),
1102
+ CLI->getExit ()->getTerminator ()->getIterator ());
1103
+ Builder.CreateCall (StaticFini, {SrcLoc, ThreadNum});
1104
+
1105
+ // Add the barrier if requested.
1106
+ if (NeedsBarrier)
1107
+ createBarrier (Loc, omp::Directive::OMPD_for, /* ForceSimpleCall */ false ,
1108
+ /* CheckCancelFlag */ false );
1109
+
1110
+ CLI->assertOK ();
1111
+ return CLI;
1112
+ }
1113
+
1002
1114
void CanonicalLoopInfo::eraseFromParent () {
1003
1115
assert (IsValid && " can only erase previously valid loop cfg" );
1004
1116
IsValid = false ;
0 commit comments