@@ -2679,27 +2679,27 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
2679
2679
// Always interpret integers as unsigned similarly to CanonicalLoopInfo.
2680
2680
static FunctionCallee
2681
2681
getKmpcForStaticLoopForType (Type *Ty, OpenMPIRBuilder *OMPBuilder,
2682
- OpenMPIRBuilder:: WorksharingLoopType LoopType) {
2682
+ WorksharingLoopType LoopType) {
2683
2683
unsigned Bitwidth = Ty->getIntegerBitWidth ();
2684
2684
Module &M = OMPBuilder->M ;
2685
2685
switch (LoopType) {
2686
- case OpenMPIRBuilder:: WorksharingLoopType::ForStaticLoop:
2686
+ case WorksharingLoopType::ForStaticLoop:
2687
2687
if (Bitwidth == 32 )
2688
2688
return OMPBuilder->getOrCreateRuntimeFunction (
2689
2689
M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
2690
2690
if (Bitwidth == 64 )
2691
2691
return OMPBuilder->getOrCreateRuntimeFunction (
2692
2692
M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
2693
2693
break ;
2694
- case OpenMPIRBuilder:: WorksharingLoopType::DistributeStaticLoop:
2694
+ case WorksharingLoopType::DistributeStaticLoop:
2695
2695
if (Bitwidth == 32 )
2696
2696
return OMPBuilder->getOrCreateRuntimeFunction (
2697
2697
M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
2698
2698
if (Bitwidth == 64 )
2699
2699
return OMPBuilder->getOrCreateRuntimeFunction (
2700
2700
M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
2701
2701
break ;
2702
- case OpenMPIRBuilder:: WorksharingLoopType::DistributeForStaticLoop:
2702
+ case WorksharingLoopType::DistributeForStaticLoop:
2703
2703
if (Bitwidth == 32 )
2704
2704
return OMPBuilder->getOrCreateRuntimeFunction (
2705
2705
M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
@@ -2708,15 +2708,16 @@ getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder,
2708
2708
M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
2709
2709
break ;
2710
2710
}
2711
- if (Bitwidth != 32 && Bitwidth != 64 )
2712
- llvm_unreachable (" unknown OpenMP loop iterator bitwidth" );
2713
- return FunctionCallee ();
2711
+ if (Bitwidth != 32 && Bitwidth != 64 ) {
2712
+ llvm_unreachable (" Unknown OpenMP loop iterator bitwidth" );
2713
+ }
2714
+ llvm_unreachable (" Unknown type of OpenMP worksharing loop" );
2714
2715
}
2715
2716
2716
2717
// Inserts a call to proper OpenMP Device RTL function which handles
2717
2718
// loop worksharing.
2718
2719
static void createTargetLoopWorkshareCall (
2719
- OpenMPIRBuilder *OMPBuilder, OpenMPIRBuilder:: WorksharingLoopType LoopType,
2720
+ OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType,
2720
2721
BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg,
2721
2722
Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn) {
2722
2723
Type *TripCountTy = TripCount->getType ();
@@ -2726,16 +2727,11 @@ static void createTargetLoopWorkshareCall(
2726
2727
getKmpcForStaticLoopForType (TripCountTy, OMPBuilder, LoopType);
2727
2728
SmallVector<Value *, 8 > RealArgs;
2728
2729
RealArgs.push_back (Ident);
2729
- /* loop body func*/
2730
2730
RealArgs.push_back (Builder.CreateBitCast (&LoopBodyFn, ParallelTaskPtr));
2731
- /* loop body args*/
2732
2731
RealArgs.push_back (LoopBodyArg);
2733
- /* num of iters*/
2734
2732
RealArgs.push_back (TripCount);
2735
- if (LoopType == OpenMPIRBuilder::WorksharingLoopType::DistributeStaticLoop) {
2736
- /* block chunk*/ RealArgs.push_back (TripCountTy->getIntegerBitWidth () == 32
2737
- ? Builder.getInt32 (0 )
2738
- : Builder.getInt64 (0 ));
2733
+ if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
2734
+ RealArgs.push_back (ConstantInt::get (TripCountTy, 0 ));
2739
2735
Builder.CreateCall (RTLFn, RealArgs);
2740
2736
return ;
2741
2737
}
@@ -2744,17 +2740,12 @@ static void createTargetLoopWorkshareCall(
2744
2740
Builder.restoreIP ({InsertBlock, std::prev (InsertBlock->end ())});
2745
2741
Value *NumThreads = Builder.CreateCall (RTLNumThreads, {});
2746
2742
2747
- /* num of threads */ RealArgs.push_back (
2743
+ RealArgs.push_back (
2748
2744
Builder.CreateZExtOrTrunc (NumThreads, TripCountTy, " num.threads.cast" ));
2749
- if (LoopType ==
2750
- OpenMPIRBuilder::WorksharingLoopType::DistributeForStaticLoop) {
2751
- /* block chunk*/ RealArgs.push_back (TripCountTy->getIntegerBitWidth () == 32
2752
- ? Builder.getInt32 (0 )
2753
- : Builder.getInt64 (0 ));
2745
+ RealArgs.push_back (ConstantInt::get (TripCountTy, 0 ));
2746
+ if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
2747
+ RealArgs.push_back (ConstantInt::get (TripCountTy, 0 ));
2754
2748
}
2755
- /* thread chunk */ RealArgs.push_back (TripCountTy->getIntegerBitWidth () == 32
2756
- ? Builder.getInt32 (1 )
2757
- : Builder.getInt64 (1 ));
2758
2749
2759
2750
Builder.CreateCall (RTLFn, RealArgs);
2760
2751
}
@@ -2764,7 +2755,7 @@ workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder,
2764
2755
CanonicalLoopInfo *CLI, Value *Ident,
2765
2756
Function &OutlinedFn, Type *ParallelTaskPtr,
2766
2757
const SmallVector<Instruction *, 4 > &ToBeDeleted,
2767
- OpenMPIRBuilder:: WorksharingLoopType LoopType) {
2758
+ WorksharingLoopType LoopType) {
2768
2759
IRBuilder<> &Builder = OMPIRBuilder->Builder ;
2769
2760
BasicBlock *Preheader = CLI->getPreheader ();
2770
2761
Value *TripCount = CLI->getTripCount ();
@@ -2795,19 +2786,19 @@ workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder,
2795
2786
// Find the instruction which corresponds to loop body argument structure
2796
2787
// and remove the call to loop body function instruction.
2797
2788
Value *LoopBodyArg;
2798
- for ( auto instIt = Preheader-> begin (); instIt != Preheader-> end (); ++instIt) {
2799
- if (CallInst *CallInstruction = dyn_cast<CallInst>(instIt)) {
2800
- if (CallInstruction-> getCalledFunction () == &OutlinedFn) {
2801
- // Check in case no argument structure has been passed.
2802
- if (CallInstruction-> arg_size () > 1 )
2803
- LoopBodyArg = CallInstruction-> getArgOperand ( 1 );
2804
- else
2805
- LoopBodyArg = Constant::getNullValue (Builder. getPtrTy ());
2806
- CallInstruction-> eraseFromParent ();
2807
- break ;
2808
- }
2809
- }
2810
- }
2789
+ User *OutlinedFnUser = OutlinedFn. getUniqueUndroppableUser ();
2790
+ assert (OutlinedFnUser &&
2791
+ " Expected unique undroppable user of outlined function " );
2792
+ CallInst *OutlinedFnCallInstruction = dyn_cast<CallInst>(OutlinedFnUser);
2793
+ assert (OutlinedFnCallInstruction && " Expected outlined function call " );
2794
+ assert ((OutlinedFnCallInstruction-> getParent () == Preheader) &&
2795
+ " Expected outlined function call to be located in loop preheader " );
2796
+ // Check in case no argument structure has been passed.
2797
+ if (OutlinedFnCallInstruction-> arg_size () > 1 )
2798
+ LoopBodyArg = OutlinedFnCallInstruction-> getArgOperand ( 1 ) ;
2799
+ else
2800
+ LoopBodyArg = Constant::getNullValue (Builder. getPtrTy ());
2801
+ OutlinedFnCallInstruction-> eraseFromParent ();
2811
2802
2812
2803
createTargetLoopWorkshareCall (OMPIRBuilder, LoopType, Preheader, Ident,
2813
2804
LoopBodyArg, ParallelTaskPtr, TripCount,
@@ -2818,9 +2809,10 @@ workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder,
2818
2809
CLI->invalidate ();
2819
2810
}
2820
2811
2821
- OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget (
2822
- DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
2823
- OpenMPIRBuilder::WorksharingLoopType LoopType) {
2812
+ OpenMPIRBuilder::InsertPointTy
2813
+ OpenMPIRBuilder::applyWorkshareLoopTarget (DebugLoc DL, CanonicalLoopInfo *CLI,
2814
+ InsertPointTy AllocaIP,
2815
+ WorksharingLoopType LoopType) {
2824
2816
uint32_t SrcLocStrSize;
2825
2817
Constant *SrcLocStr = getOrCreateSrcLocStr (DL, SrcLocStrSize);
2826
2818
Value *Ident = getOrCreateIdent (SrcLocStr, SrcLocStrSize);
@@ -2844,14 +2836,14 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
2844
2836
2845
2837
// Insert new loop counter variable which will be used only in loop
2846
2838
// body.
2847
- AllocaInst *newLoopCnt = Builder.CreateAlloca (CLI->getIndVarType (), 0 , " " );
2848
- Instruction *newLoopCntLoad =
2849
- Builder.CreateLoad (CLI->getIndVarType (), newLoopCnt );
2839
+ AllocaInst *NewLoopCnt = Builder.CreateAlloca (CLI->getIndVarType (), 0 , " " );
2840
+ Instruction *NewLoopCntLoad =
2841
+ Builder.CreateLoad (CLI->getIndVarType (), NewLoopCnt );
2850
2842
// New loop counter instructions are redundant in the loop preheader when
2851
2843
// code generation for workshare loop is finished. That's why we mark them as
2852
2844
// ready for deletion.
2853
- ToBeDeleted.push_back (newLoopCntLoad );
2854
- ToBeDeleted.push_back (newLoopCnt );
2845
+ ToBeDeleted.push_back (NewLoopCntLoad );
2846
+ ToBeDeleted.push_back (NewLoopCnt );
2855
2847
2856
2848
// Analyse loop body region. Find all input variables which are used inside
2857
2849
// loop body region.
@@ -2884,22 +2876,17 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
2884
2876
// We need to model loop body region as the function f(cnt, loop_arg).
2885
2877
// That's why we replace loop induction variable by the new counter
2886
2878
// which will be one of the loop body function's arguments.
2887
- std::vector<User *> Users (CLI->getIndVar ()->user_begin (),
2888
- CLI->getIndVar ()->user_end ());
2889
- for (User *use : Users) {
2890
- if (Instruction *inst = dyn_cast<Instruction>(use)) {
2891
- if (ParallelRegionBlockSet.count (inst->getParent ())) {
2892
- inst->replaceUsesOfWith (CLI->getIndVar (), newLoopCntLoad);
2879
+ for (auto Use = CLI->getIndVar ()->user_begin ();
2880
+ Use != CLI->getIndVar ()->user_end (); ++Use) {
2881
+ if (Instruction *Inst = dyn_cast<Instruction>(*Use)) {
2882
+ if (ParallelRegionBlockSet.count (Inst->getParent ())) {
2883
+ Inst->replaceUsesOfWith (CLI->getIndVar (), NewLoopCntLoad);
2893
2884
}
2894
2885
}
2895
2886
}
2896
- Extractor.findInputsOutputs (Inputs, Outputs, SinkingCands);
2897
- for (Value *Input : Inputs) {
2898
- // Make sure that loop counter variable is not merged into loop body
2899
- // function argument structure and it is passed as separate variable
2900
- if (Input == newLoopCntLoad)
2901
- OI.ExcludeArgsFromAggregate .push_back (Input);
2902
- }
2887
+ // Make sure that loop counter variable is not merged into loop body
2888
+ // function argument structure and it is passed as separate variable
2889
+ OI.ExcludeArgsFromAggregate .push_back (NewLoopCntLoad);
2903
2890
2904
2891
// PostOutline CB is invoked when loop body function is outlined and
2905
2892
// loop body is replaced by call to outlined function. We need to add
@@ -2920,7 +2907,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop(
2920
2907
bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
2921
2908
bool HasSimdModifier, bool HasMonotonicModifier,
2922
2909
bool HasNonmonotonicModifier, bool HasOrderedClause,
2923
- OpenMPIRBuilder:: WorksharingLoopType LoopType) {
2910
+ WorksharingLoopType LoopType) {
2924
2911
if (Config.isTargetDevice ())
2925
2912
return applyWorkshareLoopTarget (DL, CLI, AllocaIP, LoopType);
2926
2913
OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType (
0 commit comments