@@ -93,6 +93,57 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {
93
93
94
94
/// Prepare the builder for use by running initializeTypes on M.
void OpenMPIRBuilder::initialize() {
  initializeTypes(M);
}
95
95
96
+ void OpenMPIRBuilder::finalize () {
97
+ for (OutlineInfo &OI : OutlineInfos) {
98
+ assert (!OI.Blocks .empty () &&
99
+ " Outlined regions should have at least a single block!" );
100
+ BasicBlock *RegEntryBB = OI.Blocks .front ();
101
+ Function *OuterFn = RegEntryBB->getParent ();
102
+ CodeExtractorAnalysisCache CEAC (*OuterFn);
103
+ CodeExtractor Extractor (OI.Blocks , /* DominatorTree */ nullptr ,
104
+ /* AggregateArgs */ false ,
105
+ /* BlockFrequencyInfo */ nullptr ,
106
+ /* BranchProbabilityInfo */ nullptr ,
107
+ /* AssumptionCache */ nullptr ,
108
+ /* AllowVarArgs */ true ,
109
+ /* AllowAlloca */ true ,
110
+ /* Suffix */ " .omp_par" );
111
+
112
+ LLVM_DEBUG (dbgs () << " Before outlining: " << *OuterFn << " \n " );
113
+
114
+ Function *OutlinedFn = Extractor.extractCodeRegion (CEAC);
115
+
116
+ LLVM_DEBUG (dbgs () << " After outlining: " << *OuterFn << " \n " );
117
+ LLVM_DEBUG (dbgs () << " Outlined function: " << *OutlinedFn << " \n " );
118
+ assert (OutlinedFn->getReturnType ()->isVoidTy () &&
119
+ " OpenMP outlined functions should not return a value!" );
120
+
121
+ // For compability with the clang CG we move the outlined function after the
122
+ // one with the parallel region.
123
+ OutlinedFn->removeFromParent ();
124
+ M.getFunctionList ().insertAfter (OuterFn->getIterator (), OutlinedFn);
125
+
126
+ // Remove the artificial entry introduced by the extractor right away, we
127
+ // made our own entry block after all.
128
+ {
129
+ BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock ();
130
+ assert (ArtificialEntry.getUniqueSuccessor () == RegEntryBB);
131
+ assert (RegEntryBB->getUniquePredecessor () == &ArtificialEntry);
132
+ RegEntryBB->moveBefore (&ArtificialEntry);
133
+ ArtificialEntry.eraseFromParent ();
134
+ }
135
+ assert (&OutlinedFn->getEntryBlock () == RegEntryBB);
136
+ assert (OutlinedFn && OutlinedFn->getNumUses () == 1 );
137
+
138
+ // Run a user callback, e.g. to add attributes.
139
+ if (OI.PostOutlineCB )
140
+ OI.PostOutlineCB (*OutlinedFn);
141
+ }
142
+
143
+ // Allow finalize to be called multiple times.
144
+ OutlineInfos.clear ();
145
+ }
146
+
96
147
Value *OpenMPIRBuilder::getOrCreateIdent (Constant *SrcLocStr,
97
148
IdentFlag LocFlags) {
98
149
// Enable "C-mode".
@@ -415,32 +466,40 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
415
466
// PRegionExitBB <- A common exit to simplify block collection.
416
467
//
417
468
418
- LLVM_DEBUG (dbgs () << " Before body codegen: " << *UI-> getFunction () << " \n " );
469
+ LLVM_DEBUG (dbgs () << " Before body codegen: " << *OuterFn << " \n " );
419
470
420
471
// Let the caller create the body.
421
472
assert (BodyGenCB && " Expected body generation callback!" );
422
473
InsertPointTy CodeGenIP (PRegBodyBB, PRegBodyBB->begin ());
423
474
BodyGenCB (AllocaIP, CodeGenIP, *PRegPreFiniBB);
424
475
425
- LLVM_DEBUG (dbgs () << " After body codegen: " << *UI-> getFunction () << " \n " );
476
+ LLVM_DEBUG (dbgs () << " After body codegen: " << *OuterFn << " \n " );
426
477
478
+ OutlineInfo OI;
427
479
SmallPtrSet<BasicBlock *, 32 > ParallelRegionBlockSet;
428
- SmallVector<BasicBlock *, 32 > ParallelRegionBlocks, Worklist;
480
+ SmallVector<BasicBlock *, 32 > Worklist;
429
481
ParallelRegionBlockSet.insert (PRegEntryBB);
430
482
ParallelRegionBlockSet.insert (PRegExitBB);
431
483
432
484
// Collect all blocks in-between PRegEntryBB and PRegExitBB.
433
485
Worklist.push_back (PRegEntryBB);
434
486
while (!Worklist.empty ()) {
435
487
BasicBlock *BB = Worklist.pop_back_val ();
436
- ParallelRegionBlocks .push_back (BB);
488
+ OI. Blocks .push_back (BB);
437
489
for (BasicBlock *SuccBB : successors (BB))
438
490
if (ParallelRegionBlockSet.insert (SuccBB).second )
439
491
Worklist.push_back (SuccBB);
440
492
}
441
493
494
+ // Ensure a single exit node for the outlined region by creating one.
495
+ // We might have multiple incoming edges to the exit now due to finalizations,
496
+ // e.g., cancel calls that cause the control flow to leave the region.
497
+ BasicBlock *PRegOutlinedExitBB = PRegExitBB;
498
+ PRegExitBB = SplitBlock (PRegExitBB, &*PRegExitBB->getFirstInsertionPt ());
499
+ OI.Blocks .push_back (PRegOutlinedExitBB);
500
+
442
501
CodeExtractorAnalysisCache CEAC (*OuterFn);
443
- CodeExtractor Extractor (ParallelRegionBlocks , /* DominatorTree */ nullptr ,
502
+ CodeExtractor Extractor (OI. Blocks , /* DominatorTree */ nullptr ,
444
503
/* AggregateArgs */ false ,
445
504
/* BlockFrequencyInfo */ nullptr ,
446
505
/* BranchProbabilityInfo */ nullptr ,
@@ -455,7 +514,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
455
514
Extractor.findAllocas (CEAC, SinkingCands, HoistingCands, CommonExit);
456
515
Extractor.findInputsOutputs (Inputs, Outputs, SinkingCands);
457
516
458
- LLVM_DEBUG (dbgs () << " Before privatization: " << *UI-> getFunction () << " \n " );
517
+ LLVM_DEBUG (dbgs () << " Before privatization: " << *OuterFn << " \n " );
459
518
460
519
FunctionCallee TIDRTLFn =
461
520
getOrCreateRuntimeFunction (OMPRTL___kmpc_global_thread_num);
@@ -491,61 +550,15 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
491
550
LLVM_DEBUG (dbgs () << " Captured input: " << *Input << " \n " );
492
551
PrivHelper (*Input);
493
552
}
494
- for (Value *Output : Outputs) {
495
- LLVM_DEBUG (dbgs () << " Captured output: " << *Output << " \n " );
496
- PrivHelper (*Output);
497
- }
553
+ assert (Outputs.empty () &&
554
+ " OpenMP outlining should not produce live-out values!" );
498
555
499
- LLVM_DEBUG (dbgs () << " After privatization: " << *UI-> getFunction () << " \n " );
556
+ LLVM_DEBUG (dbgs () << " After privatization: " << *OuterFn << " \n " );
500
557
LLVM_DEBUG ({
501
- for (auto *BB : ParallelRegionBlocks )
558
+ for (auto *BB : OI. Blocks )
502
559
dbgs () << " PBR: " << BB->getName () << " \n " ;
503
560
});
504
561
505
- // Add some known attributes to the outlined function.
506
- Function *OutlinedFn = Extractor.extractCodeRegion (CEAC);
507
- OutlinedFn->addParamAttr (0 , Attribute::NoAlias);
508
- OutlinedFn->addParamAttr (1 , Attribute::NoAlias);
509
- OutlinedFn->addFnAttr (Attribute::NoUnwind);
510
- OutlinedFn->addFnAttr (Attribute::NoRecurse);
511
-
512
- LLVM_DEBUG (dbgs () << " After outlining: " << *UI->getFunction () << " \n " );
513
- LLVM_DEBUG (dbgs () << " Outlined function: " << *OutlinedFn << " \n " );
514
-
515
- // For compability with the clang CG we move the outlined function after the
516
- // one with the parallel region.
517
- OutlinedFn->removeFromParent ();
518
- M.getFunctionList ().insertAfter (OuterFn->getIterator (), OutlinedFn);
519
-
520
- // Remove the artificial entry introduced by the extractor right away, we
521
- // made our own entry block after all.
522
- {
523
- BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock ();
524
- assert (ArtificialEntry.getUniqueSuccessor () == PRegEntryBB);
525
- assert (PRegEntryBB->getUniquePredecessor () == &ArtificialEntry);
526
- PRegEntryBB->moveBefore (&ArtificialEntry);
527
- ArtificialEntry.eraseFromParent ();
528
- }
529
- LLVM_DEBUG (dbgs () << " PP Outlined function: " << *OutlinedFn << " \n " );
530
- assert (&OutlinedFn->getEntryBlock () == PRegEntryBB);
531
-
532
- assert (OutlinedFn && OutlinedFn->getNumUses () == 1 );
533
- assert (OutlinedFn->arg_size () >= 2 &&
534
- " Expected at least tid and bounded tid as arguments" );
535
- unsigned NumCapturedVars = OutlinedFn->arg_size () - /* tid & bounded tid */ 2 ;
536
-
537
- CallInst *CI = cast<CallInst>(OutlinedFn->user_back ());
538
- CI->getParent ()->setName (" omp_parallel" );
539
- Builder.SetInsertPoint (CI);
540
-
541
- // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
542
- Value *ForkCallArgs[] = {Ident, Builder.getInt32 (NumCapturedVars),
543
- Builder.CreateBitCast (OutlinedFn, ParallelTaskPtr)};
544
-
545
- SmallVector<Value *, 16 > RealArgs;
546
- RealArgs.append (std::begin (ForkCallArgs), std::end (ForkCallArgs));
547
- RealArgs.append (CI->arg_begin () + /* tid & bound tid */ 2 , CI->arg_end ());
548
-
549
562
FunctionCallee RTLFn = getOrCreateRuntimeFunction (OMPRTL___kmpc_fork_call);
550
563
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee ())) {
551
564
if (!F->hasMetadata (llvm::LLVMContext::MD_callback)) {
@@ -558,75 +571,105 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
558
571
// callback callee.
559
572
F->addMetadata (
560
573
llvm::LLVMContext::MD_callback,
561
- *llvm::MDNode::get (Ctx, {MDB. createCallbackEncoding (
562
- 2 , {-1 , -1 },
563
- /* VarArgsArePassed */ true )}));
574
+ *llvm::MDNode::get (
575
+ Ctx, {MDB. createCallbackEncoding ( 2 , {-1 , -1 },
576
+ /* VarArgsArePassed */ true )}));
564
577
}
565
578
}
566
579
567
- Builder.CreateCall (RTLFn, RealArgs);
580
+ OI.PostOutlineCB = [=](Function &OutlinedFn) {
581
+ // Add some known attributes.
582
+ OutlinedFn.addParamAttr (0 , Attribute::NoAlias);
583
+ OutlinedFn.addParamAttr (1 , Attribute::NoAlias);
584
+ OutlinedFn.addFnAttr (Attribute::NoUnwind);
585
+ OutlinedFn.addFnAttr (Attribute::NoRecurse);
568
586
569
- LLVM_DEBUG (dbgs () << " With fork_call placed: "
570
- << *Builder.GetInsertBlock ()->getParent () << " \n " );
587
+ assert (OutlinedFn.arg_size () >= 2 &&
588
+ " Expected at least tid and bounded tid as arguments" );
589
+ unsigned NumCapturedVars =
590
+ OutlinedFn.arg_size () - /* tid & bounded tid */ 2 ;
571
591
572
- InsertPointTy AfterIP (UI-> getParent (), UI-> getParent ()-> end ());
573
- InsertPointTy ExitIP (PRegExitBB, PRegExitBB-> end () );
574
- UI-> eraseFromParent ( );
592
+ CallInst *CI = cast<CallInst>(OutlinedFn. user_back ());
593
+ CI-> getParent ()-> setName ( " omp_parallel " );
594
+ Builder. SetInsertPoint (CI );
575
595
576
- // Initialize the local TID stack location with the argument value.
577
- Builder. SetInsertPoint (PrivTID);
578
- Function::arg_iterator OutlinedAI = OutlinedFn-> arg_begin ();
579
- Builder.CreateStore (Builder. CreateLoad (OutlinedAI), PrivTIDAddr) ;
596
+ // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
597
+ Value *ForkCallArgs[] = {
598
+ Ident, Builder. getInt32 (NumCapturedVars),
599
+ Builder.CreateBitCast (&OutlinedFn, ParallelTaskPtr)} ;
580
600
581
- // If no "if" clause was present we do not need the call created during
582
- // outlining, otherwise we reuse it in the serialized parallel region.
583
- if (!ElseTI) {
584
- CI->eraseFromParent ();
585
- } else {
601
+ SmallVector<Value *, 16 > RealArgs;
602
+ RealArgs.append (std::begin (ForkCallArgs), std::end (ForkCallArgs));
603
+ RealArgs.append (CI->arg_begin () + /* tid & bound tid */ 2 , CI->arg_end ());
586
604
587
- // If an "if" clause was present we are now generating the serialized
588
- // version into the "else" branch.
589
- Builder.SetInsertPoint (ElseTI);
605
+ Builder.CreateCall (RTLFn, RealArgs);
590
606
591
- // Build calls __kmpc_serialized_parallel(&Ident, GTid);
592
- Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
593
- Builder.CreateCall (
594
- getOrCreateRuntimeFunction (OMPRTL___kmpc_serialized_parallel),
595
- SerializedParallelCallArgs);
607
+ LLVM_DEBUG (dbgs () << " With fork_call placed: "
608
+ << *Builder.GetInsertBlock ()->getParent () << " \n " );
596
609
597
- // OutlinedFn(&GTid, &zero, CapturedStruct);
598
- CI->removeFromParent ();
599
- Builder.Insert (CI);
610
+ InsertPointTy ExitIP (PRegExitBB, PRegExitBB->end ());
600
611
601
- // __kmpc_end_serialized_parallel(&Ident, GTid);
602
- Value *EndArgs[] = {Ident, ThreadID};
603
- Builder.CreateCall (
604
- getOrCreateRuntimeFunction (OMPRTL___kmpc_end_serialized_parallel),
605
- EndArgs);
612
+ // Initialize the local TID stack location with the argument value.
613
+ Builder.SetInsertPoint (PrivTID);
614
+ Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin ();
615
+ Builder.CreateStore (Builder.CreateLoad (OutlinedAI), PrivTIDAddr);
606
616
607
- LLVM_DEBUG (dbgs () << " With serialized parallel region: "
608
- << *Builder.GetInsertBlock ()->getParent () << " \n " );
609
- }
617
+ // If no "if" clause was present we do not need the call created during
618
+ // outlining, otherwise we reuse it in the serialized parallel region.
619
+ if (!ElseTI) {
620
+ CI->eraseFromParent ();
621
+ } else {
622
+
623
+ // If an "if" clause was present we are now generating the serialized
624
+ // version into the "else" branch.
625
+ Builder.SetInsertPoint (ElseTI);
626
+
627
+ // Build calls __kmpc_serialized_parallel(&Ident, GTid);
628
+ Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
629
+ Builder.CreateCall (
630
+ getOrCreateRuntimeFunction (OMPRTL___kmpc_serialized_parallel),
631
+ SerializedParallelCallArgs);
632
+
633
+ // OutlinedFn(&GTid, &zero, CapturedStruct);
634
+ CI->removeFromParent ();
635
+ Builder.Insert (CI);
636
+
637
+ // __kmpc_end_serialized_parallel(&Ident, GTid);
638
+ Value *EndArgs[] = {Ident, ThreadID};
639
+ Builder.CreateCall (
640
+ getOrCreateRuntimeFunction (OMPRTL___kmpc_end_serialized_parallel),
641
+ EndArgs);
642
+
643
+ LLVM_DEBUG (dbgs () << " With serialized parallel region: "
644
+ << *Builder.GetInsertBlock ()->getParent () << " \n " );
645
+ }
646
+
647
+ for (Instruction *I : ToBeDeleted)
648
+ I->eraseFromParent ();
649
+ };
610
650
611
651
// Adjust the finalization stack, verify the adjustment, and call the
612
- // finalize function a last time to finalize values between the pre-fini block
613
- // and the exit block if we left the parallel "the normal way".
652
+ // finalize function a last time to finalize values between the pre-fini
653
+ // block and the exit block if we left the parallel "the normal way".
614
654
auto FiniInfo = FinalizationStack.pop_back_val ();
615
655
(void )FiniInfo;
616
656
assert (FiniInfo.DK == OMPD_parallel &&
617
657
" Unexpected finalization stack state!" );
618
658
619
- Instruction *PreFiniTI = PRegPreFiniBB->getTerminator ();
620
- assert (PreFiniTI->getNumSuccessors () == 1 &&
621
- PreFiniTI->getSuccessor (0 )->size () == 1 &&
622
- isa<ReturnInst>(PreFiniTI->getSuccessor (0 )->getTerminator ()) &&
659
+ Instruction *PRegOutlinedExitTI = PRegOutlinedExitBB->getTerminator ();
660
+ assert (PRegOutlinedExitTI->getNumSuccessors () == 1 &&
661
+ PRegOutlinedExitTI->getSuccessor (0 ) == PRegExitBB &&
623
662
" Unexpected CFG structure!" );
624
663
625
- InsertPointTy PreFiniIP (PRegPreFiniBB, PreFiniTI->getIterator ());
664
+ InsertPointTy PreFiniIP (PRegOutlinedExitBB,
665
+ PRegOutlinedExitTI->getIterator ());
626
666
FiniCB (PreFiniIP);
627
667
628
- for (Instruction *I : ToBeDeleted)
629
- I->eraseFromParent ();
668
+ InsertPointTy AfterIP (UI->getParent (), UI->getParent ()->end ());
669
+ UI->eraseFromParent ();
670
+
671
+ // Register the outlined info.
672
+ addOutlineInfo (std::move (OI));
630
673
631
674
return AfterIP;
632
675
}
0 commit comments