@@ -93,6 +93,55 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {
93
93
94
94
void OpenMPIRBuilder::initialize () { initializeTypes (M); }
95
95
96
+ void OpenMPIRBuilder::finalize () {
97
+ for (OutlineInfo &OI : OutlineInfos) {
98
+ assert (!OI.Blocks .empty () &&
99
+ " Outlined regions should have at least a single block!" );
100
+ BasicBlock *RegEntryBB = OI.Blocks .front ();
101
+ Function *OuterFn = RegEntryBB->getParent ();
102
+ CodeExtractorAnalysisCache CEAC (*OuterFn);
103
+ CodeExtractor Extractor (OI.Blocks , /* DominatorTree */ nullptr ,
104
+ /* AggregateArgs */ false ,
105
+ /* BlockFrequencyInfo */ nullptr ,
106
+ /* BranchProbabilityInfo */ nullptr ,
107
+ /* AssumptionCache */ nullptr ,
108
+ /* AllowVarArgs */ true ,
109
+ /* AllowAlloca */ true ,
110
+ /* Suffix */ " .omp_par" );
111
+
112
+ LLVM_DEBUG (dbgs () << " Before outlining: " << *OuterFn << " \n " );
113
+
114
+ Function *OutlinedFn = Extractor.extractCodeRegion (CEAC);
115
+
116
+ LLVM_DEBUG (dbgs () << " After outlining: " << *OuterFn << " \n " );
117
+ LLVM_DEBUG (dbgs () << " Outlined function: " << *OutlinedFn << " \n " );
118
+
119
+ // For compability with the clang CG we move the outlined function after the
120
+ // one with the parallel region.
121
+ OutlinedFn->removeFromParent ();
122
+ M.getFunctionList ().insertAfter (OuterFn->getIterator (), OutlinedFn);
123
+
124
+ // Remove the artificial entry introduced by the extractor right away, we
125
+ // made our own entry block after all.
126
+ {
127
+ BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock ();
128
+ assert (ArtificialEntry.getUniqueSuccessor () == RegEntryBB);
129
+ assert (RegEntryBB->getUniquePredecessor () == &ArtificialEntry);
130
+ RegEntryBB->moveBefore (&ArtificialEntry);
131
+ ArtificialEntry.eraseFromParent ();
132
+ }
133
+ assert (&OutlinedFn->getEntryBlock () == RegEntryBB);
134
+ assert (OutlinedFn && OutlinedFn->getNumUses () == 1 );
135
+
136
+ // Run a user callback, e.g. to add attributes.
137
+ if (OI.PostOutlineCB )
138
+ OI.PostOutlineCB (*OutlinedFn);
139
+ }
140
+
141
+ // Allow finalize to be called multiple times.
142
+ OutlineInfos.clear ();
143
+ }
144
+
96
145
Value *OpenMPIRBuilder::getOrCreateIdent (Constant *SrcLocStr,
97
146
IdentFlag LocFlags) {
98
147
// Enable "C-mode".
@@ -415,32 +464,33 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
415
464
// PRegionExitBB <- A common exit to simplify block collection.
416
465
//
417
466
418
- LLVM_DEBUG (dbgs () << " Before body codegen: " << *UI-> getFunction () << " \n " );
467
+ LLVM_DEBUG (dbgs () << " Before body codegen: " << *OuterFn << " \n " );
419
468
420
469
// Let the caller create the body.
421
470
assert (BodyGenCB && " Expected body generation callback!" );
422
471
InsertPointTy CodeGenIP (PRegBodyBB, PRegBodyBB->begin ());
423
472
BodyGenCB (AllocaIP, CodeGenIP, *PRegPreFiniBB);
424
473
425
- LLVM_DEBUG (dbgs () << " After body codegen: " << *UI-> getFunction () << " \n " );
474
+ LLVM_DEBUG (dbgs () << " After body codegen: " << *OuterFn << " \n " );
426
475
476
+ OutlineInfo OI;
427
477
SmallPtrSet<BasicBlock *, 32 > ParallelRegionBlockSet;
428
- SmallVector<BasicBlock *, 32 > ParallelRegionBlocks, Worklist;
478
+ SmallVector<BasicBlock *, 32 > Worklist;
429
479
ParallelRegionBlockSet.insert (PRegEntryBB);
430
480
ParallelRegionBlockSet.insert (PRegExitBB);
431
481
432
482
// Collect all blocks in-between PRegEntryBB and PRegExitBB.
433
483
Worklist.push_back (PRegEntryBB);
434
484
while (!Worklist.empty ()) {
435
485
BasicBlock *BB = Worklist.pop_back_val ();
436
- ParallelRegionBlocks .push_back (BB);
486
+ OI. Blocks .push_back (BB);
437
487
for (BasicBlock *SuccBB : successors (BB))
438
488
if (ParallelRegionBlockSet.insert (SuccBB).second )
439
489
Worklist.push_back (SuccBB);
440
490
}
441
491
442
492
CodeExtractorAnalysisCache CEAC (*OuterFn);
443
- CodeExtractor Extractor (ParallelRegionBlocks , /* DominatorTree */ nullptr ,
493
+ CodeExtractor Extractor (OI. Blocks , /* DominatorTree */ nullptr ,
444
494
/* AggregateArgs */ false ,
445
495
/* BlockFrequencyInfo */ nullptr ,
446
496
/* BranchProbabilityInfo */ nullptr ,
@@ -455,7 +505,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
455
505
Extractor.findAllocas (CEAC, SinkingCands, HoistingCands, CommonExit);
456
506
Extractor.findInputsOutputs (Inputs, Outputs, SinkingCands);
457
507
458
- LLVM_DEBUG (dbgs () << " Before privatization: " << *UI-> getFunction () << " \n " );
508
+ LLVM_DEBUG (dbgs () << " Before privatization: " << *OuterFn << " \n " );
459
509
460
510
FunctionCallee TIDRTLFn =
461
511
getOrCreateRuntimeFunction (OMPRTL___kmpc_global_thread_num);
@@ -496,56 +546,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
496
546
PrivHelper (*Output);
497
547
}
498
548
499
- LLVM_DEBUG (dbgs () << " After privatization: " << *UI-> getFunction () << " \n " );
549
+ LLVM_DEBUG (dbgs () << " After privatization: " << *OuterFn << " \n " );
500
550
LLVM_DEBUG ({
501
- for (auto *BB : ParallelRegionBlocks )
551
+ for (auto *BB : OI. Blocks )
502
552
dbgs () << " PBR: " << BB->getName () << " \n " ;
503
553
});
504
554
505
- // Add some known attributes to the outlined function.
506
- Function *OutlinedFn = Extractor.extractCodeRegion (CEAC);
507
- OutlinedFn->addParamAttr (0 , Attribute::NoAlias);
508
- OutlinedFn->addParamAttr (1 , Attribute::NoAlias);
509
- OutlinedFn->addFnAttr (Attribute::NoUnwind);
510
- OutlinedFn->addFnAttr (Attribute::NoRecurse);
511
-
512
- LLVM_DEBUG (dbgs () << " After outlining: " << *UI->getFunction () << " \n " );
513
- LLVM_DEBUG (dbgs () << " Outlined function: " << *OutlinedFn << " \n " );
514
-
515
- // For compability with the clang CG we move the outlined function after the
516
- // one with the parallel region.
517
- OutlinedFn->removeFromParent ();
518
- M.getFunctionList ().insertAfter (OuterFn->getIterator (), OutlinedFn);
519
-
520
- // Remove the artificial entry introduced by the extractor right away, we
521
- // made our own entry block after all.
522
- {
523
- BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock ();
524
- assert (ArtificialEntry.getUniqueSuccessor () == PRegEntryBB);
525
- assert (PRegEntryBB->getUniquePredecessor () == &ArtificialEntry);
526
- PRegEntryBB->moveBefore (&ArtificialEntry);
527
- ArtificialEntry.eraseFromParent ();
528
- }
529
- LLVM_DEBUG (dbgs () << " PP Outlined function: " << *OutlinedFn << " \n " );
530
- assert (&OutlinedFn->getEntryBlock () == PRegEntryBB);
531
-
532
- assert (OutlinedFn && OutlinedFn->getNumUses () == 1 );
533
- assert (OutlinedFn->arg_size () >= 2 &&
534
- " Expected at least tid and bounded tid as arguments" );
535
- unsigned NumCapturedVars = OutlinedFn->arg_size () - /* tid & bounded tid */ 2 ;
536
-
537
- CallInst *CI = cast<CallInst>(OutlinedFn->user_back ());
538
- CI->getParent ()->setName (" omp_parallel" );
539
- Builder.SetInsertPoint (CI);
540
-
541
- // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
542
- Value *ForkCallArgs[] = {Ident, Builder.getInt32 (NumCapturedVars),
543
- Builder.CreateBitCast (OutlinedFn, ParallelTaskPtr)};
544
-
545
- SmallVector<Value *, 16 > RealArgs;
546
- RealArgs.append (std::begin (ForkCallArgs), std::end (ForkCallArgs));
547
- RealArgs.append (CI->arg_begin () + /* tid & bound tid */ 2 , CI->arg_end ());
548
-
549
555
FunctionCallee RTLFn = getOrCreateRuntimeFunction (OMPRTL___kmpc_fork_call);
550
556
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee ())) {
551
557
if (!F->hasMetadata (llvm::LLVMContext::MD_callback)) {
@@ -558,75 +564,104 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
558
564
// callback callee.
559
565
F->addMetadata (
560
566
llvm::LLVMContext::MD_callback,
561
- *llvm::MDNode::get (Ctx, {MDB. createCallbackEncoding (
562
- 2 , {-1 , -1 },
563
- /* VarArgsArePassed */ true )}));
567
+ *llvm::MDNode::get (
568
+ Ctx, {MDB. createCallbackEncoding ( 2 , {-1 , -1 },
569
+ /* VarArgsArePassed */ true )}));
564
570
}
565
571
}
566
572
567
- Builder.CreateCall (RTLFn, RealArgs);
573
+ OI.PostOutlineCB = [=](Function &OutlinedFn) {
574
+ // Add some known attributes.
575
+ OutlinedFn.addParamAttr (0 , Attribute::NoAlias);
576
+ OutlinedFn.addParamAttr (1 , Attribute::NoAlias);
577
+ OutlinedFn.addFnAttr (Attribute::NoUnwind);
578
+ OutlinedFn.addFnAttr (Attribute::NoRecurse);
568
579
569
- LLVM_DEBUG (dbgs () << " With fork_call placed: "
570
- << *Builder.GetInsertBlock ()->getParent () << " \n " );
580
+ assert (OutlinedFn.arg_size () >= 2 &&
581
+ " Expected at least tid and bounded tid as arguments" );
582
+ unsigned NumCapturedVars =
583
+ OutlinedFn.arg_size () - /* tid & bounded tid */ 2 ;
571
584
572
- InsertPointTy AfterIP (UI-> getParent (), UI-> getParent ()-> end ());
573
- InsertPointTy ExitIP (PRegExitBB, PRegExitBB-> end () );
574
- UI-> eraseFromParent ( );
585
+ CallInst *CI = cast<CallInst>(OutlinedFn. user_back ());
586
+ CI-> getParent ()-> setName ( " omp_parallel " );
587
+ Builder. SetInsertPoint (CI );
575
588
576
- // Initialize the local TID stack location with the argument value.
577
- Builder. SetInsertPoint (PrivTID);
578
- Function::arg_iterator OutlinedAI = OutlinedFn-> arg_begin ();
579
- Builder.CreateStore (Builder. CreateLoad (OutlinedAI), PrivTIDAddr) ;
589
+ // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
590
+ Value *ForkCallArgs[] = {
591
+ Ident, Builder. getInt32 (NumCapturedVars),
592
+ Builder.CreateBitCast (&OutlinedFn, ParallelTaskPtr)} ;
580
593
581
- // If no "if" clause was present we do not need the call created during
582
- // outlining, otherwise we reuse it in the serialized parallel region.
583
- if (!ElseTI) {
584
- CI->eraseFromParent ();
585
- } else {
594
+ SmallVector<Value *, 16 > RealArgs;
595
+ RealArgs.append (std::begin (ForkCallArgs), std::end (ForkCallArgs));
596
+ RealArgs.append (CI->arg_begin () + /* tid & bound tid */ 2 , CI->arg_end ());
586
597
587
- // If an "if" clause was present we are now generating the serialized
588
- // version into the "else" branch.
589
- Builder.SetInsertPoint (ElseTI);
598
+ Builder.CreateCall (RTLFn, RealArgs);
590
599
591
- // Build calls __kmpc_serialized_parallel(&Ident, GTid);
592
- Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
593
- Builder.CreateCall (
594
- getOrCreateRuntimeFunction (OMPRTL___kmpc_serialized_parallel),
595
- SerializedParallelCallArgs);
600
+ LLVM_DEBUG (dbgs () << " With fork_call placed: "
601
+ << *Builder.GetInsertBlock ()->getParent () << " \n " );
596
602
597
- // OutlinedFn(>id, &zero, CapturedStruct);
598
- CI->removeFromParent ();
599
- Builder.Insert (CI);
603
+ InsertPointTy ExitIP (PRegExitBB, PRegExitBB->end ());
600
604
601
- // __kmpc_end_serialized_parallel(&Ident, GTid);
602
- Value *EndArgs[] = {Ident, ThreadID};
603
- Builder.CreateCall (
604
- getOrCreateRuntimeFunction (OMPRTL___kmpc_end_serialized_parallel),
605
- EndArgs);
605
+ // Initialize the local TID stack location with the argument value.
606
+ Builder.SetInsertPoint (PrivTID);
607
+ Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin ();
608
+ Builder.CreateStore (Builder.CreateLoad (OutlinedAI), PrivTIDAddr);
606
609
607
- LLVM_DEBUG (dbgs () << " With serialized parallel region: "
608
- << *Builder.GetInsertBlock ()->getParent () << " \n " );
609
- }
610
+ // If no "if" clause was present we do not need the call created during
611
+ // outlining, otherwise we reuse it in the serialized parallel region.
612
+ if (!ElseTI) {
613
+ CI->eraseFromParent ();
614
+ } else {
615
+
616
+ // If an "if" clause was present we are now generating the serialized
617
+ // version into the "else" branch.
618
+ Builder.SetInsertPoint (ElseTI);
619
+
620
+ // Build calls __kmpc_serialized_parallel(&Ident, GTid);
621
+ Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
622
+ Builder.CreateCall (
623
+ getOrCreateRuntimeFunction (OMPRTL___kmpc_serialized_parallel),
624
+ SerializedParallelCallArgs);
625
+
626
+ // OutlinedFn(>id, &zero, CapturedStruct);
627
+ CI->removeFromParent ();
628
+ Builder.Insert (CI);
629
+
630
+ // __kmpc_end_serialized_parallel(&Ident, GTid);
631
+ Value *EndArgs[] = {Ident, ThreadID};
632
+ Builder.CreateCall (
633
+ getOrCreateRuntimeFunction (OMPRTL___kmpc_end_serialized_parallel),
634
+ EndArgs);
635
+
636
+ LLVM_DEBUG (dbgs () << " With serialized parallel region: "
637
+ << *Builder.GetInsertBlock ()->getParent () << " \n " );
638
+ }
639
+
640
+ for (Instruction *I : ToBeDeleted)
641
+ I->eraseFromParent ();
642
+ };
610
643
611
644
// Adjust the finalization stack, verify the adjustment, and call the
612
- // finalize function a last time to finalize values between the pre-fini block
613
- // and the exit block if we left the parallel "the normal way".
645
+ // finalize function a last time to finalize values between the pre-fini
646
+ // block and the exit block if we left the parallel "the normal way".
614
647
auto FiniInfo = FinalizationStack.pop_back_val ();
615
648
(void )FiniInfo;
616
649
assert (FiniInfo.DK == OMPD_parallel &&
617
650
" Unexpected finalization stack state!" );
618
651
619
652
Instruction *PreFiniTI = PRegPreFiniBB->getTerminator ();
620
653
assert (PreFiniTI->getNumSuccessors () == 1 &&
621
- PreFiniTI->getSuccessor (0 )->size () == 1 &&
622
- isa<ReturnInst>(PreFiniTI->getSuccessor (0 )->getTerminator ()) &&
654
+ PreFiniTI->getSuccessor (0 ) == PRegExitBB &&
623
655
" Unexpected CFG structure!" );
624
656
625
657
InsertPointTy PreFiniIP (PRegPreFiniBB, PreFiniTI->getIterator ());
626
658
FiniCB (PreFiniIP);
627
659
628
- for (Instruction *I : ToBeDeleted)
629
- I->eraseFromParent ();
660
+ InsertPointTy AfterIP (UI->getParent (), UI->getParent ()->end ());
661
+ UI->eraseFromParent ();
662
+
663
+ // Register the outlined info.
664
+ addOutlineInfo (std::move (OI));
630
665
631
666
return AfterIP;
632
667
}
0 commit comments