Skip to content

Commit 8a56d64

Browse files
committed
[OpenMP][IRBuilder] Perform finalization (incl. outlining) late
In order to fix PR44560 and to prepare for loop transformations we now finalize a function late, which will also do the outlining late. The logic is as before but the actual outlining step happens now after the function was fully constructed. Once we have loop transformations we can apply them in the finalize step before the outlining. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D74372
1 parent 77b2ffc commit 8a56d64

File tree

5 files changed

+169
-108
lines changed

5 files changed

+169
-108
lines changed

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "clang/Basic/TargetInfo.h"
3333
#include "clang/CodeGen/CGFunctionInfo.h"
3434
#include "clang/Frontend/FrontendDiagnostic.h"
35+
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
3536
#include "llvm/IR/DataLayout.h"
3637
#include "llvm/IR/Dominators.h"
3738
#include "llvm/IR/FPEnv.h"
@@ -104,6 +105,14 @@ CodeGenFunction::~CodeGenFunction() {
104105

105106
if (getLangOpts().OpenMP && CurFn)
106107
CGM.getOpenMPRuntime().functionFinished(*this);
108+
109+
// If we have an OpenMPIRBuilder we want to finalize functions (incl.
110+
// outlining etc) at some point. Doing it once the function codegen is done
111+
// seems to be a reasonable spot. We do it here, as opposed to the deletion
112+
// time of the CodeGenModule, because we have to ensure the IR has not yet
113+
// been "emitted" to the outside, thus, modifications are still sensible.
114+
if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder())
115+
OMPBuilder->finalize();
107116
}
108117

109118
// Map the LangOption for rounding mode into

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ class OpenMPIRBuilder {
3434
/// before any other method and only once!
3535
void initialize();
3636

37+
/// Finalize the underlying module, e.g., by outlining regions.
38+
void finalize();
39+
3740
/// Add attributes known for \p FnID to \p Fn.
3841
void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
3942

@@ -254,6 +257,20 @@ class OpenMPIRBuilder {
254257

255258
/// Map to remember existing ident_t*.
256259
DenseMap<std::pair<Constant *, uint64_t>, GlobalVariable *> IdentMap;
260+
261+
/// Helper that contains information about regions we need to outline
262+
/// during finalization.
263+
struct OutlineInfo {
264+
SmallVector<BasicBlock *, 32> Blocks;
265+
using PostOutlineCBTy = std::function<void(Function &)>;
266+
PostOutlineCBTy PostOutlineCB;
267+
};
268+
269+
/// Collection of regions that need to be outlined during finalization.
270+
SmallVector<OutlineInfo, 16> OutlineInfos;
271+
272+
/// Add a new region that will be outlined later.
273+
void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
257274
};
258275

259276
} // end namespace llvm

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 130 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,55 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {
9393

9494
void OpenMPIRBuilder::initialize() { initializeTypes(M); }
9595

96+
void OpenMPIRBuilder::finalize() {
97+
for (OutlineInfo &OI : OutlineInfos) {
98+
assert(!OI.Blocks.empty() &&
99+
"Outlined regions should have at least a single block!");
100+
BasicBlock *RegEntryBB = OI.Blocks.front();
101+
Function *OuterFn = RegEntryBB->getParent();
102+
CodeExtractorAnalysisCache CEAC(*OuterFn);
103+
CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
104+
/* AggregateArgs */ false,
105+
/* BlockFrequencyInfo */ nullptr,
106+
/* BranchProbabilityInfo */ nullptr,
107+
/* AssumptionCache */ nullptr,
108+
/* AllowVarArgs */ true,
109+
/* AllowAlloca */ true,
110+
/* Suffix */ ".omp_par");
111+
112+
LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
113+
114+
Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
115+
116+
LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
117+
LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
118+
119+
// For compability with the clang CG we move the outlined function after the
120+
// one with the parallel region.
121+
OutlinedFn->removeFromParent();
122+
M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
123+
124+
// Remove the artificial entry introduced by the extractor right away, we
125+
// made our own entry block after all.
126+
{
127+
BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
128+
assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB);
129+
assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry);
130+
RegEntryBB->moveBefore(&ArtificialEntry);
131+
ArtificialEntry.eraseFromParent();
132+
}
133+
assert(&OutlinedFn->getEntryBlock() == RegEntryBB);
134+
assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
135+
136+
// Run a user callback, e.g. to add attributes.
137+
if (OI.PostOutlineCB)
138+
OI.PostOutlineCB(*OutlinedFn);
139+
}
140+
141+
// Allow finalize to be called multiple times.
142+
OutlineInfos.clear();
143+
}
144+
96145
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
97146
IdentFlag LocFlags) {
98147
// Enable "C-mode".
@@ -415,32 +464,33 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
415464
// PRegionExitBB <- A common exit to simplify block collection.
416465
//
417466

418-
LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
467+
LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
419468

420469
// Let the caller create the body.
421470
assert(BodyGenCB && "Expected body generation callback!");
422471
InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
423472
BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
424473

425-
LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n");
474+
LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
426475

476+
OutlineInfo OI;
427477
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
428-
SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
478+
SmallVector<BasicBlock *, 32> Worklist;
429479
ParallelRegionBlockSet.insert(PRegEntryBB);
430480
ParallelRegionBlockSet.insert(PRegExitBB);
431481

432482
// Collect all blocks in-between PRegEntryBB and PRegExitBB.
433483
Worklist.push_back(PRegEntryBB);
434484
while (!Worklist.empty()) {
435485
BasicBlock *BB = Worklist.pop_back_val();
436-
ParallelRegionBlocks.push_back(BB);
486+
OI.Blocks.push_back(BB);
437487
for (BasicBlock *SuccBB : successors(BB))
438488
if (ParallelRegionBlockSet.insert(SuccBB).second)
439489
Worklist.push_back(SuccBB);
440490
}
441491

442492
CodeExtractorAnalysisCache CEAC(*OuterFn);
443-
CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
493+
CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
444494
/* AggregateArgs */ false,
445495
/* BlockFrequencyInfo */ nullptr,
446496
/* BranchProbabilityInfo */ nullptr,
@@ -455,7 +505,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
455505
Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
456506
Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
457507

458-
LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
508+
LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
459509

460510
FunctionCallee TIDRTLFn =
461511
getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
@@ -496,56 +546,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
496546
PrivHelper(*Output);
497547
}
498548

499-
LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n");
549+
LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
500550
LLVM_DEBUG({
501-
for (auto *BB : ParallelRegionBlocks)
551+
for (auto *BB : OI.Blocks)
502552
dbgs() << " PBR: " << BB->getName() << "\n";
503553
});
504554

505-
// Add some known attributes to the outlined function.
506-
Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
507-
OutlinedFn->addParamAttr(0, Attribute::NoAlias);
508-
OutlinedFn->addParamAttr(1, Attribute::NoAlias);
509-
OutlinedFn->addFnAttr(Attribute::NoUnwind);
510-
OutlinedFn->addFnAttr(Attribute::NoRecurse);
511-
512-
LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n");
513-
LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
514-
515-
// For compability with the clang CG we move the outlined function after the
516-
// one with the parallel region.
517-
OutlinedFn->removeFromParent();
518-
M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
519-
520-
// Remove the artificial entry introduced by the extractor right away, we
521-
// made our own entry block after all.
522-
{
523-
BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
524-
assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
525-
assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
526-
PRegEntryBB->moveBefore(&ArtificialEntry);
527-
ArtificialEntry.eraseFromParent();
528-
}
529-
LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
530-
assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
531-
532-
assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
533-
assert(OutlinedFn->arg_size() >= 2 &&
534-
"Expected at least tid and bounded tid as arguments");
535-
unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
536-
537-
CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
538-
CI->getParent()->setName("omp_parallel");
539-
Builder.SetInsertPoint(CI);
540-
541-
// Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
542-
Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
543-
Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
544-
545-
SmallVector<Value *, 16> RealArgs;
546-
RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
547-
RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
548-
549555
FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
550556
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
551557
if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
@@ -558,75 +564,104 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
558564
// callback callee.
559565
F->addMetadata(
560566
llvm::LLVMContext::MD_callback,
561-
*llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
562-
2, {-1, -1},
563-
/* VarArgsArePassed */ true)}));
567+
*llvm::MDNode::get(
568+
Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
569+
/* VarArgsArePassed */ true)}));
564570
}
565571
}
566572

567-
Builder.CreateCall(RTLFn, RealArgs);
573+
OI.PostOutlineCB = [=](Function &OutlinedFn) {
574+
// Add some known attributes.
575+
OutlinedFn.addParamAttr(0, Attribute::NoAlias);
576+
OutlinedFn.addParamAttr(1, Attribute::NoAlias);
577+
OutlinedFn.addFnAttr(Attribute::NoUnwind);
578+
OutlinedFn.addFnAttr(Attribute::NoRecurse);
568579

569-
LLVM_DEBUG(dbgs() << "With fork_call placed: "
570-
<< *Builder.GetInsertBlock()->getParent() << "\n");
580+
assert(OutlinedFn.arg_size() >= 2 &&
581+
"Expected at least tid and bounded tid as arguments");
582+
unsigned NumCapturedVars =
583+
OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
571584

572-
InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
573-
InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
574-
UI->eraseFromParent();
585+
CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
586+
CI->getParent()->setName("omp_parallel");
587+
Builder.SetInsertPoint(CI);
575588

576-
// Initialize the local TID stack location with the argument value.
577-
Builder.SetInsertPoint(PrivTID);
578-
Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
579-
Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
589+
// Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
590+
Value *ForkCallArgs[] = {
591+
Ident, Builder.getInt32(NumCapturedVars),
592+
Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
580593

581-
// If no "if" clause was present we do not need the call created during
582-
// outlining, otherwise we reuse it in the serialized parallel region.
583-
if (!ElseTI) {
584-
CI->eraseFromParent();
585-
} else {
594+
SmallVector<Value *, 16> RealArgs;
595+
RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
596+
RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
586597

587-
// If an "if" clause was present we are now generating the serialized
588-
// version into the "else" branch.
589-
Builder.SetInsertPoint(ElseTI);
598+
Builder.CreateCall(RTLFn, RealArgs);
590599

591-
// Build calls __kmpc_serialized_parallel(&Ident, GTid);
592-
Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
593-
Builder.CreateCall(
594-
getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
595-
SerializedParallelCallArgs);
600+
LLVM_DEBUG(dbgs() << "With fork_call placed: "
601+
<< *Builder.GetInsertBlock()->getParent() << "\n");
596602

597-
// OutlinedFn(&GTid, &zero, CapturedStruct);
598-
CI->removeFromParent();
599-
Builder.Insert(CI);
603+
InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
600604

601-
// __kmpc_end_serialized_parallel(&Ident, GTid);
602-
Value *EndArgs[] = {Ident, ThreadID};
603-
Builder.CreateCall(
604-
getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
605-
EndArgs);
605+
// Initialize the local TID stack location with the argument value.
606+
Builder.SetInsertPoint(PrivTID);
607+
Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
608+
Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
606609

607-
LLVM_DEBUG(dbgs() << "With serialized parallel region: "
608-
<< *Builder.GetInsertBlock()->getParent() << "\n");
609-
}
610+
// If no "if" clause was present we do not need the call created during
611+
// outlining, otherwise we reuse it in the serialized parallel region.
612+
if (!ElseTI) {
613+
CI->eraseFromParent();
614+
} else {
615+
616+
// If an "if" clause was present we are now generating the serialized
617+
// version into the "else" branch.
618+
Builder.SetInsertPoint(ElseTI);
619+
620+
// Build calls __kmpc_serialized_parallel(&Ident, GTid);
621+
Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
622+
Builder.CreateCall(
623+
getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
624+
SerializedParallelCallArgs);
625+
626+
// OutlinedFn(&GTid, &zero, CapturedStruct);
627+
CI->removeFromParent();
628+
Builder.Insert(CI);
629+
630+
// __kmpc_end_serialized_parallel(&Ident, GTid);
631+
Value *EndArgs[] = {Ident, ThreadID};
632+
Builder.CreateCall(
633+
getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
634+
EndArgs);
635+
636+
LLVM_DEBUG(dbgs() << "With serialized parallel region: "
637+
<< *Builder.GetInsertBlock()->getParent() << "\n");
638+
}
639+
640+
for (Instruction *I : ToBeDeleted)
641+
I->eraseFromParent();
642+
};
610643

611644
// Adjust the finalization stack, verify the adjustment, and call the
612-
// finalize function a last time to finalize values between the pre-fini block
613-
// and the exit block if we left the parallel "the normal way".
645+
// finalize function a last time to finalize values between the pre-fini
646+
// block and the exit block if we left the parallel "the normal way".
614647
auto FiniInfo = FinalizationStack.pop_back_val();
615648
(void)FiniInfo;
616649
assert(FiniInfo.DK == OMPD_parallel &&
617650
"Unexpected finalization stack state!");
618651

619652
Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
620653
assert(PreFiniTI->getNumSuccessors() == 1 &&
621-
PreFiniTI->getSuccessor(0)->size() == 1 &&
622-
isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
654+
PreFiniTI->getSuccessor(0) == PRegExitBB &&
623655
"Unexpected CFG structure!");
624656

625657
InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
626658
FiniCB(PreFiniIP);
627659

628-
for (Instruction *I : ToBeDeleted)
629-
I->eraseFromParent();
660+
InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
661+
UI->eraseFromParent();
662+
663+
// Register the outlined info.
664+
addOutlineInfo(std::move(OI));
630665

631666
return AfterIP;
632667
}

llvm/lib/Transforms/Utils/CodeExtractor.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1405,11 +1405,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
14051405
DISubprogram *OldSP = OldFunc.getSubprogram();
14061406
LLVMContext &Ctx = OldFunc.getContext();
14071407

1408-
// See llvm.org/PR44560, OpenMP passes an invalid subprogram to CodeExtractor.
1409-
bool NeedWorkaroundForOpenMPIRBuilderBug =
1410-
OldSP && OldSP->getRetainedNodes()->isTemporary();
1411-
1412-
if (!OldSP || NeedWorkaroundForOpenMPIRBuilderBug) {
1408+
if (!OldSP) {
14131409
// Erase any debug info the new function contains.
14141410
stripDebugInfo(NewFunc);
14151411
// Make sure the old function doesn't contain any non-local metadata refs.

0 commit comments

Comments
 (0)