Skip to content

Commit 70cac41

Browse files
committed
Reapply "[OpenMP][IRBuilder] Perform finalization (incl. outlining) late"
Reapply 8a56d64 with minor fixes. The problem was that cancellation can cause new edges to the parallel region exit block which is not outlined. The CodeExtractor will encode the information which "exit" was taken as a return value. The fix is to ensure we do not return any value from the outlined function, to prevent control to value conversion we ensure a single exit block for the outlined region. This reverts commit 3aac953.
1 parent a6f38b4 commit 70cac41

File tree

5 files changed

+184
-115
lines changed

5 files changed

+184
-115
lines changed

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "clang/Basic/TargetInfo.h"
3333
#include "clang/CodeGen/CGFunctionInfo.h"
3434
#include "clang/Frontend/FrontendDiagnostic.h"
35+
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
3536
#include "llvm/IR/DataLayout.h"
3637
#include "llvm/IR/Dominators.h"
3738
#include "llvm/IR/FPEnv.h"
@@ -104,6 +105,14 @@ CodeGenFunction::~CodeGenFunction() {
104105

105106
if (getLangOpts().OpenMP && CurFn)
106107
CGM.getOpenMPRuntime().functionFinished(*this);
108+
109+
// If we have an OpenMPIRBuilder we want to finalize functions (incl.
110+
// outlining etc) at some point. Doing it once the function codegen is done
111+
// seems to be a reasonable spot. We do it here, as opposed to the deletion
112+
// time of the CodeGenModule, because we have to ensure the IR has not yet
113+
// been "emitted" to the outside, thus, modifications are still sensible.
114+
if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder())
115+
OMPBuilder->finalize();
107116
}
108117

109118
// Map the LangOption for rounding mode into

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ class OpenMPIRBuilder {
3434
/// before any other method and only once!
3535
void initialize();
3636

37+
/// Finalize the underlying module, e.g., by outlining regions.
38+
void finalize();
39+
3740
/// Add attributes known for \p FnID to \p Fn.
3841
void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
3942

@@ -254,6 +257,20 @@ class OpenMPIRBuilder {
254257

255258
/// Map to remember existing ident_t*.
256259
DenseMap<std::pair<Constant *, uint64_t>, GlobalVariable *> IdentMap;
260+
261+
/// Helper that contains information about regions we need to outline
262+
/// during finalization.
263+
struct OutlineInfo {
264+
SmallVector<BasicBlock *, 32> Blocks;
265+
using PostOutlineCBTy = std::function<void(Function &)>;
266+
PostOutlineCBTy PostOutlineCB;
267+
};
268+
269+
/// Collection of regions that need to be outlined during finalization.
270+
SmallVector<OutlineInfo, 16> OutlineInfos;
271+
272+
/// Add a new region that will be outlined later.
273+
void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
257274
};
258275

259276
} // end namespace llvm

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 145 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,57 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {
9393

9494
void OpenMPIRBuilder::initialize() { initializeTypes(M); }
9595

96+
void OpenMPIRBuilder::finalize() {
97+
for (OutlineInfo &OI : OutlineInfos) {
98+
assert(!OI.Blocks.empty() &&
99+
"Outlined regions should have at least a single block!");
100+
BasicBlock *RegEntryBB = OI.Blocks.front();
101+
Function *OuterFn = RegEntryBB->getParent();
102+
CodeExtractorAnalysisCache CEAC(*OuterFn);
103+
CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
104+
/* AggregateArgs */ false,
105+
/* BlockFrequencyInfo */ nullptr,
106+
/* BranchProbabilityInfo */ nullptr,
107+
/* AssumptionCache */ nullptr,
108+
/* AllowVarArgs */ true,
109+
/* AllowAlloca */ true,
110+
/* Suffix */ ".omp_par");
111+
112+
LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
113+
114+
Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
115+
116+
LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
117+
LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
118+
assert(OutlinedFn->getReturnType()->isVoidTy() &&
119+
"OpenMP outlined functions should not return a value!");
120+
121+
// For compability with the clang CG we move the outlined function after the
122+
// one with the parallel region.
123+
OutlinedFn->removeFromParent();
124+
M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
125+
126+
// Remove the artificial entry introduced by the extractor right away, we
127+
// made our own entry block after all.
128+
{
129+
BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
130+
assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB);
131+
assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry);
132+
RegEntryBB->moveBefore(&ArtificialEntry);
133+
ArtificialEntry.eraseFromParent();
134+
}
135+
assert(&OutlinedFn->getEntryBlock() == RegEntryBB);
136+
assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
137+
138+
// Run a user callback, e.g. to add attributes.
139+
if (OI.PostOutlineCB)
140+
OI.PostOutlineCB(*OutlinedFn);
141+
}
142+
143+
// Allow finalize to be called multiple times.
144+
OutlineInfos.clear();
145+
}
146+
96147
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
97148
IdentFlag LocFlags) {
98149
// Enable "C-mode".
@@ -415,32 +466,40 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
415466
// PRegionExitBB <- A common exit to simplify block collection.
416467
//
417468

418-
LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
469+
LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
419470

420471
// Let the caller create the body.
421472
assert(BodyGenCB && "Expected body generation callback!");
422473
InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
423474
BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
424475

425-
LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n");
476+
LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
426477

478+
OutlineInfo OI;
427479
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
428-
SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
480+
SmallVector<BasicBlock *, 32> Worklist;
429481
ParallelRegionBlockSet.insert(PRegEntryBB);
430482
ParallelRegionBlockSet.insert(PRegExitBB);
431483

432484
// Collect all blocks in-between PRegEntryBB and PRegExitBB.
433485
Worklist.push_back(PRegEntryBB);
434486
while (!Worklist.empty()) {
435487
BasicBlock *BB = Worklist.pop_back_val();
436-
ParallelRegionBlocks.push_back(BB);
488+
OI.Blocks.push_back(BB);
437489
for (BasicBlock *SuccBB : successors(BB))
438490
if (ParallelRegionBlockSet.insert(SuccBB).second)
439491
Worklist.push_back(SuccBB);
440492
}
441493

494+
// Ensure a single exit node for the outlined region by creating one.
495+
// We might have multiple incoming edges to the exit now due to finalizations,
496+
// e.g., cancel calls that cause the control flow to leave the region.
497+
BasicBlock *PRegOutlinedExitBB = PRegExitBB;
498+
PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
499+
OI.Blocks.push_back(PRegOutlinedExitBB);
500+
442501
CodeExtractorAnalysisCache CEAC(*OuterFn);
443-
CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
502+
CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
444503
/* AggregateArgs */ false,
445504
/* BlockFrequencyInfo */ nullptr,
446505
/* BranchProbabilityInfo */ nullptr,
@@ -455,7 +514,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
455514
Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
456515
Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
457516

458-
LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
517+
LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
459518

460519
FunctionCallee TIDRTLFn =
461520
getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
@@ -491,61 +550,15 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
491550
LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
492551
PrivHelper(*Input);
493552
}
494-
for (Value *Output : Outputs) {
495-
LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
496-
PrivHelper(*Output);
497-
}
553+
assert(Outputs.empty() &&
554+
"OpenMP outlining should not produce live-out values!");
498555

499-
LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n");
556+
LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
500557
LLVM_DEBUG({
501-
for (auto *BB : ParallelRegionBlocks)
558+
for (auto *BB : OI.Blocks)
502559
dbgs() << " PBR: " << BB->getName() << "\n";
503560
});
504561

505-
// Add some known attributes to the outlined function.
506-
Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
507-
OutlinedFn->addParamAttr(0, Attribute::NoAlias);
508-
OutlinedFn->addParamAttr(1, Attribute::NoAlias);
509-
OutlinedFn->addFnAttr(Attribute::NoUnwind);
510-
OutlinedFn->addFnAttr(Attribute::NoRecurse);
511-
512-
LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n");
513-
LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
514-
515-
// For compability with the clang CG we move the outlined function after the
516-
// one with the parallel region.
517-
OutlinedFn->removeFromParent();
518-
M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
519-
520-
// Remove the artificial entry introduced by the extractor right away, we
521-
// made our own entry block after all.
522-
{
523-
BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
524-
assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
525-
assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
526-
PRegEntryBB->moveBefore(&ArtificialEntry);
527-
ArtificialEntry.eraseFromParent();
528-
}
529-
LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
530-
assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
531-
532-
assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
533-
assert(OutlinedFn->arg_size() >= 2 &&
534-
"Expected at least tid and bounded tid as arguments");
535-
unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
536-
537-
CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
538-
CI->getParent()->setName("omp_parallel");
539-
Builder.SetInsertPoint(CI);
540-
541-
// Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
542-
Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
543-
Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
544-
545-
SmallVector<Value *, 16> RealArgs;
546-
RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
547-
RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
548-
549562
FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
550563
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
551564
if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
@@ -558,75 +571,105 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
558571
// callback callee.
559572
F->addMetadata(
560573
llvm::LLVMContext::MD_callback,
561-
*llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
562-
2, {-1, -1},
563-
/* VarArgsArePassed */ true)}));
574+
*llvm::MDNode::get(
575+
Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
576+
/* VarArgsArePassed */ true)}));
564577
}
565578
}
566579

567-
Builder.CreateCall(RTLFn, RealArgs);
580+
OI.PostOutlineCB = [=](Function &OutlinedFn) {
581+
// Add some known attributes.
582+
OutlinedFn.addParamAttr(0, Attribute::NoAlias);
583+
OutlinedFn.addParamAttr(1, Attribute::NoAlias);
584+
OutlinedFn.addFnAttr(Attribute::NoUnwind);
585+
OutlinedFn.addFnAttr(Attribute::NoRecurse);
568586

569-
LLVM_DEBUG(dbgs() << "With fork_call placed: "
570-
<< *Builder.GetInsertBlock()->getParent() << "\n");
587+
assert(OutlinedFn.arg_size() >= 2 &&
588+
"Expected at least tid and bounded tid as arguments");
589+
unsigned NumCapturedVars =
590+
OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
571591

572-
InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
573-
InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
574-
UI->eraseFromParent();
592+
CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
593+
CI->getParent()->setName("omp_parallel");
594+
Builder.SetInsertPoint(CI);
575595

576-
// Initialize the local TID stack location with the argument value.
577-
Builder.SetInsertPoint(PrivTID);
578-
Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
579-
Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
596+
// Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
597+
Value *ForkCallArgs[] = {
598+
Ident, Builder.getInt32(NumCapturedVars),
599+
Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
580600

581-
// If no "if" clause was present we do not need the call created during
582-
// outlining, otherwise we reuse it in the serialized parallel region.
583-
if (!ElseTI) {
584-
CI->eraseFromParent();
585-
} else {
601+
SmallVector<Value *, 16> RealArgs;
602+
RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
603+
RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
586604

587-
// If an "if" clause was present we are now generating the serialized
588-
// version into the "else" branch.
589-
Builder.SetInsertPoint(ElseTI);
605+
Builder.CreateCall(RTLFn, RealArgs);
590606

591-
// Build calls __kmpc_serialized_parallel(&Ident, GTid);
592-
Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
593-
Builder.CreateCall(
594-
getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
595-
SerializedParallelCallArgs);
607+
LLVM_DEBUG(dbgs() << "With fork_call placed: "
608+
<< *Builder.GetInsertBlock()->getParent() << "\n");
596609

597-
// OutlinedFn(&GTid, &zero, CapturedStruct);
598-
CI->removeFromParent();
599-
Builder.Insert(CI);
610+
InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
600611

601-
// __kmpc_end_serialized_parallel(&Ident, GTid);
602-
Value *EndArgs[] = {Ident, ThreadID};
603-
Builder.CreateCall(
604-
getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
605-
EndArgs);
612+
// Initialize the local TID stack location with the argument value.
613+
Builder.SetInsertPoint(PrivTID);
614+
Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
615+
Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
606616

607-
LLVM_DEBUG(dbgs() << "With serialized parallel region: "
608-
<< *Builder.GetInsertBlock()->getParent() << "\n");
609-
}
617+
// If no "if" clause was present we do not need the call created during
618+
// outlining, otherwise we reuse it in the serialized parallel region.
619+
if (!ElseTI) {
620+
CI->eraseFromParent();
621+
} else {
622+
623+
// If an "if" clause was present we are now generating the serialized
624+
// version into the "else" branch.
625+
Builder.SetInsertPoint(ElseTI);
626+
627+
// Build calls __kmpc_serialized_parallel(&Ident, GTid);
628+
Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
629+
Builder.CreateCall(
630+
getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
631+
SerializedParallelCallArgs);
632+
633+
// OutlinedFn(&GTid, &zero, CapturedStruct);
634+
CI->removeFromParent();
635+
Builder.Insert(CI);
636+
637+
// __kmpc_end_serialized_parallel(&Ident, GTid);
638+
Value *EndArgs[] = {Ident, ThreadID};
639+
Builder.CreateCall(
640+
getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
641+
EndArgs);
642+
643+
LLVM_DEBUG(dbgs() << "With serialized parallel region: "
644+
<< *Builder.GetInsertBlock()->getParent() << "\n");
645+
}
646+
647+
for (Instruction *I : ToBeDeleted)
648+
I->eraseFromParent();
649+
};
610650

611651
// Adjust the finalization stack, verify the adjustment, and call the
612-
// finalize function a last time to finalize values between the pre-fini block
613-
// and the exit block if we left the parallel "the normal way".
652+
// finalize function a last time to finalize values between the pre-fini
653+
// block and the exit block if we left the parallel "the normal way".
614654
auto FiniInfo = FinalizationStack.pop_back_val();
615655
(void)FiniInfo;
616656
assert(FiniInfo.DK == OMPD_parallel &&
617657
"Unexpected finalization stack state!");
618658

619-
Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
620-
assert(PreFiniTI->getNumSuccessors() == 1 &&
621-
PreFiniTI->getSuccessor(0)->size() == 1 &&
622-
isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
659+
Instruction *PRegOutlinedExitTI = PRegOutlinedExitBB->getTerminator();
660+
assert(PRegOutlinedExitTI->getNumSuccessors() == 1 &&
661+
PRegOutlinedExitTI->getSuccessor(0) == PRegExitBB &&
623662
"Unexpected CFG structure!");
624663

625-
InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
664+
InsertPointTy PreFiniIP(PRegOutlinedExitBB,
665+
PRegOutlinedExitTI->getIterator());
626666
FiniCB(PreFiniIP);
627667

628-
for (Instruction *I : ToBeDeleted)
629-
I->eraseFromParent();
668+
InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
669+
UI->eraseFromParent();
670+
671+
// Register the outlined info.
672+
addOutlineInfo(std::move(OI));
630673

631674
return AfterIP;
632675
}

llvm/lib/Transforms/Utils/CodeExtractor.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1405,11 +1405,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
14051405
DISubprogram *OldSP = OldFunc.getSubprogram();
14061406
LLVMContext &Ctx = OldFunc.getContext();
14071407

1408-
// See llvm.org/PR44560, OpenMP passes an invalid subprogram to CodeExtractor.
1409-
bool NeedWorkaroundForOpenMPIRBuilderBug =
1410-
OldSP && OldSP->getRetainedNodes()->isTemporary();
1411-
1412-
if (!OldSP || NeedWorkaroundForOpenMPIRBuilderBug) {
1408+
if (!OldSP) {
14131409
// Erase any debug info the new function contains.
14141410
stripDebugInfo(NewFunc);
14151411
// Make sure the old function doesn't contain any non-local metadata refs.

0 commit comments

Comments
 (0)