Skip to content

Commit 3e7415a

Browse files
committed
[OMPIRBuilder] Support ordered clause specified without parameter
This patch adds support for the ordered clause specified without a parameter in the worksharing-loop directive, both in the OpenMPIRBuilder and in the lowering of MLIR to LLVM IR. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D114940
1 parent c7ed65b commit 3e7415a

File tree

7 files changed

+358
-24
lines changed

7 files changed

+358
-24
lines changed

llvm/include/llvm/Frontend/OpenMP/OMPConstants.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,6 @@ enum class IdentFlag {
7878
enum class OMPScheduleType {
7979
StaticChunked = 33,
8080
Static = 34, // static unspecialized
81-
DistributeChunked = 91,
82-
Distribute = 92,
8381
DynamicChunked = 35,
8482
GuidedChunked = 36, // guided unspecialized
8583
Runtime = 37,
@@ -89,6 +87,16 @@ enum class OMPScheduleType {
8987
GuidedSimd = 46, // guided with chunk adjustment
9088
RuntimeSimd = 47, // runtime with chunk adjustment
9189

90+
OrderedStaticChunked = 65,
91+
OrderedStatic = 66, // ordered static unspecialized
92+
OrderedDynamicChunked = 67,
93+
OrderedGuidedChunked = 68,
94+
OrderedRuntime = 69,
95+
OrderedAuto = 70, // ordered auto
96+
97+
DistributeChunked = 91, // distribute static chunked
98+
Distribute = 92, // distribute static unspecialized
99+
92100
ModifierMonotonic =
93101
(1 << 29), // Set if the monotonic schedule modifier was present
94102
ModifierNonmonotonic =

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,13 +403,16 @@ class OpenMPIRBuilder {
403403
/// the loop.
404404
/// \param Chunk The size of loop chunk considered as a unit when
405405
/// scheduling. If \p nullptr, defaults to 1.
406+
/// \param Ordered Indicates whether the ordered clause is specified without a
407+
/// parameter.
406408
///
407409
/// \returns Point where to insert code after the workshare construct.
408410
InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
409411
InsertPointTy AllocaIP,
410412
omp::OMPScheduleType SchedType,
411413
bool NeedsBarrier,
412-
Value *Chunk = nullptr);
414+
Value *Chunk = nullptr,
415+
bool Ordered = false);
413416

414417
/// Modifies the canonical loop to be a workshare loop.
415418
///

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1903,9 +1903,24 @@ getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
19031903
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
19041904
}
19051905

1906+
/// Returns an LLVM function to call for finalizing the dynamic loop,
1907+
/// depending on `Ty`. Only i32 and i64 are supported by the runtime. Always
1908+
/// interpret integers as unsigned similarly to CanonicalLoopInfo.
1909+
static FunctionCallee
1910+
getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1911+
unsigned Bitwidth = Ty->getIntegerBitWidth();
1912+
if (Bitwidth == 32)
1913+
return OMPBuilder.getOrCreateRuntimeFunction(
1914+
M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
1915+
if (Bitwidth == 64)
1916+
return OMPBuilder.getOrCreateRuntimeFunction(
1917+
M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
1918+
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1919+
}
1920+
19061921
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
19071922
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
1908-
OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
1923+
OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk, bool Ordered) {
19091924
assert(CLI->isValid() && "Requires a valid canonical loop");
19101925
assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
19111926
"Require dedicated allocate IP");
@@ -1946,6 +1961,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
19461961
BasicBlock *Header = CLI->getHeader();
19471962
BasicBlock *Exit = CLI->getExit();
19481963
BasicBlock *Cond = CLI->getCond();
1964+
BasicBlock *Latch = CLI->getLatch();
19491965
InsertPointTy AfterIP = CLI->getAfterIP();
19501966

19511967
// The CLI will be "broken" in the code below, as the loop is no longer
@@ -2005,6 +2021,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
20052021
assert(BI->getSuccessor(1) == Exit);
20062022
BI->setSuccessor(1, OuterCond);
20072023

2024+
// Call the "fini" function if "ordered" is present in the wsloop directive.
2025+
if (Ordered) {
2026+
Builder.SetInsertPoint(&Latch->back());
2027+
FunctionCallee DynamicFini = getKmpcForDynamicFiniForType(IVTy, M, *this);
2028+
Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
2029+
}
2030+
20082031
// Add the barrier if requested.
20092032
if (NeedsBarrier) {
20102033
Builder.SetInsertPoint(&Exit->back());

llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2089,11 +2089,13 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
20892089
InsertPointTy AfterIP = CLI->getAfterIP();
20902090
BasicBlock *Preheader = CLI->getPreheader();
20912091
BasicBlock *ExitBlock = CLI->getExit();
2092+
BasicBlock *LatchBlock = CLI->getLatch();
20922093
Value *IV = CLI->getIndVar();
20932094

20942095
InsertPointTy EndIP =
20952096
OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
2096-
/*NeedsBarrier=*/true, ChunkVal);
2097+
/*NeedsBarrier=*/true, ChunkVal,
2098+
/*Ordered=*/false);
20972099
// The returned value should be the "after" point.
20982100
ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
20992101
ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
@@ -2146,6 +2148,10 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
21462148
EXPECT_EQ(OrigUpperBound->getValue(), 21);
21472149
EXPECT_EQ(OrigStride->getValue(), 1);
21482150

2151+
CallInst *FiniCall = dyn_cast<CallInst>(
2152+
&*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2153+
EXPECT_EQ(FiniCall, nullptr);
2154+
21492155
// The original loop iterator should only be used in the condition, in the
21502156
// increment and in the statement that adds the lower bound to it.
21512157
EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
@@ -2181,6 +2187,83 @@ INSTANTIATE_TEST_SUITE_P(
21812187
omp::OMPScheduleType::Runtime |
21822188
omp::OMPScheduleType::ModifierMonotonic));
21832189

2190+
TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
2191+
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
2192+
OpenMPIRBuilder OMPBuilder(*M);
2193+
OMPBuilder.initialize();
2194+
IRBuilder<> Builder(BB);
2195+
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
2196+
2197+
omp::OMPScheduleType SchedType = omp::OMPScheduleType::OrderedStaticChunked;
2198+
uint32_t ChunkSize = 1;
2199+
Type *LCTy = Type::getInt32Ty(Ctx);
2200+
Value *StartVal = ConstantInt::get(LCTy, 10);
2201+
Value *StopVal = ConstantInt::get(LCTy, 52);
2202+
Value *StepVal = ConstantInt::get(LCTy, 2);
2203+
Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
2204+
auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
2205+
2206+
CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
2207+
Loc, LoopBodyGen, StartVal, StopVal, StepVal,
2208+
/*IsSigned=*/false, /*InclusiveStop=*/false);
2209+
2210+
Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
2211+
InsertPointTy AllocaIP = Builder.saveIP();
2212+
2213+
// Collect all the info from CLI, as it isn't usable after the call to
2214+
// applyDynamicWorkshareLoop.
2215+
InsertPointTy AfterIP = CLI->getAfterIP();
2216+
BasicBlock *Preheader = CLI->getPreheader();
2217+
BasicBlock *ExitBlock = CLI->getExit();
2218+
BasicBlock *LatchBlock = CLI->getLatch();
2219+
Value *IV = CLI->getIndVar();
2220+
2221+
InsertPointTy EndIP =
2222+
OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
2223+
/*NeedsBarrier=*/true, ChunkVal,
2224+
/*Ordered=*/true);
2225+
2226+
// Add a termination to our block and check that it is internally consistent.
2227+
Builder.restoreIP(EndIP);
2228+
Builder.CreateRetVoid();
2229+
OMPBuilder.finalize();
2230+
EXPECT_FALSE(verifyModule(*M, &errs()));
2231+
2232+
CallInst *InitCall = nullptr;
2233+
for (Instruction &EI : *Preheader) {
2234+
Instruction *Cur = &EI;
2235+
if (isa<CallInst>(Cur)) {
2236+
InitCall = cast<CallInst>(Cur);
2237+
if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u")
2238+
break;
2239+
InitCall = nullptr;
2240+
}
2241+
}
2242+
EXPECT_NE(InitCall, nullptr);
2243+
EXPECT_EQ(InitCall->arg_size(), 7U);
2244+
ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
2245+
EXPECT_EQ(SchedVal->getValue(), static_cast<uint64_t>(SchedType));
2246+
2247+
CallInst *FiniCall = dyn_cast<CallInst>(
2248+
&*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
2249+
ASSERT_NE(FiniCall, nullptr);
2250+
EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
2251+
"__kmpc_dispatch_fini_4u");
2252+
EXPECT_EQ(FiniCall->arg_size(), 2U);
2253+
EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
2254+
EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
2255+
2256+
// The original loop iterator should only be used in the condition, in the
2257+
// increment and in the statement that adds the lower bound to it.
2258+
EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
2259+
2260+
// The exit block should contain the barrier call, plus the call to obtain
2261+
// the thread ID.
2262+
size_t NumCallsInExitBlock =
2263+
count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
2264+
EXPECT_EQ(NumCallsInExitBlock, 2u);
2265+
}
2266+
21842267
TEST_F(OpenMPIRBuilderTest, MasterDirective) {
21852268
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
21862269
OpenMPIRBuilder OMPBuilder(*M);

mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,9 +280,9 @@ def SingleOp : OpenMP_Op<"single", [AttrSizedOperandSegments]> {
280280
def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
281281
AllTypesMatch<["lowerBound", "upperBound", "step"]>,
282282
RecursiveSideEffects, ReductionClauseInterface]> {
283-
let summary = "workshare loop construct";
283+
let summary = "worksharing-loop construct";
284284
let description = [{
285-
The workshare loop construct specifies that the iterations of the loop(s)
285+
The worksharing-loop construct specifies that the iterations of the loop(s)
286286
will be executed in parallel by threads in the current context. These
287287
iterations are spread across threads that already exist in the enclosing
288288
parallel region. The lower and upper bounds specify a half-open range: the
@@ -332,7 +332,8 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
332332
implicit barrier at the end of the loop.
333333

334334
The optional `ordered_val` attribute specifies how many loops are associated
335-
with the do loop construct.
335+
with the worksharing-loop construct. The value of zero refers to the ordered
336+
clause specified without a parameter.
336337

337338
The optional `order` attribute specifies which order the iterations of the
338339
associate loops are executed in. Currently the only option for this

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -799,32 +799,63 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
799799

800800
bool isSimd = loop.simd_modifier();
801801

802-
if (schedule == omp::ClauseScheduleKind::Static) {
802+
// The orderedVal refers to the value obtained from the ordered[(n)] clause.
803+
// orderedVal == -1: No ordered[(n)] clause specified.
804+
// orderedVal == 0: The ordered clause specified without a parameter.
805+
// orderedVal > 0: The ordered clause specified with a parameter (n).
806+
// TODO: Handle doacross loop init when orderedVal is greater than 0.
807+
int64_t orderedVal =
808+
loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1;
809+
if (schedule == omp::ClauseScheduleKind::Static && orderedVal != 0) {
803810
ompBuilder->applyWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
804811
!loop.nowait(),
805812
llvm::omp::OMP_SCHEDULE_Static, chunk);
806813
} else {
807814
llvm::omp::OMPScheduleType schedType;
808815
switch (schedule) {
816+
case omp::ClauseScheduleKind::Static:
817+
if (loop.schedule_chunk_var())
818+
schedType = llvm::omp::OMPScheduleType::OrderedStaticChunked;
819+
else
820+
schedType = llvm::omp::OMPScheduleType::OrderedStatic;
821+
break;
809822
case omp::ClauseScheduleKind::Dynamic:
810-
schedType = llvm::omp::OMPScheduleType::DynamicChunked;
823+
if (orderedVal == 0)
824+
schedType = llvm::omp::OMPScheduleType::OrderedDynamicChunked;
825+
else
826+
schedType = llvm::omp::OMPScheduleType::DynamicChunked;
811827
break;
812828
case omp::ClauseScheduleKind::Guided:
813-
if (isSimd)
814-
schedType = llvm::omp::OMPScheduleType::GuidedSimd;
815-
else
816-
schedType = llvm::omp::OMPScheduleType::GuidedChunked;
829+
if (orderedVal == 0) {
830+
schedType = llvm::omp::OMPScheduleType::OrderedGuidedChunked;
831+
} else {
832+
if (isSimd)
833+
schedType = llvm::omp::OMPScheduleType::GuidedSimd;
834+
else
835+
schedType = llvm::omp::OMPScheduleType::GuidedChunked;
836+
}
817837
break;
818838
case omp::ClauseScheduleKind::Auto:
819-
schedType = llvm::omp::OMPScheduleType::Auto;
839+
if (orderedVal == 0)
840+
schedType = llvm::omp::OMPScheduleType::OrderedAuto;
841+
else
842+
schedType = llvm::omp::OMPScheduleType::Auto;
820843
break;
821844
case omp::ClauseScheduleKind::Runtime:
822-
if (isSimd)
823-
schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
824-
else
825-
schedType = llvm::omp::OMPScheduleType::Runtime;
845+
if (orderedVal == 0) {
846+
schedType = llvm::omp::OMPScheduleType::OrderedRuntime;
847+
} else {
848+
if (isSimd)
849+
schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
850+
else
851+
schedType = llvm::omp::OMPScheduleType::Runtime;
852+
}
826853
break;
827854
default:
855+
if (orderedVal == 0) {
856+
schedType = llvm::omp::OMPScheduleType::OrderedStatic;
857+
break;
858+
}
828859
llvm_unreachable("Unknown schedule value");
829860
break;
830861
}
@@ -841,9 +872,23 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
841872
// Nothing to do here.
842873
break;
843874
}
875+
} else {
876+
// OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
877+
// If the static schedule kind is specified or if the ordered clause is
878+
// specified, and if the nonmonotonic modifier is not specified, the
879+
// effect is as if the monotonic modifier is specified. Otherwise, unless
880+
// the monotonic modifier is specified, the effect is as if the
881+
// nonmonotonic modifier is specified.
882+
// The monotonic modifier is the default in the OpenMP runtime library, so no need
883+
// to set it.
884+
if (!(schedType == llvm::omp::OMPScheduleType::OrderedStatic ||
885+
schedType == llvm::omp::OMPScheduleType::OrderedStaticChunked))
886+
schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
844887
}
888+
845889
ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
846-
schedType, !loop.nowait(), chunk);
890+
schedType, !loop.nowait(), chunk,
891+
/*ordered*/ orderedVal == 0);
847892
}
848893

849894
// Continue building IR after the loop. Note that the LoopInfo returned by

0 commit comments

Comments
 (0)