Skip to content

Commit dc2be87

Browse files
committed
[MLIR][OpenMP] Add support for ordered construct
This patch supports the ordered construct in the OpenMP dialect following Section 2.19.9 of the OpenMP 5.1 standard. It also adds lowering to LLVM IR using the OpenMP IRBuilder. Lowering to LLVM IR for the ordered simd directive is not supported yet since LLVM optimization passes do not support it for now. Reviewed By: kiranchandramohan, clementval, ftynse, shraiysh Differential Revision: https://reviews.llvm.org/D110015
1 parent 34188f2 commit dc2be87

File tree

6 files changed

+350
-0
lines changed

6 files changed

+350
-0
lines changed

mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,73 @@ def BarrierOp : OpenMP_Op<"barrier"> {
405405
let assemblyFormat = "attr-dict";
406406
}
407407

408+
//===----------------------------------------------------------------------===//
409+
// [5.1] 2.19.9 ordered Construct
410+
//===----------------------------------------------------------------------===//
411+
412+
// String enum case spelled "dependsource"; selects the DEPEND(SOURCE) form of
// the stand-alone ordered directive.
def ClauseDependSource : StrEnumAttrCase<"dependsource">;
413+
// String enum case spelled "dependsink"; selects the DEPEND(SINK: vec) form of
// the stand-alone ordered directive.
def ClauseDependSink : StrEnumAttrCase<"dependsink">;
414+
415+
// String enum attribute named "ClauseDepend" that accepts exactly the two
// cases listed in its case list. Generated C++ is placed in ::mlir::omp.
def ClauseDepend : StrEnumAttr<
416+
"ClauseDepend",
417+
"depend clause",
418+
[ClauseDependSource, ClauseDependSink]> {
419+
let cppNamespace = "::mlir::omp";
420+
}
421+
422+
// `omp.ordered`: the stand-alone (region-less) ordered directive carrying a
// depend clause. See the description string below for the clause semantics.
def OrderedOp : OpenMP_Op<"ordered"> {
423+
let summary = "ordered construct without region";
424+
let description = [{
425+
The ordered construct without region is a stand-alone directive that
426+
specifies cross-iteration dependences in a doacross loop nest.
427+
428+
The `depend_type_val` attribute refers to either the DEPEND(SOURCE) clause
429+
or the DEPEND(SINK: vec) clause.
430+
431+
The `num_loops_val` attribute specifies the number of loops in the doacross
432+
nest.
433+
434+
The `depend_vec_vars` is a variadic list of operands that specifies the index
435+
of the loop iterator in the doacross nest for the DEPEND(SOURCE) clause or
436+
the index of the element of "vec" for the DEPEND(SINK: vec) clause. It
437+
contains the operands in multiple "vec" when multiple DEPEND(SINK: vec)
438+
clauses exist in one ORDERED directive.
439+
}];
440+
441+
// Both attributes are optional at the ODS level; `num_loops_val`, when
// present, is constrained to be >= 0. The operand list accepts any types.
let arguments = (ins OptionalAttr<ClauseDepend>:$depend_type_val,
442+
Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$num_loops_val,
443+
Variadic<AnyType>:$depend_vec_vars);
444+
445+
// The `depend_type(...)` and `depend_vec(...)` groups are each optional in the
// textual form. `num_loops_val` has no dedicated syntax here, so it is carried
// by the trailing attribute dictionary.
let assemblyFormat = [{
446+
( `depend_type` `(` $depend_type_val^ `)` )?
447+
( `depend_vec` `(` $depend_vec_vars^ `:` type($depend_vec_vars) `)` )?
448+
attr-dict
449+
}];
450+
451+
// Custom verification is delegated to the free function ::verifyOrderedOp.
let verifier = "return ::verifyOrderedOp(*this);";
452+
}
453+
454+
// `omp.ordered_region`: the ordered construct that owns a region (the
// structured block). See the description string below for its semantics.
def OrderedRegionOp : OpenMP_Op<"ordered_region"> {
455+
let summary = "ordered construct with region";
456+
let description = [{
457+
The ordered construct with region specifies a structured block in a
458+
worksharing-loop, SIMD, or worksharing-loop SIMD region that is executed in
459+
the order of the loop iterations.
460+
461+
The `simd` attribute corresponds to the SIMD clause specified. If it is not
462+
present, it behaves as if the THREADS clause is specified or no clause is
463+
specified.
464+
}];
465+
466+
// `simd` is a unit attribute: its presence alone encodes the SIMD clause.
let arguments = (ins UnitAttr:$simd);
467+
468+
// Exactly one region, with no structural constraint imposed by ODS.
let regions = (region AnyRegion:$region);
469+
470+
// Textual form: optional `simd` keyword, then the region, then any remaining
// attributes.
let assemblyFormat = [{ ( `simd` $simd^ )? $region attr-dict}];
471+
472+
// Custom verification is delegated to the free function
// ::verifyOrderedRegionOp.
let verifier = "return ::verifyOrderedRegionOp(*this);";
473+
}
474+
408475
//===----------------------------------------------------------------------===//
409476
// 2.17.5 taskwait Construct
410477
//===----------------------------------------------------------------------===//

mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,5 +1129,42 @@ static LogicalResult verifyCriticalOp(CriticalOp op) {
11291129
return success();
11301130
}
11311131

1132+
//===----------------------------------------------------------------------===//
1133+
// Verifier for ordered construct
1134+
//===----------------------------------------------------------------------===//
1135+
1136+
/// Verifies an `omp.ordered` (stand-alone ordered depend) operation.
///
/// The operation is accepted only if all of the following hold:
///  - it has an enclosing `omp.wsloop` whose `ordered` clause carries a
///    non-zero parameter;
///  - its `num_loops_val` attribute is present and equal to that parameter;
///  - the number of `depend_vec_vars` operands is a multiple of
///    `num_loops_val`, because the LLVM IR translation consumes the operands
///    in groups of `num_loops_val` values.
///
/// Emits an op error and returns failure otherwise.
static LogicalResult verifyOrderedOp(OrderedOp op) {
  auto container = op->getParentOfType<WsLoopOp>();
  if (!container || !container.ordered_valAttr() ||
      container.ordered_valAttr().getInt() == 0)
    return op.emitOpError() << "ordered depend directive must be closely "
                            << "nested inside a worksharing-loop with ordered "
                            << "clause with parameter present";

  // `num_loops_val` is declared optional, so it must be checked for presence
  // before its value is read.
  auto numLoopsAttr = op.num_loops_valAttr();
  if (!numLoopsAttr)
    return op.emitOpError() << "requires the num_loops_val attribute";

  int64_t numLoops = numLoopsAttr.getInt();
  if (container.ordered_valAttr().getInt() != numLoops)
    return op.emitOpError() << "number of variables in depend clause does not "
                            << "match number of iteration variables in the "
                            << "doacross loop";

  // `numLoops` equals the non-zero `ordered` parameter here, so the modulo
  // below cannot divide by zero.
  if (op.depend_vec_vars().size() % static_cast<uint64_t>(numLoops) != 0)
    return op.emitOpError() << "number of depend vector operands must be a "
                            << "multiple of num_loops_val";

  return success();
}
1152+
1153+
/// Verifies an `omp.ordered_region` operation.
///
/// The `simd` form is rejected because its code generation is not supported
/// yet. If the operation has an enclosing `omp.wsloop`, that loop must carry
/// an `ordered` clause whose parameter is 0 (i.e. an ordered clause without
/// parameter). An operation with no enclosing `omp.wsloop` is accepted.
static LogicalResult verifyOrderedRegionOp(OrderedRegionOp op) {
  // TODO: The code generation for ordered simd directive is not supported yet.
  // Report why verification fails instead of failing without a diagnostic.
  if (op.simd())
    return op.emitOpError() << "ordered simd directive is not supported yet";

  if (auto container = op->getParentOfType<WsLoopOp>()) {
    if (!container.ordered_valAttr() ||
        container.ordered_valAttr().getInt() != 0)
      return op.emitOpError() << "ordered region must be closely nested inside "
                              << "a worksharing-loop region with an ordered "
                              << "clause without parameter present";
  }

  return success();
}
1168+
11321169
#define GET_OP_CLASSES
11331170
#include "mlir/Dialect/OpenMP/OpenMPOps.cpp.inc"

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,73 @@ makeAtomicReductionGen(omp::ReductionDeclareOp decl,
479479
return atomicGen;
480480
}
481481

482+
/// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
483+
static LogicalResult
484+
convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
485+
LLVM::ModuleTranslation &moduleTranslation) {
486+
auto orderedOp = cast<omp::OrderedOp>(opInst);
487+
488+
omp::ClauseDepend dependType =
489+
*omp::symbolizeClauseDepend(orderedOp.depend_type_valAttr().getValue());
490+
bool isDependSource = dependType == omp::ClauseDepend::dependsource;
491+
unsigned numLoops = orderedOp.num_loops_val().getValue();
492+
SmallVector<llvm::Value *> vecValues =
493+
moduleTranslation.lookupValues(orderedOp.depend_vec_vars());
494+
495+
llvm::OpenMPIRBuilder::LocationDescription ompLoc(
496+
builder.saveIP(), builder.getCurrentDebugLocation());
497+
size_t indexVecValues = 0;
498+
while (indexVecValues < vecValues.size()) {
499+
SmallVector<llvm::Value *> storeValues;
500+
storeValues.reserve(numLoops);
501+
for (unsigned i = 0; i < numLoops; i++) {
502+
storeValues.push_back(vecValues[indexVecValues]);
503+
indexVecValues++;
504+
}
505+
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
506+
ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops,
507+
storeValues, ".cnt.addr", isDependSource));
508+
}
509+
return success();
510+
}
511+
512+
/// Converts an OpenMP 'ordered_region' operation into LLVM IR using
513+
/// OpenMPIRBuilder.
514+
static LogicalResult
515+
convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
516+
LLVM::ModuleTranslation &moduleTranslation) {
517+
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
518+
auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
519+
520+
// TODO: The code generation for ordered simd directive is not supported yet.
521+
if (orderedRegionOp.simd())
522+
return failure();
523+
524+
// TODO: support error propagation in OpenMPIRBuilder and use it instead of
525+
// relying on captured variables.
526+
LogicalResult bodyGenStatus = success();
527+
528+
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
529+
llvm::BasicBlock &continuationBlock) {
530+
// OrderedOp has only one region associated with it.
531+
auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
532+
convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(),
533+
continuationBlock, builder, moduleTranslation,
534+
bodyGenStatus);
535+
};
536+
537+
// TODO: Perform finalization actions for variables. This has to be
538+
// called for variables which have destructors/finalizers.
539+
auto finiCB = [&](InsertPointTy codeGenIP) {};
540+
541+
llvm::OpenMPIRBuilder::LocationDescription ompLoc(
542+
builder.saveIP(), builder.getCurrentDebugLocation());
543+
builder.restoreIP(
544+
moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
545+
ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd()));
546+
return bodyGenStatus;
547+
}
548+
482549
/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
483550
static LogicalResult
484551
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -807,6 +874,12 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
807874
.Case([&](omp::CriticalOp) {
808875
return convertOmpCritical(*op, builder, moduleTranslation);
809876
})
877+
.Case([&](omp::OrderedRegionOp) {
878+
return convertOmpOrderedRegion(*op, builder, moduleTranslation);
879+
})
880+
.Case([&](omp::OrderedOp) {
881+
return convertOmpOrdered(*op, builder, moduleTranslation);
882+
})
810883
.Case([&](omp::WsLoopOp) {
811884
return convertOmpWsLoop(*op, builder, moduleTranslation);
812885
})

mlir/test/Dialect/OpenMP/invalid.mlir

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,3 +318,61 @@ omp.critical.declare @mutex hint(nonspeculative, speculative)
318318

319319
// expected-error @below {{invalid_hint is not a valid hint}}
320320
omp.critical.declare @mutex hint(invalid_hint)
321+
322+
// -----
323+
324+
// Negative test: an omp.ordered_region placed inside a worksharing loop whose
// ordered clause has a non-zero parameter (ordered(1)) must be diagnosed.
func @omp_ordered1(%arg1 : i64, %arg2 : i64, %arg3 : i64) -> () {
325+
omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(1) inclusive {
326+
// expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
327+
omp.ordered_region {
328+
omp.terminator
329+
}
330+
omp.yield
331+
}
332+
return
333+
}
334+
335+
// -----
336+
337+
// Negative test: an omp.ordered_region placed inside a worksharing loop that
// has no ordered clause at all must be diagnosed.
func @omp_ordered2(%arg1 : i64, %arg2 : i64, %arg3 : i64) -> () {
338+
omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) inclusive {
339+
// expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
340+
omp.ordered_region {
341+
omp.terminator
342+
}
343+
omp.yield
344+
}
345+
return
346+
}
347+
348+
// -----
349+
350+
// Negative test: an omp.ordered depend directive that is not nested inside any
// worksharing loop must be diagnosed.
func @omp_ordered3(%vec0 : i64) -> () {
351+
// expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
352+
omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
353+
return
354+
}
355+
356+
// -----
357+
358+
// Negative test: an omp.ordered depend directive inside a worksharing loop
// whose ordered clause has parameter 0 (ordered(0)) must be diagnosed.
func @omp_ordered4(%arg1 : i64, %arg2 : i64, %arg3 : i64, %vec0 : i64) -> () {
359+
omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(0) inclusive {
360+
// expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
361+
omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
362+
363+
omp.yield
364+
}
365+
return
366+
}
367+
368+
// -----
369+
370+
// Negative test: num_loops_val (2) differs from the enclosing loop's ordered
// parameter (ordered(1)), which must be diagnosed.
func @omp_ordered5(%arg1 : i64, %arg2 : i64, %arg3 : i64, %vec0 : i64, %vec1 : i64) -> () {
371+
omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(1) inclusive {
372+
// expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}}
373+
omp.ordered depend_type("dependsource") depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
374+
375+
omp.yield
376+
}
377+
return
378+
}

mlir/test/Dialect/OpenMP/ops.mlir

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,3 +400,43 @@ func @omp_critical() -> () {
400400
}
401401
return
402402
}
403+
404+
// Exercises the textual forms of omp.ordered_region (outside a loop and inside
// an ordered(0) loop) and of omp.ordered with one and with several dependence
// vectors, inside ordered(1) and ordered(2) loops respectively.
func @omp_ordered(%arg1 : i64, %arg2 : i64, %arg3 : i64,
405+
%vec0 : i64, %vec1 : i64, %vec2 : i64, %vec3 : i64) -> () {
406+
// CHECK: omp.ordered_region
407+
omp.ordered_region {
408+
// CHECK: omp.terminator
409+
omp.terminator
410+
}
411+
412+
omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(0) inclusive {
413+
omp.ordered_region {
414+
omp.terminator
415+
}
416+
omp.yield
417+
}
418+
419+
omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(1) inclusive {
420+
// Only one DEPEND(SINK: vec) clause
421+
// CHECK: omp.ordered depend_type("dependsink") depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
422+
omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
423+
424+
// CHECK: omp.ordered depend_type("dependsource") depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
425+
omp.ordered depend_type("dependsource") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
426+
427+
omp.yield
428+
}
429+
430+
omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(2) inclusive {
431+
// Multiple DEPEND(SINK: vec) clauses
432+
// CHECK: omp.ordered depend_type("dependsink") depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64}
433+
omp.ordered depend_type("dependsink") depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
434+
435+
// CHECK: omp.ordered depend_type("dependsource") depend_vec(%{{.*}}, %{{.*}} : i64, i64) {num_loops_val = 2 : i64}
436+
omp.ordered depend_type("dependsource") depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
437+
438+
omp.yield
439+
}
440+
441+
return
442+
}

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,3 +554,78 @@ llvm.func @collapse_wsloop(
554554
}
555555
llvm.return
556556
}
557+
558+
// Pins the LLVM IR expected for the ordered constructs: the ordered region is
// bracketed by __kmpc_ordered / __kmpc_end_ordered calls, a sink dependence
// produces a __kmpc_doacross_wait call per dependence vector, and a source
// dependence produces a __kmpc_doacross_post call. The alloca patterns at the
// top are matched before the per-directive patterns that refer back to them.
llvm.func @omp_ordered(%arg0 : i64, %arg1 : i64, %arg2 : i64, %arg3 : i64,
559+
%arg4: i64, %arg5: i64, %arg6: i64) -> () {
560+
// CHECK: [[ADDR9:%.*]] = alloca [2 x i64], align 8
561+
// CHECK: [[ADDR7:%.*]] = alloca [2 x i64], align 8
562+
// CHECK: [[ADDR5:%.*]] = alloca [2 x i64], align 8
563+
// CHECK: [[ADDR3:%.*]] = alloca [1 x i64], align 8
564+
// CHECK: [[ADDR:%.*]] = alloca [1 x i64], align 8
565+
566+
// CHECK: [[OMP_THREAD:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
567+
// CHECK-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
568+
omp.ordered_region {
569+
omp.terminator
570+
// CHECK: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
571+
}
572+
573+
omp.wsloop (%arg7) : i64 = (%arg0) to (%arg1) step (%arg2) ordered(0) inclusive {
574+
// CHECK: [[OMP_THREAD:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
575+
// CHECK-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
576+
omp.ordered_region {
577+
omp.terminator
578+
// CHECK: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
579+
}
580+
omp.yield
581+
}
582+
583+
omp.wsloop (%arg7) : i64 = (%arg0) to (%arg1) step (%arg2) ordered(1) inclusive {
584+
// CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0
585+
// CHECK: store i64 [[ARG0:%.*]], i64* [[TMP]], align 4
586+
// CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0
587+
// CHECK: [[OMP_THREAD2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
588+
// CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB3]], i32 [[OMP_THREAD2]], i64* [[TMP2]])
589+
omp.ordered depend_type("dependsink") depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
590+
591+
// CHECK: [[TMP3:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR3]], i64 0, i64 0
592+
// CHECK: store i64 [[ARG0]], i64* [[TMP3]], align 4
593+
// CHECK: [[TMP4:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR3]], i64 0, i64 0
594+
// CHECK: [[OMP_THREAD4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
595+
// CHECK: call void @__kmpc_doacross_post(%struct.ident_t* @[[GLOB5]], i32 [[OMP_THREAD4]], i64* [[TMP4]])
596+
omp.ordered depend_type("dependsource") depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
597+
598+
omp.yield
599+
}
600+
601+
omp.wsloop (%arg7) : i64 = (%arg0) to (%arg1) step (%arg2) ordered(2) inclusive {
602+
// CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 0
603+
// CHECK: store i64 [[ARG0]], i64* [[TMP5]], align 4
604+
// CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 1
605+
// CHECK: store i64 [[ARG1:%.*]], i64* [[TMP6]], align 4
606+
// CHECK: [[TMP7:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 0
607+
// CHECK: [[OMP_THREAD6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7:[0-9]+]])
608+
// CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB7]], i32 [[OMP_THREAD6]], i64* [[TMP7]])
609+
// CHECK: [[TMP8:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 0
610+
// CHECK: store i64 [[ARG2:%.*]], i64* [[TMP8]], align 4
611+
// CHECK: [[TMP9:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 1
612+
// CHECK: store i64 [[ARG3:%.*]], i64* [[TMP9]], align 4
613+
// CHECK: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 0
614+
// CHECK: [[OMP_THREAD8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]])
615+
// CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB7]], i32 [[OMP_THREAD8]], i64* [[TMP10]])
616+
omp.ordered depend_type("dependsink") depend_vec(%arg3, %arg4, %arg5, %arg6 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
617+
618+
// CHECK: [[TMP11:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 0
619+
// CHECK: store i64 [[ARG0]], i64* [[TMP11]], align 4
620+
// CHECK: [[TMP12:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 1
621+
// CHECK: store i64 [[ARG1]], i64* [[TMP12]], align 4
622+
// CHECK: [[TMP13:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 0
623+
// CHECK: [[OMP_THREAD10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9:[0-9]+]])
624+
// CHECK: call void @__kmpc_doacross_post(%struct.ident_t* @[[GLOB9]], i32 [[OMP_THREAD10]], i64* [[TMP13]])
625+
omp.ordered depend_type("dependsource") depend_vec(%arg3, %arg4 : i64, i64) {num_loops_val = 2 : i64}
626+
627+
omp.yield
628+
}
629+
630+
llvm.return
631+
}

0 commit comments

Comments
 (0)