Skip to content

Commit d851ed4

Browse files
committed
[mlir][OpenMP] Add optional alloc region to reduction decl
The verifier checks that there is at most one block in the alloc region. This is not sufficient to avoid control flow in general MLIR, but by the time we are converting to LLVM IR, structured control flow should already have been lowered to the cf dialect.
1 parent eddfd50 commit d851ed4

File tree

4 files changed

+200
-25
lines changed

4 files changed

+200
-25
lines changed

mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1523,18 +1523,29 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [IsolatedFromAbove,
15231523
Declares an OpenMP reduction kind. This requires two mandatory and three
15241524
optional regions.
15251525

1526-
1. The initializer region specifies how to initialize the thread-local
1526+
1. The optional alloc region specifies how to allocate the thread-local
1527+
reduction value. This region should not contain control flow and all
1528+
IR should be suitable for inlining straight into an entry block. In
1529+
the common case this is expected to contain only allocas. It is
1530+
expected to `omp.yield` the allocated value on all control paths.
1531+
If allocation is conditional (e.g. only allocate if the mold is
1532+
allocated), this should be done in the initializer region and this
1533+
region not included. The alloc region is not used for by-value
1534+
reductions (where allocation is implicit).
1535+
2. The initializer region specifies how to initialize the thread-local
15271536
reduction value. This is usually the neutral element of the reduction.
15281537
For convenience, the region has an argument that contains the value
1529-
of the reduction accumulator at the start of the reduction. It is
1530-
expected to `omp.yield` the new value on all control flow paths.
1531-
2. The reduction region specifies how to combine two values into one, i.e.
1538+
of the reduction accumulator at the start of the reduction. If an alloc
1539+
region is specified, there is a second block argument containing the
1540+
address of the allocated memory. The initializer region is expected to
1541+
`omp.yield` the new value on all control flow paths.
1542+
3. The reduction region specifies how to combine two values into one, i.e.
15321543
the reduction operator. It accepts the two values as arguments and is
15331544
expected to `omp.yield` the combined value on all control flow paths.
1534-
3. The atomic reduction region is optional and specifies how two values
1545+
4. The atomic reduction region is optional and specifies how two values
15351546
can be combined atomically given local accumulator variables. It is
15361547
expected to store the combined value in the first accumulator variable.
1537-
4. The cleanup region is optional and specifies how to clean up any memory
1548+
5. The cleanup region is optional and specifies how to clean up any memory
15381549
allocated by the initializer region. The region has an argument that
15391550
contains the value of the thread-local reduction accumulator. This will
15401551
be executed after the reduction has completed.
@@ -1550,12 +1561,14 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [IsolatedFromAbove,
15501561
let arguments = (ins SymbolNameAttr:$sym_name,
15511562
TypeAttr:$type);
15521563

1553-
let regions = (region AnyRegion:$initializerRegion,
1564+
let regions = (region MaxSizedRegion<1>:$allocRegion,
1565+
AnyRegion:$initializerRegion,
15541566
AnyRegion:$reductionRegion,
15551567
AnyRegion:$atomicReductionRegion,
15561568
AnyRegion:$cleanupRegion);
15571569

15581570
let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword "
1571+
"custom<AllocReductionRegion>($allocRegion) "
15591572
"`init` $initializerRegion "
15601573
"`combiner` $reductionRegion "
15611574
"custom<AtomicReductionRegion>($atomicReductionRegion) "
@@ -1568,6 +1581,17 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [IsolatedFromAbove,
15681581

15691582
return cast<PointerLikeType>(getAtomicReductionRegion().front().getArgument(0).getType());
15701583
}
1584+
1585+
Value getInitializerMoldArg() {
1586+
return getInitializerRegion().front().getArgument(0);
1587+
}
1588+
1589+
Value getInitializerAllocArg() {
1590+
if (getAllocRegion().empty() ||
1591+
getInitializerRegion().front().getNumArguments() != 2)
1592+
return {nullptr};
1593+
return getInitializerRegion().front().getArgument(1);
1594+
}
15711595
}];
15721596
let hasRegionVerifier = 1;
15731597
}

mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1839,46 +1839,84 @@ LogicalResult DistributeOp::verify() {
18391839
// DeclareReductionOp
18401840
//===----------------------------------------------------------------------===//
18411841

1842-
static ParseResult parseAtomicReductionRegion(OpAsmParser &parser,
1843-
Region &region) {
1844-
if (parser.parseOptionalKeyword("atomic"))
1842+
static ParseResult parseOptionalReductionRegion(OpAsmParser &parser,
1843+
Region &region,
1844+
StringRef keyword) {
1845+
if (parser.parseOptionalKeyword(keyword))
18451846
return success();
18461847
return parser.parseRegion(region);
18471848
}
18481849

1849-
static void printAtomicReductionRegion(OpAsmPrinter &printer,
1850-
DeclareReductionOp op, Region &region) {
1850+
static void printOptionalReductionRegion(OpAsmPrinter &printer, Region &region,
1851+
StringRef keyword) {
18511852
if (region.empty())
18521853
return;
1853-
printer << "atomic ";
1854+
printer << keyword << " ";
18541855
printer.printRegion(region);
18551856
}
18561857

1858+
static ParseResult parseAllocReductionRegion(OpAsmParser &parser,
1859+
Region &region) {
1860+
return parseOptionalReductionRegion(parser, region, "alloc");
1861+
}
1862+
1863+
static void printAllocReductionRegion(OpAsmPrinter &printer,
1864+
DeclareReductionOp op, Region &region) {
1865+
printOptionalReductionRegion(printer, region, "alloc");
1866+
}
1867+
1868+
static ParseResult parseAtomicReductionRegion(OpAsmParser &parser,
1869+
Region &region) {
1870+
return parseOptionalReductionRegion(parser, region, "atomic");
1871+
}
1872+
1873+
static void printAtomicReductionRegion(OpAsmPrinter &printer,
1874+
DeclareReductionOp op, Region &region) {
1875+
printOptionalReductionRegion(printer, region, "atomic");
1876+
}
1877+
18571878
static ParseResult parseCleanupReductionRegion(OpAsmParser &parser,
18581879
Region &region) {
1859-
if (parser.parseOptionalKeyword("cleanup"))
1860-
return success();
1861-
return parser.parseRegion(region);
1880+
return parseOptionalReductionRegion(parser, region, "cleanup");
18621881
}
18631882

18641883
static void printCleanupReductionRegion(OpAsmPrinter &printer,
18651884
DeclareReductionOp op, Region &region) {
1866-
if (region.empty())
1867-
return;
1868-
printer << "cleanup ";
1869-
printer.printRegion(region);
1885+
printOptionalReductionRegion(printer, region, "cleanup");
18701886
}
18711887

18721888
LogicalResult DeclareReductionOp::verifyRegions() {
1889+
if (!getAllocRegion().empty()) {
1890+
for (YieldOp yieldOp : getAllocRegion().getOps<YieldOp>()) {
1891+
if (yieldOp.getResults().size() != 1 ||
1892+
yieldOp.getResults().getTypes()[0] != getType())
1893+
return emitOpError() << "expects alloc region to yield a value "
1894+
"of the reduction type";
1895+
}
1896+
}
1897+
18731898
if (getInitializerRegion().empty())
18741899
return emitOpError() << "expects non-empty initializer region";
18751900
Block &initializerEntryBlock = getInitializerRegion().front();
1876-
if (initializerEntryBlock.getNumArguments() != 1 ||
1877-
initializerEntryBlock.getArgument(0).getType() != getType()) {
1878-
return emitOpError() << "expects initializer region with one argument "
1879-
"of the reduction type";
1901+
1902+
if (initializerEntryBlock.getNumArguments() == 1) {
1903+
if (!getAllocRegion().empty())
1904+
return emitOpError() << "expects two arguments to the initializer region "
1905+
"when an allocation region is used";
1906+
} else if (initializerEntryBlock.getNumArguments() == 2) {
1907+
if (getAllocRegion().empty())
1908+
return emitOpError() << "expects one argument to the initializer region "
1909+
"when no allocation region is used";
1910+
} else {
1911+
return emitOpError()
1912+
<< "expects one or two arguments to the initializer region";
18801913
}
18811914

1915+
for (mlir::Value arg : initializerEntryBlock.getArguments())
1916+
if (arg.getType() != getType())
1917+
return emitOpError() << "expects initializer region argument to match "
1918+
"the reduction type";
1919+
18821920
for (YieldOp yieldOp : getInitializerRegion().getOps<YieldOp>()) {
18831921
if (yieldOp.getResults().size() != 1 ||
18841922
yieldOp.getResults().getTypes()[0] != getType())

mlir/test/Dialect/OpenMP/invalid.mlir

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,63 @@ func.func @omp_simd_pretty_simdlen_safelen(%lb : index, %ub : index, %step : ind
565565

566566
// -----
567567

568-
// expected-error @below {{op expects initializer region with one argument of the reduction type}}
568+
// expected-error @below {{op expects alloc region to yield a value of the reduction type}}
569+
omp.declare_reduction @add_f32 : f32
570+
alloc {
571+
^bb0(%arg: f32):
572+
// nonsense test code
573+
%0 = arith.constant 0.0 : f64
574+
omp.yield (%0 : f64)
575+
}
576+
init {
577+
^bb0(%arg0: f32, %arg1: f32):
578+
%0 = arith.constant 0.0 : f32
579+
omp.yield (%0 : f32)
580+
}
581+
combiner {
582+
^bb1(%arg0: f32, %arg1: f32):
583+
%1 = arith.addf %arg0, %arg1 : f32
584+
omp.yield (%1 : f32)
585+
}
586+
587+
// -----
588+
589+
// expected-error @below {{op expects two arguments to the initializer region when an allocation region is used}}
590+
omp.declare_reduction @add_f32 : f32
591+
alloc {
592+
^bb0(%arg: f32):
593+
// nonsense test code
594+
omp.yield (%arg : f32)
595+
}
596+
init {
597+
^bb0(%arg0: f32):
598+
%0 = arith.constant 0.0 : f32
599+
omp.yield (%0 : f32)
600+
}
601+
combiner {
602+
^bb1(%arg0: f32, %arg1: f32):
603+
%1 = arith.addf %arg0, %arg1 : f32
604+
omp.yield (%1 : f32)
605+
}
606+
607+
// -----
608+
609+
// expected-error @below {{op expects one argument to the initializer region when no allocation region is used}}
610+
omp.declare_reduction @add_f32 : f32
611+
init {
612+
^bb0(%arg: f32, %arg2: f32):
613+
%0 = arith.constant 0.0 : f32
614+
omp.yield (%0 : f32)
615+
}
616+
combiner {
617+
^bb1(%arg0: f32, %arg1: f32):
618+
%1 = arith.addf %arg0, %arg1 : f32
619+
omp.yield (%1 : f32)
620+
}
621+
622+
// -----
623+
624+
// expected-error @below {{op expects initializer region argument to match the reduction type}}
569625
omp.declare_reduction @add_f32 : f64
570626
init {
571627
^bb0(%arg: f32):
@@ -683,6 +739,33 @@ cleanup {
683739

684740
// -----
685741

742+
// expected-error @below {{op region #0 ('allocRegion') failed to verify constraint: region with at most 1 blocks}}
743+
omp.declare_reduction @alloc_reduction : !llvm.ptr
744+
alloc {
745+
^bb0(%arg: !llvm.ptr):
746+
%c1 = arith.constant 1 : i32
747+
%0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
748+
cf.br ^bb1(%0: !llvm.ptr)
749+
^bb1(%ret: !llvm.ptr):
750+
omp.yield (%ret : !llvm.ptr)
751+
}
752+
init {
753+
^bb0(%arg: !llvm.ptr):
754+
%cst = arith.constant 1.0 : f32
755+
llvm.store %cst, %arg : f32, !llvm.ptr
756+
omp.yield (%arg : !llvm.ptr)
757+
}
758+
combiner {
759+
^bb1(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
760+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
761+
%1 = llvm.load %arg1 : !llvm.ptr -> f32
762+
%2 = arith.addf %0, %1 : f32
763+
llvm.store %2, %arg0 : f32, !llvm.ptr
764+
omp.yield (%arg0 : !llvm.ptr)
765+
}
766+
767+
// -----
768+
686769
func.func @foo(%lb : index, %ub : index, %step : index) {
687770
%c1 = arith.constant 1 : i32
688771
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr

mlir/test/Dialect/OpenMP/ops.mlir

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2520,6 +2520,36 @@ atomic {
25202520
omp.yield
25212521
}
25222522

2523+
// CHECK-LABEL: @alloc_reduction
2524+
// CHECK-SAME: alloc {
2525+
// CHECK-NEXT: ^bb0(%[[ARG0:.*]]: !llvm.ptr):
2526+
// ...
2527+
// CHECK: omp.yield
2528+
// CHECK-NEXT: } init {
2529+
// CHECK: } combiner {
2530+
// CHECK: }
2531+
omp.declare_reduction @alloc_reduction : !llvm.ptr
2532+
alloc {
2533+
^bb0(%arg: !llvm.ptr):
2534+
%c1 = arith.constant 1 : i32
2535+
%0 = llvm.alloca %c1 x f32 : (i32) -> !llvm.ptr
2536+
omp.yield (%0 : !llvm.ptr)
2537+
}
2538+
init {
2539+
^bb0(%mold: !llvm.ptr, %alloc: !llvm.ptr):
2540+
%cst = arith.constant 1.0 : f32
2541+
llvm.store %cst, %alloc : f32, !llvm.ptr
2542+
omp.yield (%alloc : !llvm.ptr)
2543+
}
2544+
combiner {
2545+
^bb1(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
2546+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
2547+
%1 = llvm.load %arg1 : !llvm.ptr -> f32
2548+
%2 = arith.addf %0, %1 : f32
2549+
llvm.store %2, %arg0 : f32, !llvm.ptr
2550+
omp.yield (%arg0 : !llvm.ptr)
2551+
}
2552+
25232553
// CHECK-LABEL: omp_targets_with_map_bounds
25242554
// CHECK-SAME: (%[[ARG0:.*]]: !llvm.ptr, %[[ARG1:.*]]: !llvm.ptr)
25252555
func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () {

0 commit comments

Comments
 (0)