@@ -770,7 +770,7 @@ def ParallelOp : SCF_Op<"parallel",
770
770
"getSingleLowerBound", "getSingleUpperBound", "getSingleStep"]>,
771
771
RecursiveMemoryEffects,
772
772
DeclareOpInterfaceMethods<RegionBranchOpInterface>,
773
- SingleBlockImplicitTerminator<"scf::YieldOp ">]> {
773
+ SingleBlockImplicitTerminator<"scf::ReduceOp ">]> {
774
774
let summary = "parallel for operation";
775
775
let description = [{
776
776
The "scf.parallel" operation represents a loop nest taking 4 groups of SSA
@@ -791,27 +791,36 @@ def ParallelOp : SCF_Op<"parallel",
791
791
792
792
The parallel loop operation supports reduction of values produced by
793
793
individual iterations into a single result. This is modeled using the
794
- scf.reduce operation (see scf.reduce for details). Each result of a
795
- scf.parallel operation is associated with an initial value operand and
796
- reduce operation that is an immediate child. Reductions are matched to
797
- result and initial values in order of their appearance in the body.
798
- Consequently, we require that the body region has the same number of
799
- results and initial values as it has reduce operations.
800
-
801
- The body region must contain exactly one block that terminates with
802
- "scf.yield" without operands. Parsing ParallelOp will create such a region
803
- and insert the terminator when it is absent from the custom format.
794
+ "scf.reduce" terminator operation (see "scf.reduce" for details). The i-th
795
+ result of an "scf.parallel" operation is associated with the i-th initial
796
+ value operand, the i-th operand of the "scf.reduce" operation (the value to
797
+ be reduced) and the i-th region of the "scf.reduce" operation (the reduction
798
+ function). Consequently, we require that the number of results of an
799
+ "scf.parallel" op matches the number of initial values and the number of
800
+ reductions in the "scf.reduce" terminator.
801
+
802
+ The body region must contain exactly one block that terminates with a
803
+ "scf.reduce" operation. If an "scf.parallel" op has no reductions, the
804
+ terminator has no operands and no regions. The "scf.parallel" parser will
805
+ automatically insert the terminator for ops that have no reductions if it is
806
+ absent.
804
807
805
808
Example:
806
809
807
810
```mlir
808
811
%init = arith.constant 0.0 : f32
809
- scf.parallel (%iv) = (%lb) to (%ub) step (%step) init (%init) -> f32 {
810
- %elem_to_reduce = load %buffer[%iv] : memref<100xf32>
811
- scf.reduce(%elem_to_reduce) : f32 {
812
+ %r:2 = scf.parallel (%iv) = (%lb) to (%ub) step (%step) init (%init, %init)
813
+ -> f32, f32 {
814
+ %elem_to_reduce1 = memref.load %buffer1[%iv] : memref<100xf32>
815
+ %elem_to_reduce2 = memref.load %buffer2[%iv] : memref<100xf32>
816
+ scf.reduce(%elem_to_reduce1, %elem_to_reduce2 : f32, f32) {
812
817
^bb0(%lhs : f32, %rhs: f32):
813
818
%res = arith.addf %lhs, %rhs : f32
814
819
scf.reduce.return %res : f32
820
+ }, {
821
+ ^bb0(%lhs : f32, %rhs: f32):
822
+ %res = arith.mulf %lhs, %rhs : f32
823
+ scf.reduce.return %res : f32
815
824
}
816
825
}
817
826
```
@@ -853,36 +862,36 @@ def ParallelOp : SCF_Op<"parallel",
853
862
// ReduceOp
854
863
//===----------------------------------------------------------------------===//
855
864
856
- def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> {
857
- let summary = "reduce operation for parallel for";
865
+ def ReduceOp : SCF_Op<"reduce", [
866
+ Terminator, HasParent<"ParallelOp">, RecursiveMemoryEffects,
867
+ DeclareOpInterfaceMethods<RegionBranchTerminatorOpInterface>]> {
868
+ let summary = "reduce operation for scf.parallel";
858
869
let description = [{
859
- "scf.reduce" is an operation occurring inside "scf.parallel" operations.
860
- It consists of one block with two arguments which have the same type as the
861
- operand of "scf.reduce".
862
-
863
- "scf.reduce" is used to model the value for reduction computations of a
864
- "scf.parallel" operation. It has to appear as an immediate child of a
865
- "scf.parallel" and is associated with a result value of its parent
866
- operation.
867
-
868
- Association is in the order of appearance in the body where the first
869
- result of a parallel loop operation corresponds to the first "scf.reduce"
870
- in the operation's body region. The reduce operation takes a single
871
- operand, which is the value to be used in the reduction.
872
-
873
- The reduce operation contains a region whose entry block expects two
874
- arguments of the same type as the operand. As the iteration order of the
875
- parallel loop and hence reduction order is unspecified, the result of
876
- reduction may be non-deterministic unless the operation is associative and
877
- commutative.
878
-
879
- The result of the reduce operation's body must have the same type as the
880
- operands and associated result value of the parallel loop operation.
870
+ "scf.reduce" is the terminator for "scf.parallel" operations. It can model
871
+ an arbitrary number of reductions. It has one region per reduction. Each
872
+ region has one block with two arguments which have the same type as the
873
+ corresponding operand of "scf.reduce". The operands of the op are the values
874
+ that should be reduced; one value per reduction.
875
+
876
+ The i-th reduction (i.e., the i-th region and the i-th operand) corresponds to
877
+ the i-th initial value and the i-th result of the enclosing "scf.parallel"
878
+ op.
879
+
880
+ The "scf.reduce" operation contains regions whose entry blocks expect two
881
+ arguments of the same type as the corresponding operand. As the iteration
882
+ order of the enclosing parallel loop and hence reduction order is
883
+ unspecified, the results of the reductions may be non-deterministic unless
884
+ the reductions are associative and commutative.
885
+
886
+ The result of a reduction region ("scf.reduce.return" operand) must have the
887
+ same type as the corresponding "scf.reduce" operand and the corresponding
888
+ "scf.parallel" initial value.
889
+
881
890
Example:
882
891
883
892
```mlir
884
893
%operand = arith.constant 1.0 : f32
885
- scf.reduce(%operand) : f32 {
894
+ scf.reduce(%operand : f32) {
886
895
^bb0(%lhs : f32, %rhs: f32):
887
896
%res = arith.addf %lhs, %rhs : f32
888
897
scf.reduce.return %res : f32
@@ -892,14 +901,15 @@ def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> {
892
901
893
902
let skipDefaultBuilders = 1;
894
903
let builders = [
895
- OpBuilder<(ins "Value":$operand,
896
- CArg<"function_ref<void (OpBuilder &, Location, Value, Value)>",
897
- "nullptr">:$bodyBuilderFn)>
904
+ OpBuilder<(ins "ValueRange":$operands)>,
905
+ OpBuilder<(ins)>
898
906
];
899
907
900
- let arguments = (ins AnyType:$operand);
901
- let hasCustomAssemblyFormat = 1;
902
- let regions = (region SizedRegion<1>:$reductionOperator);
908
+ let arguments = (ins Variadic<AnyType>:$operands);
909
+ let assemblyFormat = [{
910
+ (`(` $operands^ `:` type($operands) `)`)? $reductions attr-dict
911
+ }];
912
+ let regions = (region VariadicRegion<SizedRegion<1>>:$reductions);
903
913
let hasRegionVerifier = 1;
904
914
}
905
915
@@ -908,13 +918,14 @@ def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> {
908
918
//===----------------------------------------------------------------------===//
909
919
910
920
def ReduceReturnOp :
911
- SCF_Op<"reduce.return", [HasParent<"ReduceOp">, Pure,
912
- Terminator]> {
921
+ SCF_Op<"reduce.return", [HasParent<"ReduceOp">, Pure, Terminator]> {
913
922
let summary = "terminator for reduce operation";
914
923
let description = [{
915
924
"scf.reduce.return" is a special terminator operation for the block inside
916
- "scf.reduce". It terminates the region. It should have the same type as
917
- the operand of "scf.reduce". Example for the custom format:
925
+ "scf.reduce" regions. It terminates the region. It should have the same
926
+ operand type as the corresponding operand of the enclosing "scf.reduce" op.
927
+
928
+ Example:
918
929
919
930
```mlir
920
931
scf.reduce.return %res : f32
@@ -1150,7 +1161,7 @@ def IndexSwitchOp : SCF_Op<"index_switch", [RecursiveMemoryEffects,
1150
1161
1151
1162
def YieldOp : SCF_Op<"yield", [Pure, ReturnLike, Terminator,
1152
1163
ParentOneOf<["ExecuteRegionOp", "ForOp", "IfOp", "IndexSwitchOp",
1153
- "ParallelOp", " WhileOp"]>]> {
1164
+ "WhileOp"]>]> {
1154
1165
let summary = "loop yield and termination operation";
1155
1166
let description = [{
1156
1167
"scf.yield" yields an SSA value from the SCF dialect op region and
0 commit comments