swiftlang
diff --git a/‎mlir/include/mlir/Analysis/AffineAnalysis.h
Lines changed: 5 additions & 0 deletions b/‎mlir/include/mlir/Analysis/AffineAnalysis.h
Lines changed: 5 additions & 0 deletions
diff --git a/‎mlir/include/mlir/Dialect/Affine/Passes.td
Lines changed: 5 additions & 1 deletion b/‎mlir/include/mlir/Dialect/Affine/Passes.td
Lines changed: 5 additions & 1 deletion
diff --git a/‎mlir/include/mlir/Dialect/Affine/Utils.h
Lines changed: 11 additions & 1 deletion b/‎mlir/include/mlir/Dialect/Affine/Utils.h
Lines changed: 11 additions & 1 deletion
diff --git a/‎mlir/include/mlir/Dialect/StandardOps/IR/Ops.h
Lines changed: 14 additions & 0 deletions b/‎mlir/include/mlir/Dialect/StandardOps/IR/Ops.h
Lines changed: 14 additions & 0 deletions
diff --git a/‎mlir/include/mlir/Dialect/Vector/VectorOps.h
Lines changed: 6 additions & 0 deletions b/‎mlir/include/mlir/Dialect/Vector/VectorOps.h
Lines changed: 6 additions & 0 deletions
diff --git a/‎mlir/lib/Analysis/AffineAnalysis.cpp
Lines changed: 10 additions & 0 deletions b/‎mlir/lib/Analysis/AffineAnalysis.cpp
Lines changed: 10 additions & 0 deletions
diff --git a/‎mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp
Lines changed: 0 additions & 43 deletions b/‎mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp
Lines changed: 0 additions & 43 deletions
@@ -47,6 +47,11 @@ bool isLoopParallel(
     AffineForOp forOp,
     SmallVectorImpl<LoopReduction> *parallelReductions = nullptr);
 
+/// Returns true if `forOp` doesn't have memory dependences preventing
+/// parallelization. This function doesn't check iter_args and should be used
+/// only as a building block for full parallel-checking functions.
+bool isLoopMemoryParallel(AffineForOp forOp);
+
 /// Returns in `affineApplyOps`, the sequence of those AffineApplyOp
 /// Operations that are reachable via a search starting from `operands` and
 /// ending at those operands that are not the result of an AffineApplyOp.
 
@@ -112,7 +112,11 @@ def AffineVectorize : FunctionPass<"affine-super-vectorize"> {
                "Specify a 1-D, 2-D or 3-D pattern of fastest varying memory "
                "dimensions to match. See defaultPatterns in Vectorize.cpp for "
                "a description and examples. This is used for testing purposes",
-               "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">
+               "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">,
+    Option<"vectorizeReductions", "vectorize-reductions", "bool",
+           /*default=*/"false",
+           "Vectorize known reductions expressed via iter_args. "
+           "Switched off by default.">
   ];
 }
 
 
@@ -13,6 +13,7 @@
 #ifndef MLIR_DIALECT_AFFINE_UTILS_H
 #define MLIR_DIALECT_AFFINE_UTILS_H
 
+#include "mlir/Analysis/AffineAnalysis.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/DenseMap.h"
@@ -27,6 +28,8 @@ struct LogicalResult;
 struct LoopReduction;
 class Operation;
 
+using ReductionLoopMap = DenseMap<Operation *, SmallVector<LoopReduction, 2>>;
+
 /// Replaces parallel affine.for op with 1-d affine.parallel op.
 /// mlir::isLoopParallel detects the parallel affine.for ops.
 /// Parallelizes the specified reductions. Parallelization will fail in presence
@@ -81,16 +84,23 @@ struct VectorizationStrategy {
   // The candidate will be vectorized using the vectorization factor in
   // 'vectorSizes' for that dimension.
   DenseMap<Operation *, unsigned> loopToVectorDim;
+  // Maps loops that implement vectorizable reductions to the corresponding
+  // reduction descriptors.
+  ReductionLoopMap reductionLoops;
 };
 
 /// Vectorizes affine loops in 'loops' using the n-D vectorization factors in
 /// 'vectorSizes'. By default, each vectorization factor is applied
 /// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can
 /// be optionally used to provide a different loop vectorization order.
+/// If `reductionLoops` is not empty, the given reduction loops may be
+/// vectorized along the reduction dimension.
+/// TODO: Vectorizing reductions is supported only for 1-D vectorization.
 void vectorizeAffineLoops(
     Operation *parentOp,
     llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops,
-    ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern);
+    ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern,
+    const ReductionLoopMap &reductionLoops = ReductionLoopMap());
 
 /// External utility to vectorize affine loops from a single loop nest using an
 /// n-D vectorization strategy (see doc in VectorizationStrategy definition).
 
@@ -124,6 +124,20 @@ bool applyCmpPredicate(CmpFPredicate predicate, const APFloat &lhs,
 /// Ignore integer bitwitdh and type mismatch that come from the fact there is
 /// no IndexAttr and that IndexType have no bitwidth.
 bool isEqualConstantIntOrValue(OpFoldResult ofr1, OpFoldResult ofr2);
+
+/// Returns the identity value attribute associated with an AtomicRMWKind op.
+Attribute getIdentityValueAttr(AtomicRMWKind kind, Type resultType,
+                               OpBuilder &builder, Location loc);
+
+/// Returns the identity value associated with an AtomicRMWKind op.
+Value getIdentityValue(AtomicRMWKind op, Type resultType, OpBuilder &builder,
+                       Location loc);
+
+/// Returns the value obtained by applying the reduction operation kind
+/// associated with a binary AtomicRMWKind op to `lhs` and `rhs`.
+Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc,
+                     Value lhs, Value rhs);
+
 } // end namespace mlir
 
 #endif // MLIR_DIALECT_IR_STANDARDOPS_IR_OPS_H
@@ -13,6 +13,7 @@
 #ifndef MLIR_DIALECT_VECTOR_VECTOROPS_H
 #define MLIR_DIALECT_VECTOR_VECTOROPS_H
 
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/BuiltinTypes.h"
@@ -192,6 +193,11 @@ IntegerType getVectorSubscriptType(Builder &builder);
 /// the integer type required for subscripts in the vector dialect.
 ArrayAttr getVectorSubscriptAttr(Builder &b, ArrayRef<int64_t> values);
 
+/// Returns the value obtained by reducing the vector into a scalar using the
+/// operation kind associated with a binary AtomicRMWKind op.
+Value getVectorReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc,
+                           Value vector);
+
 namespace impl {
 /// Build the default minor identity map suitable for a vector transfer. This
 /// also handles the case memref<... x vector<...>> -> vector<...> in which the
 
@@ -70,6 +70,8 @@ static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
     return nullptr;
   if (!forOp.getRegionIterArgs()[pos].hasOneUse())
     return nullptr;
+  if (!yielded.hasOneUse())
+    return nullptr;
 
   Optional<AtomicRMWKind> maybeKind =
       TypeSwitch<Operation *, Optional<AtomicRMWKind>>(definition)
@@ -123,6 +125,14 @@ bool mlir::isLoopParallel(AffineForOp forOp,
       return false;
   }
 
+  // Check memory dependences.
+  return isLoopMemoryParallel(forOp);
+}
+
+/// Returns true if `forOp` doesn't have memory dependences preventing
+/// parallelization. This function doesn't check iter_args and should be used
+/// only as a building block for full parallel-checking functions.
+bool mlir::isLoopMemoryParallel(AffineForOp forOp) {
   // Collect all load and store ops in loop nest rooted at 'forOp'.
   SmallVector<Operation *, 8> loadAndStoreOps;
   auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
 
@@ -367,49 +367,6 @@ class AffineForLowering : public OpRewritePattern<AffineForOp> {
   }
 };
 
-/// Returns the identity value associated with an AtomicRMWKind op.
-static Value getIdentityValue(AtomicRMWKind op, Type resultType,
-                              OpBuilder &builder, Location loc) {
-  switch (op) {
-  case AtomicRMWKind::addf:
-    return builder.create<ConstantOp>(loc, builder.getFloatAttr(resultType, 0));
-  case AtomicRMWKind::addi:
-    return builder.create<ConstantOp>(loc,
-                                      builder.getIntegerAttr(resultType, 0));
-  case AtomicRMWKind::mulf:
-    return builder.create<ConstantOp>(loc, builder.getFloatAttr(resultType, 1));
-  case AtomicRMWKind::muli:
-    return builder.create<ConstantOp>(loc,
-                                      builder.getIntegerAttr(resultType, 1));
-  // TODO: Add remaining reduction operations.
-  default:
-    (void)emitOptionalError(loc, "Reduction operation type not supported");
-    break;
-  }
-  return nullptr;
-}
-
-/// Return the value obtained by applying the reduction operation kind
-/// associated with a binary AtomicRMWKind op to `lhs` and `rhs`.
-static Value getReductionOp(AtomicRMWKind op, OpBuilder &builder, Location loc,
-                            Value lhs, Value rhs) {
-  switch (op) {
-  case AtomicRMWKind::addf:
-    return builder.create<AddFOp>(loc, lhs, rhs);
-  case AtomicRMWKind::addi:
-    return builder.create<AddIOp>(loc, lhs, rhs);
-  case AtomicRMWKind::mulf:
-    return builder.create<MulFOp>(loc, lhs, rhs);
-  case AtomicRMWKind::muli:
-    return builder.create<MulIOp>(loc, lhs, rhs);
-  // TODO: Add remaining reduction operations.
-  default:
-    (void)emitOptionalError(loc, "Reduction operation type not supported");
-    break;
-  }
-  return nullptr;
-}
-
 /// Convert an `affine.parallel` (loop nest) operation into a `scf.parallel`
 /// operation.
 class AffineParallelLowering : public OpRewritePattern<AffineParallelOp> {