Skip to content

Commit 999d525

Browse files
committed
[mlir][vectorize] Support affine.apply in SuperVectorize
affine.apply ops inside a loop being vectorized do not need to be vectorized themselves. However, they still have to be re-created in their original scalar form inside the new vector loop, with every operand that has a registered scalar replacement (e.g., an induction variable) swapped for the corresponding scalar value generated in the vectorized loop.
1 parent 0daf303 commit 999d525

File tree

2 files changed

+50
-4
lines changed

2 files changed

+50
-4
lines changed

mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -721,8 +721,7 @@ struct VectorizationState {
721721
/// Example:
722722
/// * 'replaced': induction variable of a loop to be vectorized.
723723
/// * 'replacement': new induction variable in the new vector loop.
724-
void registerValueScalarReplacement(BlockArgument replaced,
725-
BlockArgument replacement);
724+
void registerValueScalarReplacement(Value replaced, Value replacement);
726725

727726
/// Registers the scalar replacement of a scalar result returned from a
728727
/// reduction loop. 'replacement' must be scalar.
@@ -854,8 +853,8 @@ void VectorizationState::registerValueVectorReplacementImpl(Value replaced,
854853
/// Example:
855854
/// * 'replaced': induction variable of a loop to be vectorized.
856855
/// * 'replacement': new induction variable in the new vector loop.
857-
void VectorizationState::registerValueScalarReplacement(
858-
BlockArgument replaced, BlockArgument replacement) {
856+
void VectorizationState::registerValueScalarReplacement(Value replaced,
857+
Value replacement) {
859858
registerValueScalarReplacementImpl(replaced, replacement);
860859
}
861860

@@ -978,6 +977,28 @@ static arith::ConstantOp vectorizeConstant(arith::ConstantOp constOp,
978977
return newConstOp;
979978
}
980979

980+
/// affine.apply does not need to be vectorized. However, we still need to
981+
/// re-create it, replacing operands with their values in valueScalarReplacement.
982+
static Operation *vectorizeAffineApplyOp(AffineApplyOp applyOp,
983+
VectorizationState &state) {
984+
SmallVector<Value, 8> updatedOperands;
985+
for (Value operand : applyOp.getOperands()) {
986+
Value updatedOperand = operand;
987+
if (state.valueScalarReplacement.contains(operand)) {
988+
updatedOperand = state.valueScalarReplacement.lookupOrDefault(operand);
989+
}
990+
updatedOperands.push_back(updatedOperand);
991+
}
992+
993+
auto newApplyOp = state.builder.create<AffineApplyOp>(
994+
applyOp.getLoc(), applyOp.getAffineMap(), updatedOperands);
995+
996+
// Register the new affine.apply result.
997+
state.registerValueScalarReplacement(applyOp.getResult(),
998+
newApplyOp.getResult());
999+
return newApplyOp;
1000+
}
1001+
9811002
/// Creates a constant vector filled with the neutral elements of the given
9821003
/// reduction. The scalar type of vector elements will be taken from
9831004
/// `oldOperand`.
@@ -1493,6 +1514,8 @@ static Operation *vectorizeOneOperation(Operation *op,
14931514
return vectorizeAffineYieldOp(yieldOp, state);
14941515
if (auto constant = dyn_cast<arith::ConstantOp>(op))
14951516
return vectorizeConstant(constant, state);
1517+
if (auto applyOp = dyn_cast<AffineApplyOp>(op))
1518+
return vectorizeAffineApplyOp(applyOp, state);
14961519

14971520
// Other ops with regions are not supported.
14981521
if (op->getNumRegions() != 0)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=8 test-fastest-varying=0" -split-input-file | FileCheck %s
2+
3+
// CHECK-DAG: #[[$map_id0:map[0-9a-zA-Z_]*]] = affine_map<(d0) -> (d0 mod 12)>
4+
// CHECK-DAG: #[[$map_id1:map[0-9a-zA-Z_]*]] = affine_map<(d0) -> (d0 mod 16)>
5+
6+
// CHECK-LABEL: func @vec_affine_apply
7+
func.func @vec_affine_apply(%arg0: memref<8x12x16xf32>, %arg1: memref<8x24x48xf32>) {
8+
affine.for %arg2 = 0 to 8 {
9+
// CHECK: affine.for %[[S0:.*]] = 0 to 24 {
10+
// CHECK-NEXT: affine.for %[[S1:.*]] = 0 to 48 step 8 {
11+
affine.for %arg3 = 0 to 24 {
12+
affine.for %arg4 = 0 to 48 {
13+
// CHECK-NEXT: affine.apply #[[$map_id0]](%[[S0]])
14+
// CHECK-NEXT: affine.apply #[[$map_id1]](%[[S1]])
15+
%0 = affine.apply affine_map<(d0) -> (d0 mod 12)>(%arg3)
16+
%1 = affine.apply affine_map<(d0) -> (d0 mod 16)>(%arg4)
17+
%2 = affine.load %arg0[%arg2, %0, %1] : memref<8x12x16xf32>
18+
affine.store %2, %arg1[%arg2, %arg3, %arg4] : memref<8x24x48xf32>
19+
}
20+
}
21+
}
22+
return
23+
}

0 commit comments

Comments
 (0)