Skip to content

Commit b812932

Browse files
[FLANG] Change loop versioning to use shift instead of divide
Despite me being convinced that the use of divide didn't produce any divide instructions, it does in fact add more instructions than using a plain shift operation. This patch simply changes the divide to a shift right, with an assert to check that the "divisor" is a power of two. Reviewed By: kiranchandramohan, tblah Differential Revision: https://reviews.llvm.org/D151880
1 parent 8dc1395 commit b812932

File tree

2 files changed

+11
-7
lines changed

2 files changed

+11
-7
lines changed

flang/lib/Optimizer/Transforms/LoopVersioning.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,15 +262,19 @@ void LoopVersioningPass::runOnOperation() {
262262
loc, curIndex, totalIndex)
263263
: curIndex;
264264
}
265-
mlir::Value elemSize =
266-
builder.createIntegerConstant(loc, idxTy, arg.size);
267265
// This is the lowest dimension - which doesn't need scaling
268266
mlir::Value finalIndex =
269267
builder.createConvert(loc, idxTy, coop->getOperand(1));
270268
if (totalIndex) {
269+
assert(llvm::isPowerOf2_32(arg.size) &&
270+
"Expected power of two here");
271+
unsigned bits = llvm::Log2_32(arg.size);
272+
mlir::Value elemShift =
273+
builder.createIntegerConstant(loc, idxTy, bits);
271274
totalIndex = builder.create<mlir::arith::AddIOp>(
272275
loc,
273-
builder.create<mlir::arith::DivSIOp>(loc, totalIndex, elemSize),
276+
builder.create<mlir::arith::ShRSIOp>(loc, totalIndex,
277+
elemShift),
274278
finalIndex);
275279
} else {
276280
totalIndex = finalIndex;

flang/test/Transforms/loop-versioning.fir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -366,9 +366,9 @@ func.func @sum1dfixed(%arg0: !fir.ref<!fir.array<?xf64>> {fir.bindc_name = "a"},
366366
// Check the 2D -> 1D coordinate conversion, should have a multiply and a final add.
367367
// Some other operations are checked to synch the different parts.
368368
// CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
369-
// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index
370369
// CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
371-
// CHECK: %[[OUTER_DIV:.*]] = arith.divsi %[[OUTER_IDX]], %[[ITEMSIZE]]
370+
// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index
371+
// CHECK: %[[OUTER_DIV:.*]] = arith.shrsi %[[OUTER_IDX]], %[[ITEMSHIFT]]
372372
// CHECK: %[[C2D:.*]] = arith.addi %[[OUTER_DIV]], %[[INNER_IDX]]
373373
// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C2D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
374374
// CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>
@@ -498,9 +498,9 @@ func.func @sum1dfixed(%arg0: !fir.ref<!fir.array<?xf64>> {fir.bindc_name = "a"},
498498
// CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS2]]#2, {{.*}}
499499
// CHECK: %[[MIDDLE_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
500500
// CHECK: %[[MIDDLE_SUM:.*]] = arith.addi %[[MIDDLE_IDX]], %[[OUTER_IDX]]
501-
// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index
502501
// CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
503-
// CHECK: %[[MIDDLE_DIV:.*]] = arith.divsi %[[MIDDLE_SUM]], %[[ITEMSIZE]]
502+
// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index
503+
// CHECK: %[[MIDDLE_DIV:.*]] = arith.shrsi %[[MIDDLE_SUM]], %[[ITEMSHIFT]]
504504
// CHECK: %[[C3D:.*]] = arith.addi %[[MIDDLE_DIV]], %[[INNER_IDX]]
505505
// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C3D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
506506
// CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>

0 commit comments

Comments
 (0)