Skip to content

Commit 8beedcd

Browse files
author
Eric Eaton
committed
[mlir][gpu] Eliminate redundant gpu.barrier ops
1 parent 374fb41 commit 8beedcd

File tree

3 files changed

+34
-0
lines changed

3 files changed

+34
-0
lines changed

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,7 @@ def GPU_BarrierOp : GPU_Op<"barrier"> {
10101010
in convergence.
10111011
}];
10121012
let assemblyFormat = "attr-dict";
1013+
let hasCanonicalizer = 1;
10131014
}
10141015

10151016
def GPU_GPUModuleOp : GPU_Op<"module", [

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,6 +1139,29 @@ void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
11391139
mode);
11401140
}
11411141

1142+
//===----------------------------------------------------------------------===//
1143+
// BarrierOp
1144+
//===----------------------------------------------------------------------===//
1145+
1146+
namespace {
1147+
1148+
/// Canonicalization pattern for gpu.barrier: erases `op` when the operation
/// immediately following it is also a gpu.barrier, so that of two adjacent
/// barriers only the later one is kept. The pair is redundant because the
/// threads are already synchronized by a single barrier.
///
/// `isa_and_nonnull` is used because `getNextNode()` yields null when `op` has
/// no following operation; in that case nothing is erased.
///
/// Returns success() if `op` was erased through `rewriter`, failure()
/// otherwise (the IR is left untouched).
1149+
LogicalResult eraseRedundantGpuBarrierOps(BarrierOp op,
1150+
PatternRewriter &rewriter) {
1151+
if (isa_and_nonnull<BarrierOp>(op->getNextNode())) {
1152+
rewriter.eraseOp(op);
1153+
return success();
1154+
}
1155+
return failure();
1156+
}
1157+
1158+
} // end anonymous namespace
1159+
1160+
/// Populates `results` with the canonicalization patterns for gpu.barrier.
/// The only pattern added here is eraseRedundantGpuBarrierOps; `context` is
/// not used by this function.
void BarrierOp::getCanonicalizationPatterns(RewritePatternSet &results,
1161+
MLIRContext *context) {
1162+
results.add(eraseRedundantGpuBarrierOps);
1163+
}
1164+
11421165
//===----------------------------------------------------------------------===//
11431166
// GPUFuncOp
11441167
//===----------------------------------------------------------------------===//

mlir/test/Dialect/GPU/canonicalize.mlir

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,16 @@ func.func @fold_wait_op_test1() {
1111
}
1212
// CHECK-NOT: gpu.wait
1313

14+
// Erase duplicate barriers: the function body holds two back-to-back
// gpu.barrier ops, and exactly one is expected to remain, directly followed
// by the return.
15+
// CHECK-LABEL: func @erase_barriers
16+
// CHECK-NEXT: gpu.barrier
17+
// CHECK-NEXT: return
18+
func.func @erase_barriers() {
19+
gpu.barrier
20+
gpu.barrier
21+
return
22+
}
23+
1424
// Replace uses of gpu.wait op with its async dependency.
1525
// CHECK-LABEL: func @fold_wait_op_test2
1626
func.func @fold_wait_op_test2(%arg0: i1) -> (memref<5xf16>, memref<5xf16>) {

0 commit comments

Comments
 (0)