Skip to content

Commit de4d742

Browse files
committed
[MLIR][NVVM] Add CTA Cluster barrier intrinsics for sm_90
This change adds CTA cluster barrier ops for sm_90 to the NVVM dialect. The corresponding intrinsics are already supported in LLVM core, so the new ops lower to those existing intrinsics. Differential Revision: https://reviews.llvm.org/D158720
1 parent 574b423 commit de4d742

File tree

2 files changed

+56
-0
lines changed

2 files changed

+56
-0
lines changed

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,34 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
483483
let assemblyFormat = "attr-dict";
484484
}
485485

486+
// Defines the `nvvm.cluster.arrive` op. It has no custom assembly beyond the
// attribute dictionary, and its LLVM IR builder emits a call to the
// `llvm::Intrinsic::nvvm_barrier_cluster_arrive` intrinsic with no arguments.
def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
487+
string llvmBuilder = [{
488+
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive);
489+
}];
490+
let assemblyFormat = "attr-dict";
491+
}
492+
493+
// Defines the `nvvm.cluster.arrive.relaxed` op. It has no custom assembly
// beyond the attribute dictionary, and its LLVM IR builder emits a call to the
// `llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed` intrinsic with no
// arguments.
// NOTE(review): how "relaxed" differs from plain arrive is defined by the
// intrinsic itself and is not visible here — confirm against the PTX docs.
def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed"> {
494+
string llvmBuilder = [{
495+
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed);
496+
}];
497+
let assemblyFormat = "attr-dict";
498+
}
499+
500+
// Defines the `nvvm.cluster.wait` op. It has no custom assembly beyond the
// attribute dictionary, and its LLVM IR builder emits a call to the
// `llvm::Intrinsic::nvvm_barrier_cluster_wait` intrinsic with no arguments.
def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait"> {
501+
string llvmBuilder = [{
502+
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait);
503+
}];
504+
let assemblyFormat = "attr-dict";
505+
}
506+
507+
// Defines the `nvvm.fence.sc.cluster` op. It has no custom assembly beyond the
// attribute dictionary, and its LLVM IR builder emits a call to the
// `llvm::Intrinsic::nvvm_fence_sc_cluster` intrinsic with no arguments.
def NVVM_FenceScClusterOp : NVVM_Op<"fence.sc.cluster"> {
508+
string llvmBuilder = [{
509+
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_fence_sc_cluster);
510+
}];
511+
let assemblyFormat = "attr-dict";
512+
}
513+
486514
def ShflKindBfly : I32EnumAttrCase<"bfly", 0>;
487515
def ShflKindUp : I32EnumAttrCase<"up", 1>;
488516
def ShflKindDown : I32EnumAttrCase<"down", 2>;

mlir/test/Dialect/LLVMIR/nvvm.mlir

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,34 @@ func.func @llvm_nvvm_barrier0() {
4343
llvm.return
4444
}
4545

46+
// CHECK-LABEL: @llvm_nvvm_cluster_arrive
47+
// Parses a bare `nvvm.cluster.arrive` (no operands, no attributes) and expects
// the same op name to appear in the tool output for this function.
func.func @llvm_nvvm_cluster_arrive() {
48+
// CHECK: nvvm.cluster.arrive
49+
nvvm.cluster.arrive
50+
llvm.return
51+
}
52+
53+
// CHECK-LABEL: @llvm_nvvm_cluster_arrive_relaxed
54+
// Parses a bare `nvvm.cluster.arrive.relaxed` (no operands, no attributes) and
// expects the same op name to appear in the tool output for this function.
func.func @llvm_nvvm_cluster_arrive_relaxed() {
55+
// CHECK: nvvm.cluster.arrive.relaxed
56+
nvvm.cluster.arrive.relaxed
57+
llvm.return
58+
}
59+
60+
// CHECK-LABEL: @llvm_nvvm_cluster_wait
61+
// Parses a bare `nvvm.cluster.wait` (no operands, no attributes) and expects
// the same op name to appear in the tool output for this function.
func.func @llvm_nvvm_cluster_wait() {
62+
// CHECK: nvvm.cluster.wait
63+
nvvm.cluster.wait
64+
llvm.return
65+
}
66+
67+
// CHECK-LABEL: @llvm_nvvm_fence_sc_cluster
68+
// Parses a bare `nvvm.fence.sc.cluster` (no operands, no attributes) and
// expects the same op name to appear in the tool output for this function.
func.func @llvm_nvvm_fence_sc_cluster() {
69+
// CHECK: nvvm.fence.sc.cluster
70+
nvvm.fence.sc.cluster
71+
llvm.return
72+
}
73+
4674
// CHECK-LABEL: @nvvm_shfl
4775
func.func @nvvm_shfl(
4876
%arg0 : i32, %arg1 : i32, %arg2 : i32,

0 commit comments

Comments
 (0)