Skip to content

Commit dc01b59

Browse files
authored
[MLIR][NVVM] Add support for aligned variants of cluster barriers (#78142)
This patch adds:
* Support for the 'aligned' variants of the cluster barrier Ops, by extending the existing Op with an 'aligned' attribute.
* Docs for these Ops.
* Test cases to verify the lowering to the corresponding intrinsics.

Signed-off-by: Durgadoss R <[email protected]>
1 parent 74cb287 commit dc01b59

File tree

3 files changed

+88
-3
lines changed

3 files changed

+88
-3
lines changed

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -378,22 +378,74 @@ def NVVM_Barrier0Op : NVVM_Op<"barrier0"> {
378378
}
379379

380380
// Cluster barrier "arrive" op.
// Takes one optional unit attribute, `aligned`. Because the assembly format is
// only `attr-dict`, the attribute is spelled in the attribute dictionary, e.g.
// `nvvm.cluster.arrive {aligned}`. During translation to LLVM IR the builder
// below emits the `_aligned` intrinsic when the attribute is present and the
// plain intrinsic otherwise.
def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
  let arguments = (ins OptionalAttr<UnitAttr>:$aligned);

  let summary = "Cluster Barrier Arrive Op";
  let description = [{
    The `cluster.arrive` can be used by the threads within the cluster for synchronization and
    communication. The `cluster.arrive` instruction marks the warps' arrival at the barrier
    without causing the executing thread to wait for other participating threads.

    The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
  }];

  string llvmBuilder = [{
      // Select the intrinsic variant based on the presence of `aligned`.
      if ($aligned)
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_aligned);
      else
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive);
  }];
  let assemblyFormat = "attr-dict";
}
386403

387404
// Cluster barrier "arrive" op with the `.relaxed` qualifier.
// Takes one optional unit attribute, `aligned`. Because the assembly format is
// only `attr-dict`, the attribute is spelled in the attribute dictionary, e.g.
// `nvvm.cluster.arrive.relaxed {aligned}`. During translation to LLVM IR the
// builder below emits the `_relaxed_aligned` intrinsic when the attribute is
// present and the `_relaxed` intrinsic otherwise.
def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed"> {
  let arguments = (ins OptionalAttr<UnitAttr>:$aligned);

  let summary = "Cluster Barrier Relaxed Arrive Op";
  let description = [{
    The `cluster.arrive` can be used by the threads within the cluster for synchronization and
    communication. The `cluster.arrive` instruction marks the warps' arrival at the barrier
    without causing the executing thread to wait for other participating threads.

    The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.
    The .relaxed qualifier on `cluster.arrive` specifies that there are no memory
    ordering and visibility guarantees provided for the memory accesses performed prior to
    `cluster.arrive`.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
  }];

  string llvmBuilder = [{
      // Select the intrinsic variant based on the presence of `aligned`.
      if ($aligned)
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed_aligned);
      else
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_arrive_relaxed);
  }];
  let assemblyFormat = "attr-dict";
}
393430

394431
// Cluster barrier "wait" op.
// Takes one optional unit attribute, `aligned`. Because the assembly format is
// only `attr-dict`, the attribute is spelled in the attribute dictionary, e.g.
// `nvvm.cluster.wait {aligned}`. During translation to LLVM IR the builder
// below emits the `_aligned` intrinsic when the attribute is present and the
// plain intrinsic otherwise.
def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait"> {
  let arguments = (ins OptionalAttr<UnitAttr>:$aligned);

  let summary = "Cluster Barrier Wait Op";
  let description = [{
    The `cluster.wait` causes the executing thread to wait for all non-exited threads
    of the cluster to perform `cluster.arrive`. The `aligned` attribute, when provided,
    generates the .aligned version of the PTX instruction.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
  }];

  string llvmBuilder = [{
      // Select the intrinsic variant based on the presence of `aligned`.
      if ($aligned)
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait_aligned);
      else
        createIntrinsicCall(builder, llvm::Intrinsic::nvvm_barrier_cluster_wait);
  }];
  let assemblyFormat = "attr-dict";
}

mlir/test/Dialect/LLVMIR/nvvm.mlir

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,20 +47,26 @@ func.func @llvm_nvvm_barrier0() {
4747
// Exercises `nvvm.cluster.arrive` in its bare form and with the `aligned`
// unit attribute; the expected output repeats each op's textual form as
// written, including the `{aligned}` attribute dictionary.
// NOTE(review): presumably a parse/print round-trip test; the RUN line is
// outside this view, so confirm.
func.func @llvm_nvvm_cluster_arrive() {
4848
// CHECK: nvvm.cluster.arrive
4949
nvvm.cluster.arrive
50+
// CHECK: nvvm.cluster.arrive {aligned}
51+
nvvm.cluster.arrive {aligned}
5052
llvm.return
5153
}
5254

5355
// Exercises `nvvm.cluster.arrive.relaxed` in its bare form and with the
// `aligned` unit attribute; the expected output repeats each op's textual
// form as written, including the `{aligned}` attribute dictionary.
// NOTE(review): presumably a parse/print round-trip test; the RUN line is
// outside this view, so confirm.
// CHECK-LABEL: @llvm_nvvm_cluster_arrive_relaxed
5456
func.func @llvm_nvvm_cluster_arrive_relaxed() {
5557
// CHECK: nvvm.cluster.arrive.relaxed
5658
nvvm.cluster.arrive.relaxed
59+
// CHECK: nvvm.cluster.arrive.relaxed {aligned}
60+
nvvm.cluster.arrive.relaxed {aligned}
5761
llvm.return
5862
}
5963

6064
// Exercises `nvvm.cluster.wait` in its bare form and with the `aligned`
// unit attribute; the expected output repeats each op's textual form as
// written, including the `{aligned}` attribute dictionary.
// NOTE(review): presumably a parse/print round-trip test; the RUN line is
// outside this view, so confirm.
// CHECK-LABEL: @llvm_nvvm_cluster_wait
6165
func.func @llvm_nvvm_cluster_wait() {
6266
// CHECK: nvvm.cluster.wait
6367
nvvm.cluster.wait
68+
// CHECK: nvvm.cluster.wait {aligned}
69+
nvvm.cluster.wait {aligned}
6470
llvm.return
6571
}
6672

mlir/test/Target/LLVMIR/nvvmir.mlir

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,33 @@ llvm.func @llvm_nvvm_barrier0() {
8080
llvm.return
8181
}
8282

83+
// Lowering test for `nvvm.cluster.arrive`: the bare op is expected to become
// a call to @llvm.nvvm.barrier.cluster.arrive(), and the `{aligned}` form a
// call to @llvm.nvvm.barrier.cluster.arrive.aligned(). The two ops appear in
// the same order as their expected calls.
// CHECK-LABEL: @llvm_nvvm_cluster_arrive
84+
llvm.func @llvm_nvvm_cluster_arrive() {
85+
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive()
86+
nvvm.cluster.arrive
87+
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive.aligned()
88+
nvvm.cluster.arrive {aligned}
89+
llvm.return
90+
}
91+
92+
// Lowering test for `nvvm.cluster.arrive.relaxed`: the bare op is expected to
// become a call to @llvm.nvvm.barrier.cluster.arrive.relaxed(), and the
// `{aligned}` form a call to
// @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned(). The two ops appear in
// the same order as their expected calls.
// CHECK-LABEL: @llvm_nvvm_cluster_arrive_relaxed
93+
llvm.func @llvm_nvvm_cluster_arrive_relaxed() {
94+
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed()
95+
nvvm.cluster.arrive.relaxed
96+
// CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
97+
nvvm.cluster.arrive.relaxed {aligned}
98+
llvm.return
99+
}
100+
101+
// Lowering test for `nvvm.cluster.wait`: the bare op is expected to become a
// call to @llvm.nvvm.barrier.cluster.wait(), and the `{aligned}` form a call
// to @llvm.nvvm.barrier.cluster.wait.aligned(). The two ops appear in the
// same order as their expected calls.
// CHECK-LABEL: @llvm_nvvm_cluster_wait
102+
llvm.func @llvm_nvvm_cluster_wait() {
103+
// CHECK: call void @llvm.nvvm.barrier.cluster.wait()
104+
nvvm.cluster.wait
105+
// CHECK: call void @llvm.nvvm.barrier.cluster.wait.aligned()
106+
nvvm.cluster.wait {aligned}
107+
llvm.return
108+
}
109+
83110
// CHECK-LABEL: @nvvm_shfl
84111
llvm.func @nvvm_shfl(
85112
%0 : i32, %1 : i32, %2 : i32,

0 commit comments

Comments
 (0)