[MLIR][NVVM] Add support for griddepcontrol Ops #124603
Conversation
Adds `griddepcontrol.wait` and `griddepcontrol.launch.dependents` MLIR Ops to generate griddepcontrol instructions.

`griddepcontrol` allows dependent and prerequisite grids, as defined by the runtime, to control execution in the following ways:

- `griddepcontrol.wait` - causes the executing thread to wait until all prerequisite grids in flight have completed and all the memory operations from the prerequisite grids are performed and made visible to the current grid.
- `griddepcontrol.launch.dependents` - signals that the specific dependents the runtime system designated to react to this instruction can be scheduled as soon as all other CTAs in the grid issue the same instruction or have completed.

PTX Spec Reference: https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol
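For illustration only (not part of this patch), a minimal sketch of how the two ops could appear in LLVM-dialect kernels; the function names are hypothetical, and per the op definitions in the diff below both ops take no operands and produce no results:

llvm.func @prerequisite_kernel() {
  // ... produce the data that the dependent grid will consume ...
  // Signal that grids designated as dependents by the runtime may be
  // scheduled once every CTA of this grid has issued this op or completed.
  nvvm.griddepcontrol.launch.dependents
  // ... remaining work the dependent grid does not rely on ...
  llvm.return
}

llvm.func @dependent_kernel() {
  // ... work that is independent of the prerequisite grid ...
  // Block until all in-flight prerequisite grids have completed and their
  // memory operations are visible to this grid.
  nvvm.griddepcontrol.wait
  // ... consume the prerequisite grid's results ...
  llvm.return
}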
@llvm/pr-subscribers-mlir-llvm

Author: Srinivasa Ravi (Wolfram70)

Changes: Adds `griddepcontrol.wait` and `griddepcontrol.launch.dependents` MLIR Ops to generate griddepcontrol instructions.

PTX Spec Reference: https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol

Full diff: https://github.com/llvm/llvm-project/pull/124603.diff

3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 8c8e44a054a627..11143151ddd858 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -2512,6 +2512,33 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async",
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM Griddepcontrol Ops
+//===----------------------------------------------------------------------===//
+
+def NVVM_GriddepcontrolWaitOp : NVVM_IntrOp<"griddepcontrol.wait", [], 0> {
+ let assemblyFormat = "attr-dict";
+
+ let description = [{
+ Causes the executing thread to wait until all prerequisite grids in flight
+ have completed and all the memory operations from the prerequisite grids
+ are performed and made visible to the current grid.
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol)
+ }];
+}
+
+def NVVM_GriddepcontrolLaunchDependentsOp
+ : NVVM_IntrOp<"griddepcontrol.launch.dependents", [], 0> {
+ let assemblyFormat = "attr-dict";
+
+ let description = [{
+ Signals that specific dependents the runtime system designated to react to
+ this instruction can be scheduled as soon as all other CTAs in the grid
+ issue the same instruction or have completed.
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol)
+ }];
+}
+
def NVVM_Exit : NVVM_Op<"exit"> {
let summary = "Exit Op";
let description = [{
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 4c3b6648a41c00..7d1efdfa44150a 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -509,6 +509,19 @@ func.func @wgmma_wait_group_sync_aligned() {
return
}
+func.func @griddepcontrol_wait() {
+ // CHECK: nvvm.griddepcontrol.wait
+ nvvm.griddepcontrol.wait
+ return
+}
+
+func.func @griddepcontrol_launch_dependents()
+{
+ // CHECK: nvvm.griddepcontrol.launch.dependents
+ nvvm.griddepcontrol.launch.dependents
+ return
+}
+
// -----
// Just check these don't emit errors.
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 7dad9a403def0e..99a71748b0a163 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -757,3 +757,19 @@ llvm.func @nvvm_wgmma_wait_group_aligned() {
nvvm.wgmma.wait.group.sync.aligned 20
llvm.return
}
+
+// -----
+// CHECK-LABEL: @nvvm_griddepcontrol_wait
+llvm.func @nvvm_griddepcontrol_wait() {
+ // CHECK: call void @llvm.nvvm.griddepcontrol.wait()
+ nvvm.griddepcontrol.wait
+ llvm.return
+}
+
+// -----
+// CHECK-LABEL: @nvvm_griddepcontrol_launch_dependents
+llvm.func @nvvm_griddepcontrol_launch_dependents() {
+ // CHECK: call void @llvm.nvvm.griddepcontrol.launch.dependents()
+ nvvm.griddepcontrol.launch.dependents
+ llvm.return
+}
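As additional context (not part of this diff): when the LLVM IR produced by these ops is compiled through the NVPTX backend, the `llvm.nvvm.griddepcontrol.*` intrinsics are expected to map to the corresponding PTX instructions from the linked spec, roughly:

// Issued by the prerequisite grid to let designated dependent grids launch early.
griddepcontrol.launch_dependents;
// Issued by the dependent grid before consuming the prerequisite's output.
griddepcontrol.wait;

The exact placement of these instructions is up to the backend; this sketch only shows the instruction forms documented in the PTX ISA.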
@llvm/pr-subscribers-mlir

Author: Srinivasa Ravi (Wolfram70)

Changes: Adds `griddepcontrol.wait` and `griddepcontrol.launch.dependents` MLIR Ops to generate griddepcontrol instructions.

PTX Spec Reference: https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol

Full diff: https://github.com/llvm/llvm-project/pull/124603.diff
Merging as per offline request.