-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MLIR][NVVM] Add support for mapa MLIR Ops #124514
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir Author: Srinivasa Ravi (Wolfram70) ChangesAdds
PTX Spec Reference: Full diff: https://github.com/llvm/llvm-project/pull/124514.diff 3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 8c8e44a054a627..a914ab030695e9 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -2512,6 +2512,43 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async",
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM Mapa Ops
+//===----------------------------------------------------------------------===//
+
+def NVVM_MapaOp : NVVM_IntrOp<"mapa", [], 1> {
+ let results = (outs LLVM_PointerGeneric:$res);
+ let arguments = (ins LLVM_PointerGeneric:$a, I32:$b);
+
+ let description = [{
+ Maps the generic address pointing to a shared memory variable in the
+ target CTA. Source `a` and `res` are registers containing generic
+ addresses pointing to shared memory.
+ `b` is a 32-bit integer operand representing the rank of the target CTA.
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mapa)
+ }];
+
+ let assemblyFormat ="$a`,` $b attr-dict";
+}
+
+def NVVM_MapaSharedClusterOp : NVVM_IntrOp<"mapa.shared.cluster", [], 1> {
+ let results = (outs LLVM_PointerShared:$res);
+ let arguments = (ins LLVM_PointerShared:$a, I32:$b);
+
+ let description = [{
+ Maps the address pointing to a shared memory variable in the target CTA.
+ source `a` is either a shared memory variable or a register containing a
+ valid shared memory address and register `res` contains a shared memory
+ address. `b` is a 32-bit integer operand representing the rank of the
+ target CTA.
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mapa)
+ }];
+
+ let assemblyFormat = "$a`,` $b attr-dict";
+}
+
def NVVM_Exit : NVVM_Op<"exit"> {
let summary = "Exit Op";
let description = [{
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 4c3b6648a41c00..e2b116551aac22 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -509,6 +509,15 @@ func.func @wgmma_wait_group_sync_aligned() {
return
}
+// CHECK-LABEL: @mapa
+func.func @mapa(%a: !llvm.ptr, %a_shared: !llvm.ptr<3>, %b : i32) {
+ // CHECK: nvvm.mapa %{{.*}}
+ %0 = nvvm.mapa %a, %b
+ // CHECK: nvvm.mapa.shared.cluster %{{.*}}
+ %1 = nvvm.mapa.shared.cluster %a_shared, %b
+ return
+}
+
// -----
// Just check these don't emit errors.
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 7dad9a403def0e..bae006a50ab4e6 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -757,3 +757,13 @@ llvm.func @nvvm_wgmma_wait_group_aligned() {
nvvm.wgmma.wait.group.sync.aligned 20
llvm.return
}
+
+// -----
+// CHECK-LABEL: @nvvm_mapa
+llvm.func @nvvm_mapa(%a: !llvm.ptr, %a_shared: !llvm.ptr<3>, %b : i32) {
+ // CHECK-LLVM: call ptr @llvm.nvvm.mapa(ptr %{{.*}}, i32 %{{.*}})
+ %0 = nvvm.mapa %a, %b
+ // CHECK-LLVM: call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
+ %1 = nvvm.mapa.shared.cluster %a_shared, %b
+ llvm.return
+}
|
@llvm/pr-subscribers-mlir-llvm Author: Srinivasa Ravi (Wolfram70) ChangesAdds
PTX Spec Reference: Full diff: https://github.com/llvm/llvm-project/pull/124514.diff 3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 8c8e44a054a627..a914ab030695e9 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -2512,6 +2512,43 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async",
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM Mapa Ops
+//===----------------------------------------------------------------------===//
+
+def NVVM_MapaOp : NVVM_IntrOp<"mapa", [], 1> {
+ let results = (outs LLVM_PointerGeneric:$res);
+ let arguments = (ins LLVM_PointerGeneric:$a, I32:$b);
+
+ let description = [{
+ Maps the generic address pointing to a shared memory variable in the
+ target CTA. Source `a` and `res` are registers containing generic
+ addresses pointing to shared memory.
+ `b` is a 32-bit integer operand representing the rank of the target CTA.
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mapa)
+ }];
+
+ let assemblyFormat ="$a`,` $b attr-dict";
+}
+
+def NVVM_MapaSharedClusterOp : NVVM_IntrOp<"mapa.shared.cluster", [], 1> {
+ let results = (outs LLVM_PointerShared:$res);
+ let arguments = (ins LLVM_PointerShared:$a, I32:$b);
+
+ let description = [{
+ Maps the address pointing to a shared memory variable in the target CTA.
+ source `a` is either a shared memory variable or a register containing a
+ valid shared memory address and register `res` contains a shared memory
+ address. `b` is a 32-bit integer operand representing the rank of the
+ target CTA.
+ [For more information, see PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mapa)
+ }];
+
+ let assemblyFormat = "$a`,` $b attr-dict";
+}
+
def NVVM_Exit : NVVM_Op<"exit"> {
let summary = "Exit Op";
let description = [{
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index 4c3b6648a41c00..e2b116551aac22 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -509,6 +509,15 @@ func.func @wgmma_wait_group_sync_aligned() {
return
}
+// CHECK-LABEL: @mapa
+func.func @mapa(%a: !llvm.ptr, %a_shared: !llvm.ptr<3>, %b : i32) {
+ // CHECK: nvvm.mapa %{{.*}}
+ %0 = nvvm.mapa %a, %b
+ // CHECK: nvvm.mapa.shared.cluster %{{.*}}
+ %1 = nvvm.mapa.shared.cluster %a_shared, %b
+ return
+}
+
// -----
// Just check these don't emit errors.
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 7dad9a403def0e..bae006a50ab4e6 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -757,3 +757,13 @@ llvm.func @nvvm_wgmma_wait_group_aligned() {
nvvm.wgmma.wait.group.sync.aligned 20
llvm.return
}
+
+// -----
+// CHECK-LABEL: @nvvm_mapa
+llvm.func @nvvm_mapa(%a: !llvm.ptr, %a_shared: !llvm.ptr<3>, %b : i32) {
+ // CHECK-LLVM: call ptr @llvm.nvvm.mapa(ptr %{{.*}}, i32 %{{.*}})
+ %0 = nvvm.mapa %a, %b
+ // CHECK-LLVM: call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
+ %1 = nvvm.mapa.shared.cluster %a_shared, %b
+ llvm.return
+}
|
5b97941
to
6893033
Compare
6893033
to
9792ced
Compare
Adds `mapa` and `mapa.shared.cluster` MLIR Ops to generate mapa instructions. `mapa` - Map the address of the shared variable in the target CTA. - `mapa` - source is a register containing generic address pointing to shared memory. - `mapa.shared.cluster` - source is a shared memory variable or a register containing a valid shared memory address.
9792ced
to
f834921
Compare
Adds
mapa
andmapa.shared.cluster
MLIR Ops to generate mapa instructions.mapa
- Map the address of the shared variable in the target CTA.mapa
- source is a register containing generic address pointing to shared memory.mapa.shared.cluster
- source is a shared memory variable or a register containing a valid shared memory address.PTX Spec Reference:
https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-mapa